Add validation code and correct the saving of the pickle file
Compress.py | 44 lines (new file)
						| @@ -0,0 +1,44 @@ | ||||
| import importlib | ||||
| import shutil | ||||
|  | ||||
|  | ||||
| def compress(filename: str): | ||||
|     supp_compr_algo = {"zip": "zipfile", "gzip": "gzip", "bzip2": "bz2", "lzma": "lzma"} | ||||
|     extensions = {"zip": ".zip", "gzip": ".gz", "bzip2": ".bz2", "lzma": ".xz"} | ||||
|     assert set(supp_compr_algo.keys()) == set(extensions.keys()) | ||||
|  | ||||
|     compression = "bzip2" | ||||
|  | ||||
|     if compression not in supp_compr_algo: | ||||
|         raise ValueError( | ||||
|             f"Unknown compression algorithm '{compression}'; must be one of {list(supp_compr_algo.keys())}") | ||||
|  | ||||
|     try_compression( | ||||
|         file=filename, | ||||
|         name=compression, | ||||
|         module_name=supp_compr_algo[compression], | ||||
|         extension=extensions[compression], | ||||
|         function=zip_compression if compression == "zip" else compression_open_context_manager | ||||
|     ) | ||||
|     print(f"Successfully compressed '{filename}' to '{filename + extensions[compression]}' " | ||||
|           f"using {compression} as compression algorithm") | ||||
|  | ||||
|  | ||||
| def try_compression(file: str, name: str, module_name: str, extension: str, function: callable): | ||||
|     try: | ||||
|         compression_module = importlib.import_module(module_name) | ||||
|  | ||||
|         function(file, compression_module, extension) | ||||
|     except ImportError as ex: | ||||
|         raise ImportError(f"compression='{name}' failed: required module could not be loaded ({ex})") | ||||
|  | ||||
|  | ||||
| def compression_open_context_manager(file: str, module, extension: str): | ||||
|     with open(file, "rb") as f_in: | ||||
|         with module.open(file + extension, "wb") as f_out: | ||||
|             shutil.copyfileobj(f_in, f_out) | ||||
|  | ||||
|  | ||||
| def zip_compression(file: str, module, extension: str): | ||||
|     with module.ZipFile(file + extension, "w", compression=module.ZIP_DEFLATED) as z: | ||||
|         z.write(file) | ||||
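For reference, a minimal usage sketch of the new Compress module (assuming `impaintmodel.pkl` exists in the working directory; note that `compress()` currently hard-codes bzip2 as the algorithm):

```python
import bz2
import pickle

import Compress

# Produces "impaintmodel.pkl.bz2" next to the original file
Compress.compress("impaintmodel.pkl")

# The compressed pickle can be read back the same way Scoring.load_data does it
with bz2.open("impaintmodel.pkl.bz2", "rb") as fh:
    data = pickle.load(fh)
```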
| @@ -17,8 +17,8 @@ class ImageDataset(Dataset): | ||||
|     def __init__(self, image_dir): | ||||
|         self.image_files = sorted(glob.glob(os.path.join(image_dir, "**", "*.jpg"), recursive=True)) | ||||
|         # Mean and std arrays could also be defined as class attributes | ||||
|         self.norm_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32) | ||||
|         self.norm_std = np.array([0.229, 0.224, 0.225], dtype=np.float32) | ||||
|         # self.norm_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32) | ||||
|         # self.norm_std = np.array([0.229, 0.224, 0.225], dtype=np.float32) | ||||
|  | ||||
|     def __getitem__(self, index): | ||||
|         # Open image file, convert to numpy array and scale to [0, 1] | ||||
| @@ -29,12 +29,7 @@ class ImageDataset(Dataset): | ||||
|             transforms.CenterCrop(size=(IMG_SIZE, IMG_SIZE)), | ||||
|         ]) | ||||
|         target_image = resize_transforms(target_image) | ||||
|  | ||||
|         # normalize image from 0-1 | ||||
|         target_image = np.array(target_image, dtype=np.float64) / 255.0 | ||||
|  | ||||
|         # Perform normalization for each channel | ||||
|         # image = (image - self.norm_mean) / self.norm_std | ||||
|         target_image = preprocess(target_image) | ||||
|  | ||||
|         # calculate image with black grid | ||||
|         doomed_image = ex4.ex4(target_image, (5, 5), (4, 4)) | ||||
| @@ -48,17 +43,36 @@ class ImageDataset(Dataset): | ||||
|         return len(self.image_files) | ||||
|  | ||||
|  | ||||
| def preprocess(input: np.array) -> np.array: | ||||
|     # normalize image from 0-1 | ||||
|     target_image = np.array(input, dtype=np.float64) / 255.0 | ||||
|  | ||||
|     # Perform normalization for each channel | ||||
|     # image = (image - self.norm_mean) / self.norm_std | ||||
|  | ||||
|     return target_image | ||||
|  | ||||
|  | ||||
| # postprocess should be the inverse function of preprocess! | ||||
| def postprocess(input: np.array) -> np.array: | ||||
|     target_image = (input * 255.0).astype(np.uint8) | ||||
|     return target_image | ||||
|  | ||||
|  | ||||
| def get_image_loader(path: str): | ||||
|     image_dataset = ImageDataset(path) | ||||
|     totlen = len(image_dataset) | ||||
|     trains, tests = torch.utils.data.dataset.random_split(image_dataset, (int(totlen * .7), totlen - int(totlen * .7)), | ||||
|  | ||||
|     test_set_size = .001 | ||||
|     trains, tests = torch.utils.data.dataset.random_split(image_dataset, lengths=(totlen - int(totlen * test_set_size), | ||||
|                                                                                   int(totlen * test_set_size)), | ||||
|                                                           generator=torch.Generator().manual_seed(42)) | ||||
|  | ||||
|     train_loader = DataLoader( | ||||
|         trains, | ||||
|         shuffle=True,  # shuffle the order of our samples | ||||
|         batch_size=5,  # stack 5 samples into a minibatch | ||||
|         num_workers=2  # no background workers (see comment below) | ||||
|         num_workers=4  # use 4 background worker processes | ||||
|     ) | ||||
|  | ||||
|     test_loader = DataLoader( | ||||
|   | ||||
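Since `postprocess` is meant to invert `preprocess`, a quick round-trip check helps catch scaling bugs early. A minimal sketch, assuming both functions are importable from the DataLoader module:

```python
import numpy as np

from DataLoader import preprocess, postprocess

image = np.random.randint(0, 256, size=(100, 100, 3), dtype=np.uint8)
restored = postprocess(preprocess(image))

assert restored.dtype == np.uint8 and restored.shape == image.shape
# Float rounding followed by the truncating uint8 cast can shift single values
# by one; rounding with np.rint before the cast would make the round trip exact.
assert np.max(np.abs(restored.astype(np.int16) - image.astype(np.int16))) <= 1
```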
| @@ -26,16 +26,16 @@ def train_model(): | ||||
|     nn.train()  # init with train mode | ||||
|     nn.to(device)  # send net to device available | ||||
|  | ||||
|     optimizer = torch.optim.SGD(nn.parameters(), lr=0.1)  # todo adjust parameters and lr | ||||
|     optimizer = torch.optim.AdamW(nn.parameters(), lr=0.1, weight_decay=1e-5)  # todo adjust parameters and lr | ||||
|     loss_function = torch.nn.MSELoss() | ||||
|     n_epochs = 15  # todo epcchs here | ||||
|     n_epochs = 10  # todo: tune the number of epochs | ||||
|  | ||||
|     # todo: check why the model has to be cast to double precision | ||||
|     nn.double() | ||||
|  | ||||
|     train_sample_size = len(train_loader) | ||||
|     losses = [] | ||||
|     best_eval_loss = 0 | ||||
|     best_eval_loss = np.inf | ||||
|     for epoch in range(n_epochs): | ||||
|         print(f"Epoch {epoch}/{n_epochs}\n") | ||||
|         i = 0 | ||||
| @@ -55,12 +55,12 @@ def train_model(): | ||||
|                 end='') | ||||
|             i += train_loader.batch_size | ||||
|  | ||||
|             # eval model every 500th element | ||||
|             if i % 500 == 0: | ||||
|             # eval model every 15th sample | ||||
|             if i % 15 == 0: | ||||
|                 print(f"\nEvaluating model") | ||||
|                 eval_loss = eval_model(nn, test_loader, loss_function, device) | ||||
|                 print(f"Evalution loss={eval_loss}") | ||||
|                 if eval_loss > best_eval_loss: | ||||
|                 if eval_loss < best_eval_loss: | ||||
|                     best_eval_loss = eval_loss | ||||
|                     save_model(nn) | ||||
|  | ||||
|   | ||||
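The training loop now starts `best_eval_loss` at `np.inf` and checkpoints whenever a lower evaluation loss is seen. A standalone sketch of that pattern (the `keep_best` helper and the loss values are purely illustrative):

```python
import numpy as np


def keep_best(eval_losses):
    """Track the smallest evaluation loss, 'saving' whenever it improves."""
    best_eval_loss = np.inf  # start at +inf so the first evaluation always improves it
    for step, eval_loss in enumerate(eval_losses):
        if eval_loss < best_eval_loss:  # lower is better for a loss
            best_eval_loss = eval_loss
            print(f"step {step}: new best loss {eval_loss:.4f} -> save checkpoint")
    return best_eval_loss


keep_best([0.31, 0.27, 0.29, 0.25])  # checkpoints at steps 0, 1 and 3
```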
Scoring.py | 206 lines (new file)
						| @@ -0,0 +1,206 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| """ | ||||
| Author -- Michael Widrich, Andreas Schörgenhumer | ||||
| Contact -- schoergenhumer@ml.jku.at | ||||
| Date -- 07.06.2022 | ||||
|  | ||||
| ############################################################################### | ||||
|  | ||||
| The following copyright statement applies to all code within this file. | ||||
|  | ||||
| Copyright statement: | ||||
| This  material,  no  matter  whether  in  printed  or  electronic  form, | ||||
| may  be  used  for personal  and non-commercial educational use only. | ||||
| Any reproduction of this manuscript, no matter whether as a whole or in parts, | ||||
| no matter whether in printed or in electronic form, requires explicit prior | ||||
| acceptance of the authors. | ||||
|  | ||||
| ############################################################################### | ||||
|  | ||||
| """ | ||||
| import argparse | ||||
| import bz2 | ||||
| import gzip | ||||
| import lzma | ||||
| import os | ||||
| import zipfile | ||||
|  | ||||
| import dill as pkl | ||||
| import numpy as np | ||||
| import onnx | ||||
| import onnxruntime | ||||
|  | ||||
| TEST_DATA_PATH = r"/daten/challenge/django/data/datasets/image_inpainting_2022/test.zip" | ||||
|  | ||||
|  | ||||
| def load_data(file: str): | ||||
|     if file.endswith(".zip"): | ||||
|         # "mode" cannot be "rb", so set it manually to "r" (still need the parameter or the function invocation fails) | ||||
|         # noinspection PyUnusedLocal | ||||
|         def zip_open(file_, mode): | ||||
|             with zipfile.ZipFile(file_, "r") as myzip: | ||||
|                 return myzip.open(myzip.namelist()[0]) | ||||
|  | ||||
|         open_fn = zip_open | ||||
|     elif file.endswith(".bz2"): | ||||
|         open_fn = bz2.open | ||||
|     elif file.endswith(".xz"): | ||||
|         open_fn = lzma.open | ||||
|     elif file.endswith(".gz"): | ||||
|         open_fn = gzip.open | ||||
|     else: | ||||
|         open_fn = open | ||||
|     with open_fn(file, "rb") as pfh: | ||||
|         return pkl.load(pfh) | ||||
|  | ||||
|  | ||||
| def rmse(predictions: list, targets: list): | ||||
|     def rmse_(prediction_array: np.ndarray, target_array: np.ndarray): | ||||
|         if prediction_array.shape != target_array.shape: | ||||
|             raise IndexError(f"Target shape is {target_array.shape} but prediction shape is {prediction_array.shape}") | ||||
|         prediction_array, target_array = np.asarray(prediction_array, np.float64), np.asarray(target_array, np.float64) | ||||
|         return np.sqrt(np.mean((prediction_array - target_array) ** 2)) | ||||
|  | ||||
|     # Compute RMSE for each sample | ||||
|     rmses = [rmse_(prediction, target) for prediction, target in zip(predictions, targets)] | ||||
|     return np.mean(rmses) | ||||
|  | ||||
|  | ||||
| def scoring_file(prediction_file: str, target_file: str): | ||||
|     """Computes the mean RMSE loss on two lists of numpy arrays stored in pickle files prediction_file and targets_file | ||||
|  | ||||
|     Computation of mean RMSE loss, as used in the challenge for exercise 5. See files "example_testset.pkl" and | ||||
|     "example_submission_random.pkl" for an example test set and example targets, respectively. The real test set | ||||
|     (without targets) will be available as download (see assignment sheet 2). | ||||
|  | ||||
|     Parameters | ||||
|     ---------- | ||||
|     prediction_file: str | ||||
|         File path of prediction file. Has to be a pickle file (or dill file) and contain a list of numpy arrays of dtype | ||||
|         uint8, as specified in assignment sheet 2. The file can optionally be compressed, which will be automatically | ||||
|         determined based on its file extension, of which the following are supported: | ||||
|         > ".zip": zip compression (https://docs.python.org/3/library/zipfile.html, including the requirement of the zlib | ||||
|           module: https://docs.python.org/3/library/zlib.html) | ||||
|         > ".gz": gzip compression (https://docs.python.org/3/library/gzip.html, also requires the zlib module) | ||||
|         > ".bz2": bzip2 compression (https://docs.python.org/3/library/bz2.html) | ||||
|         > ".xz": lzma compression (https://docs.python.org/3/library/lzma.html) | ||||
|         If none of these file extensions match, it is assumed to be a raw pickle file. | ||||
|     target_file: str | ||||
|         File path of target file. Has to be a pickle file (or dill file) and contain a list of numpy arrays of dtype | ||||
|         uint8, as specified in assignment sheet 2. The file can optionally be compressed (refer to "predictions_file" | ||||
|         above for more details). This file will not be available for the challenge. | ||||
|     """ | ||||
|     # Load predictions | ||||
|     predictions = load_data(prediction_file) | ||||
|     if not isinstance(predictions, list): | ||||
|         raise TypeError(f"Expected a list of numpy arrays as pickle file. " | ||||
|                         f"Got {type(predictions)} object in pickle file instead.") | ||||
|     if not all([isinstance(prediction, np.ndarray) and np.uint8 == prediction.dtype | ||||
|                 for prediction in predictions]): | ||||
|         raise TypeError("List of predictions contains elements which are not numpy arrays of dtype uint8") | ||||
|  | ||||
|     # Load targets | ||||
|     targets = load_data(target_file) | ||||
|     if len(targets) != len(predictions): | ||||
|         raise IndexError(f"list of targets has {len(targets)} elements " | ||||
|                          f"but list of submitted predictions has {len(predictions)} elements.") | ||||
|  | ||||
|     return rmse(predictions, targets) | ||||
|  | ||||
|  | ||||
| def make_predictions(onnx_model_rt, test_data: np.ndarray): | ||||
|     n_samples = len(test_data["input_arrays"]) | ||||
|  | ||||
|     # Create predictions for each sample (one by one) | ||||
|     predictions = [] | ||||
|     for sample_i in range(n_samples): | ||||
|         # Normalize input by maximal value | ||||
|         input_array = test_data["input_arrays"][sample_i].astype(np.float32) / 255 | ||||
|         known_array = test_data["known_arrays"][sample_i].astype(np.float32) | ||||
|         # Stack both inputs for the network | ||||
|         input_array = np.concatenate([input_array, known_array], axis=0) | ||||
|         # Pretend we have a minibatch dimension | ||||
|         inputs = input_array[None]  # Adds empty dimension | ||||
|  | ||||
|         # Get outputs for network | ||||
|         inputs_rt = {onnx_model_rt.get_inputs()[0].name: inputs} | ||||
|         outputs = onnx_model_rt.run(None, inputs_rt)[0]  # Get first return value | ||||
|         # We pretended to have a minibatch dimension -> remove this dimension | ||||
|         outputs = outputs[0] | ||||
|         if outputs.shape != known_array.shape: | ||||
|             raise ValueError(f"Unbatched model output shape is {outputs.shape} but should be {known_array.shape}") | ||||
|         # Get actual prediction from (entire) raw model output | ||||
|         prediction = outputs[known_array <= 0] | ||||
|  | ||||
|         # De-normalize prediction | ||||
|         prediction = prediction * 255 | ||||
|         # Clip the predictions to a valid range (we know our prediction values can only be in range 0-255 because of | ||||
|         # uint8 datatype!) | ||||
|         prediction = np.clip(prediction, a_min=0, a_max=255) | ||||
|         # Challenge server wants uint8 datatype for predictions | ||||
|         prediction = np.asarray(prediction, dtype=np.uint8) | ||||
|         # Add prediction for sample to list | ||||
|         predictions.append(prediction) | ||||
|  | ||||
|     return predictions | ||||
|  | ||||
|  | ||||
| def scoring_model(model_file: str, test_file: str, target_file: str): | ||||
|     """ | ||||
|     Computation of mean RMSE loss, as used in the challenge for exercise 5. The targets are loaded from the specified | ||||
|     "target_file" (pickle file containing list of numpy arrays), whereas the predictions are created using the model | ||||
|     stored at "model_file" using the original testset input data stored at "test_file". | ||||
|  | ||||
|     Parameters | ||||
|     ---------- | ||||
|     model_file : str | ||||
|         File path of the stored (trained) model. The model must be in ONNX format, and the model output must be the | ||||
|         entire image (rather than only the predicted missing pixel values as it is the case when directly submitting | ||||
|         the predictions via the pickled list of numpy arrays; see function "scoring_file"). The actual predictions are | ||||
|         extracted from this entire image output automatically. The input to the model will be the concatenated image | ||||
|         data and the known array data from the original testset input data, and the batch size is fixed to 1, i.e., | ||||
|         the input shape is (N=1, C=6, H=100, W=100). The output of the model (the entire image) is thus expected to | ||||
|         be (N=1, C=3, H=100, W=100), from which the actual predictions are extracted (given the known array). | ||||
|     test_file: str | ||||
|         File path of the original testset input data, which is a pickle file containing a dictionary with the following | ||||
|         entries: "input_arrays" (list of numpy arrays), "known_arrays" (list of numpy arrays), "offsets" (list of | ||||
|         integer 2-tuples), "spacings" (list of integer 2-tuples), "sample_ids" (list of strings). The file can | ||||
|         optionally be compressed, which will be automatically determined based on its file extension, of which the | ||||
|         following are supported: | ||||
|         > ".zip": zip compression (https://docs.python.org/3/library/zipfile.html, including the requirement of the zlib | ||||
|           module: https://docs.python.org/3/library/zlib.html) | ||||
|         > ".gz": gzip compression (https://docs.python.org/3/library/gzip.html, also requires the zlib module) | ||||
|         > ".bz2": bzip2 compression (https://docs.python.org/3/library/bz2.html) | ||||
|         > ".xz": lzma compression (https://docs.python.org/3/library/lzma.html) | ||||
|         If none of these file extensions match, it is assumed to be a raw pickle file. | ||||
|     target_file: str | ||||
|         File path of target file. Has to be a pickle file (or dill file) and contain a list of numpy arrays of dtype | ||||
|         uint8, as specified in assignment sheet 2. The file can optionally be compressed (refer to "test_file" above | ||||
|         for more details). This file will not be available for the challenge. | ||||
|     """ | ||||
|     targets = load_data(target_file) | ||||
|     model = onnx.load_model(model_file) | ||||
|     onnx.checker.check_model(model) | ||||
|     onnx_model_rt = onnxruntime.InferenceSession(model_file) | ||||
|     test_data = load_data(test_file) | ||||
|     predictions = make_predictions(onnx_model_rt, test_data) | ||||
|     return rmse(predictions, targets) | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     parser = argparse.ArgumentParser() | ||||
|     parser.add_argument("--submission", type=str, help="Path to submission file") | ||||
|     parser.add_argument("--target", type=str, default=None, help="Path to target file") | ||||
|     args = parser.parse_args() | ||||
|     # Infer the type of submission: 1) exported ONNX model or 2) predictions file | ||||
|     if args.submission.endswith(".onnx"): | ||||
|         mse_loss = scoring_model(model_file=args.submission, test_file=TEST_DATA_PATH, target_file=args.target) | ||||
|     else: | ||||
|         # Prediction files are too big to keep; deletion after scoring is currently disabled | ||||
|         try: | ||||
|             mse_loss = scoring_file(prediction_file=args.submission, target_file=args.target) | ||||
|         finally: | ||||
|             pass | ||||
|             # if os.path.exists(args.submission): | ||||
|             #     os.remove(args.submission) | ||||
|     print(mse_loss) | ||||
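For local testing, Scoring.py can also be used programmatically. A hedged sketch using the example files mentioned in the docstrings (file names are placeholders for whatever is actually on disk):

```python
from Scoring import scoring_file, scoring_model

# Score a submitted predictions file against known targets
loss = scoring_file(prediction_file="example_submission_random.pkl",
                    target_file="example_targets.pkl")
print(loss)

# Or score an exported ONNX model against the original test inputs
# loss = scoring_model(model_file="impaintmodel.onnx",
#                      test_file="example_testset.pkl",
#                      target_file="example_targets.pkl")
```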
netio.py | 38 lines
						| @@ -1,14 +1,46 @@ | ||||
| import torch | ||||
| import pickle | ||||
| import sys | ||||
|  | ||||
| import numpy as np | ||||
| import torch | ||||
| import Compress | ||||
| import DataLoader | ||||
| from Net import ImageNN | ||||
|  | ||||
| MODEL_PATH = 'impaintmodel.pt' | ||||
| PICKEL_PATH = 'impaintmodel.pkl' | ||||
|  | ||||
|  | ||||
| def save_model(model: torch.nn.Module): | ||||
|     torch.save(model, 'impaintmodel.pt') | ||||
|     print(f"Saved raw model to {MODEL_PATH}") | ||||
|     torch.save(model, MODEL_PATH) | ||||
|  | ||||
|     # read the provided testing pickle file | ||||
|     print("Generating pickle file with privided test data") | ||||
|     model.eval() | ||||
|     with open('testing/inputs.pkl', 'rb') as handle: | ||||
|         with open(PICKEL_PATH, 'wb') as writehandle: | ||||
|             b: dict = pickle.load(handle) | ||||
|             outarr = [] | ||||
|             i = 0 | ||||
|             piclen = len(b['input_arrays']) | ||||
|             for pic in b['input_arrays']: | ||||
|                 pic = DataLoader.preprocess(pic) | ||||
|                 out = model(torch.from_numpy(pic)) | ||||
|                 out = DataLoader.postprocess(out.detach().numpy()) | ||||
|                 # collect the predictions; they are written as one list below, | ||||
|                 # which is the structure the scoring script expects | ||||
|                 outarr.append(out) | ||||
|  | ||||
|                 print( | ||||
|                     f'\rApplying model [{i}/{piclen}] {sys.getsizeof(outarr)}', end='') | ||||
|                 i += 1 | ||||
|             # dump the whole list in a single call so that one pickle.load | ||||
|             # returns the complete list of predictions | ||||
|             pickle.dump(outarr, writehandle, protocol=pickle.HIGHEST_PROTOCOL) | ||||
|  | ||||
|     # compress the generated pickle arr | ||||
|     Compress.compress(PICKEL_PATH) | ||||
|  | ||||
|  | ||||
| def load_model(): | ||||
|     model = ImageNN() | ||||
|     model.load_state_dict(torch.load('impaintmodel.pt')) | ||||
|     model.load_state_dict(torch.load(MODEL_PATH)) | ||||
|     model.eval() | ||||
|     return model | ||||
|  | ||||
|   | ||||
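Because `scoring_file` expects one pickled list of uint8 numpy arrays, the file produced by `save_model` can be sanity-checked with a few lines (a sketch; the file name comes from `PICKEL_PATH` above):

```python
import pickle

import numpy as np

with open("impaintmodel.pkl", "rb") as fh:
    predictions = pickle.load(fh)

assert isinstance(predictions, list)
assert all(isinstance(p, np.ndarray) and p.dtype == np.uint8 for p in predictions)
print(f"{len(predictions)} predictions, first shape: {predictions[0].shape}")
```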
| @@ -1,7 +0,0 @@ | ||||
| unittest\unittest_input_0\00.jpg | ||||
| unittest\unittest_input_0\01.jpg | ||||
| unittest\unittest_input_0\02.jpg | ||||
| unittest\unittest_input_0\04.jpg | ||||
| unittest\unittest_input_0\05.jpg | ||||
| unittest\unittest_input_0\subfolder\06.jpg | ||||
| unittest\unittest_input_0\subfolder\07.jpg | ||||
| @@ -1,9 +0,0 @@ | ||||
| unittest\unittest_input_1\08.jpg | ||||
| unittest\unittest_input_1\09.jpg | ||||
| unittest\unittest_input_1\11.jpg | ||||
| unittest\unittest_input_1\12.jpg | ||||
| unittest\unittest_input_1\13.jpg | ||||
| unittest\unittest_input_1\14.jpg | ||||
| unittest\unittest_input_1\subfolder\15.jpg | ||||
| unittest\unittest_input_1\subfolder\subsubfolder\16.jpg | ||||
| unittest\unittest_input_1\subfolder\subsubfolder\17.jpg | ||||
(18 binary JPG files removed from the unittest data; sizes 18–94 KiB)