"""Validate the image files of a directory tree and log every rejected file."""
import os.path
from enum import Enum
from glob import glob
from typing import List, Optional, TextIO

import PIL
from PIL import Image, ImageStat


def mkdirIfNotExists(path: str):
    """Create the directory ``path`` (including parents) if it is missing.

    An empty ``path`` is ignored: ``os.path.dirname`` returns ``""`` for a
    bare file name, and ``os.makedirs("")`` would raise.
    """
    if not path:
        return
    if not os.path.isdir(path):
        print(f"creating folder: {path}")
        # exist_ok guards against the directory appearing between the check
        # above and this call.
        os.makedirs(path, exist_ok=True)


def validSuffix(file: str) -> bool:
    """Return True if ``file`` ends with ``.jpg``, ``.jpeg`` or ``.JPG``."""
    return file.endswith((".jpg", ".jpeg", ".JPG"))


def validFileSize(file: str) -> bool:
    """Return True if the size of ``file`` is below 250,000 bytes."""
    return os.path.getsize(file) < 250_000


class ErrorCodes(Enum):
    """Reasons for rejecting a file; the numeric value is written to the log."""

    Extension = 1
    FileSize = 2
    InvalidImage = 3
    ImageShape = 4
    VarianceLZero = 5
    Duplicate = 6


def log(filename: str, errorCode: ErrorCodes):
    """Print the rejection of ``filename`` with its error code to stdout."""
    print(f"[{filename}] :: {errorCode}")


class Singleton(type):
    """Metaclass that creates at most one instance per class."""

    _instances = {}

    def __call__(cls, *args, **kwargs):
        # Only the first call constructs the object; later calls return it.
        if cls not in cls._instances:
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]


class FileLogger(metaclass=Singleton):
    """Process-wide logger appending ``filename,error_code`` lines to a file."""

    # Open log file handle, or None while the logger is not initialised.
    file: Optional[TextIO] = None

    def init(self, log_file_path: str):
        """Open ``log_file_path`` in append mode as the log target."""
        # The logger is a singleton, so a second init must not leak the
        # handle opened by the first one.
        self.close()
        self.file = open(log_file_path, 'a')

    def close(self):
        """Close the log file; does nothing if no file is open."""
        if self.file is not None:
            self.file.close()
            self.file = None

    def log(self, filename: str, errorcode: ErrorCodes):
        """Append one ``filename,code`` line to the log file.

        Raises:
            RuntimeError: if ``init`` has not been called (or the logger was
                closed) so there is no file to write to.
        """
        if self.file is None:
            raise RuntimeError("FileLogger.init() must be called before log()")
        self.file.write(f"{filename},{errorcode.value}\n")


def openImage(path: str) -> Optional[PIL.Image.Image]:
    """Open ``path`` with Pillow and return the image, or None on failure.

    ``OSError`` covers ``FileNotFoundError`` and
    ``PIL.UnidentifiedImageError`` as well as directories and unreadable
    files, which the recursive glob in ``findFiles`` can also hand in.
    """
    try:
        return Image.open(path)
    except (OSError, ValueError, TypeError):
        return None


def validImageShape(img: PIL.Image.Image) -> bool:
    """Return True if ``img`` is an RGB JPEG with both sides larger than 96."""
    # check read mode of channels
    if img.mode != "RGB":
        return False
    # check that number and order of channels are correct
    if img.getbands() != ('R', 'G', 'B'):
        return False
    # check that the file format is JPEG
    if img.format != "JPEG":
        return False
    # check that height and width are both > 96
    if img.height <= 96 or img.width <= 96:
        return False
    return True


def validVariance(img: PIL.Image.Image) -> bool:
    """Return True if every band of ``img`` has a variance greater than zero."""
    variances = ImageStat.Stat(img).var
    # a variance cannot be negative, so this only rejects bands with variance 0
    return min(variances) > .0


def _reject(filename: str, code: ErrorCodes) -> None:
    """Report a rejected file on stdout and in the log file."""
    log(filename, code)
    FileLogger().log(filename, code)


def processFile(f: str):
    """Run all validation checks on file ``f`` and log the first failure.

    Checks run in this order: suffix, file size, readable as image, image
    shape, variance. The image is closed on every path once it was opened.
    """
    # check suffix
    if not validSuffix(f):
        _reject(f, ErrorCodes.Extension)
        return

    # check file size
    if not validFileSize(f):
        _reject(f, ErrorCodes.FileSize)
        return

    img = openImage(f)
    if img is None:
        _reject(f, ErrorCodes.InvalidImage)
        return

    try:
        if not validImageShape(img):
            _reject(f, ErrorCodes.ImageShape)
            return

        # Image.open is lazy: the pixel data is only decoded when statistics
        # are computed, so a truncated or corrupt file surfaces here.
        try:
            has_variance = validVariance(img)
        except OSError:
            _reject(f, ErrorCodes.InvalidImage)
            return

        if not has_variance:
            _reject(f, ErrorCodes.VarianceLZero)
            return

        # TODO: copy the image data and check whether the image has already
        # been copied (ErrorCodes.Duplicate).
    finally:
        # close image, also on the early returns above
        img.close()


def findFiles(path: str) -> List[str]:
    """Process every entry found recursively below ``path``.

    Returns:
        The sorted list of paths that were handed to ``processFile``.
    """
    files = glob(path + '/**/*', recursive=True)
    print(files)

    # sort filenames so that files are processed in a deterministic order
    files.sort()

    for f in files:
        processFile(f)
    return files


def validate_images(input_dir: str, output_dir: str, log_file: str, formatter: Optional[str] = None):
    """Validate all files below ``input_dir`` and log rejected ones.

    Args:
        input_dir: directory that is scanned recursively.
        output_dir: directory that is created if it does not exist.
        log_file: path of the log file; rejections are appended to it.
        formatter: currently unused.
    """
    # make sure the output dir exists
    mkdirIfNotExists(output_dir)

    # make sure the logfile dir exists
    mkdirIfNotExists(os.path.dirname(log_file))

    # init file logger
    logger = FileLogger()
    logger.init(log_file)
    try:
        # scan input dir for files
        findFiles(input_dir)
    finally:
        # flush and release the log file even if processing fails
        logger.close()


if __name__ == '__main__':
    validate_images("./input", "./output", "logs/1/log.txt")