158 lines
3.6 KiB
Python
158 lines
3.6 KiB
Python
|
import os.path
|
||
|
from enum import Enum
|
||
|
from glob import glob
|
||
|
from typing import TextIO
|
||
|
|
||
|
import PIL
|
||
|
from PIL import Image, ImageStat
|
||
|
|
||
|
|
||
|
def mkdirIfNotExists(path: str):
    """Create the directory ``path`` (including missing parents) if needed.

    Nothing happens when ``path`` is empty or already is a directory. An
    empty path is what ``os.path.dirname`` returns for a bare file name such
    as ``"log.txt"``; passing it to ``os.makedirs`` would raise.

    :param path: directory to create
    :raises FileExistsError: if ``path`` exists but is not a directory
    """
    if not path or os.path.isdir(path):
        return

    print(f"creating folder: {path}")
    # exist_ok guards against the directory appearing between the check above
    # and this call.
    os.makedirs(path, exist_ok=True)
|
||
|
|
||
|
|
||
|
def validSuffix(file: str) -> bool:
    """Return True if ``file`` ends with an accepted JPEG extension.

    Accepted are the all-lower-case and all-upper-case spellings of both
    extensions: ``.jpg``, ``.JPG``, ``.jpeg`` and ``.JPEG``. Mixed-case
    spellings such as ``.Jpg`` are rejected.

    :param file: file name or path to check
    """
    return file.endswith((".jpg", ".jpeg", ".JPG", ".JPEG"))
|
||
|
|
||
|
|
||
|
def validFileSize(file: str) -> bool:
    """Return True if the file at ``file`` is smaller than 250,000 bytes.

    The limit is exclusive: a file of exactly 250,000 bytes is rejected.

    :param file: path of the file to measure
    :raises OSError: if the file does not exist or is inaccessible
    """
    size_limit = 250_000  # bytes, exclusive upper bound
    size_in_bytes = os.path.getsize(file)
    return size_in_bytes < size_limit
|
||
|
|
||
|
|
||
|
class ErrorCodes(Enum):
    """Reasons for which a file is rejected during validation.

    The numeric value of a member is what ``FileLogger.log`` writes to the
    log file.
    """

    # file name does not end with an accepted suffix (see validSuffix)
    Extension = 1
    # file is too large (see validFileSize)
    FileSize = 2
    # file could not be opened as an image (see openImage)
    InvalidImage = 3
    # wrong mode/bands/format or image too small (see validImageShape)
    ImageShape = 4
    # at least one band has a variance that is not above zero (see validVariance)
    VarianceLZero = 5
    # reserved for duplicate detection; not emitted by the code in this file yet
    Duplicate = 6
|
||
|
|
||
|
|
||
|
def log(filename: str, errorCode: ErrorCodes):
    """Print a one-line rejection notice for ``filename`` to standard output.

    :param filename: name of the rejected file
    :param errorCode: reason for the rejection
    """
    notice = f"[{filename}] :: {errorCode}"
    print(notice)
|
||
|
|
||
|
|
||
|
class Singleton(type):
    """Metaclass that gives every class using it exactly one instance.

    The first call of such a class constructs the instance with the given
    arguments; every later call returns that same object and ignores its
    arguments.
    """

    # Maps each class to its single instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Return the cached instance of ``cls``, creating it on first use."""
        if cls in cls._instances:
            return cls._instances[cls]

        instance = super().__call__(*args, **kwargs)
        cls._instances[cls] = instance
        return instance
|
||
|
|
||
|
|
||
|
class FileLogger(metaclass=Singleton):
    """Process-wide logger that appends ``<filename>,<error code>`` lines to a file.

    The class uses the ``Singleton`` metaclass, so ``FileLogger()`` always
    yields the same object. Call ``init`` once before the first ``log`` call
    and ``close`` when done.
    """

    # Open log file handle, or None while the logger is not initialised.
    file: "TextIO | None" = None

    def init(self, log_file_path: str):
        """Open ``log_file_path`` in append mode as the log target.

        A handle left over from an earlier ``init`` call is closed first so
        that re-initialising does not leak it.
        """
        self.close()
        self.file = open(log_file_path, 'a')

    def close(self):
        """Close the log file; does nothing if no file is open."""
        if self.file is not None:
            self.file.close()
            self.file = None

    def log(self, filename: str, errorcode: ErrorCodes):
        """Append one ``filename,code`` line for a rejected file.

        :param filename: name of the rejected file
        :param errorcode: rejection reason; its numeric value is written
        :raises RuntimeError: if ``init`` has not been called (or the logger
            was closed)
        """
        if self.file is None:
            raise RuntimeError("FileLogger.init() must be called before log()")
        self.file.write(f"{filename},{errorcode.value}\n")
|
||
|
|
||
|
|
||
|
def openImage(path: str) -> "PIL.Image.Image | None":
    """Open and fully decode the image at ``path``.

    ``Image.open`` only identifies the file and reads the pixel data lazily,
    so the data is loaded here as well. A truncated or corrupt file is thus
    reported as invalid instead of failing later when the pixels are used.

    :param path: path of the image file
    :return: the loaded image, or None if the file cannot be read as an image
    """
    # OSError covers FileNotFoundError, IsADirectoryError and PermissionError;
    # UnidentifiedImageError is listed explicitly for readability.
    read_errors = (OSError, PIL.UnidentifiedImageError, ValueError, TypeError)

    try:
        img = Image.open(path)
    except read_errors:
        return None

    try:
        img.load()
    except read_errors:
        # Do not leak the file handle of an image that failed to decode.
        img.close()
        return None

    return img
|
||
|
|
||
|
|
||
|
def validImageShape(img: PIL.Image.Image) -> bool:
    """Return True if ``img`` is an RGB JPEG larger than 96 pixels per side.

    The checks run in this order and stop at the first failure:
    mode is ``"RGB"``, bands are exactly ``('R', 'G', 'B')``, format is
    ``"JPEG"``, and both height and width are strictly greater than 96.

    :param img: opened image to inspect
    """
    return (
        img.mode == "RGB"
        and img.getbands() == ('R', 'G', 'B')
        and img.format == "JPEG"
        and img.height > 96
        and img.width > 96
    )
|
||
|
|
||
|
|
||
|
def validVariance(img: PIL.Image.Image) -> bool:
    """Return True if every band of ``img`` has a variance above zero.

    :param img: image whose per-band statistics are computed with
        ``ImageStat.Stat``
    """
    band_variances = ImageStat.Stat(img).var
    # A variance is never negative, so only the smallest one has to be
    # compared against zero.
    smallest = min(band_variances)
    return smallest > 0.0
|
||
|
|
||
|
|
||
|
def _reject(f: str, code: ErrorCodes):
    """Report ``f`` as rejected with ``code`` on stdout and in the log file."""
    log(f, code)
    FileLogger().log(f, code)


def processFile(f: str):
    """Validate a single file and report the first check it fails.

    The checks run in this order: file suffix, file size, readable as image,
    image shape/format, per-band variance. The first failing check is
    reported on stdout and in the log file, and processing of the file stops.
    An opened image is always closed again, including on early exits.

    :param f: path of the file to validate
    """
    if not validSuffix(f):
        _reject(f, ErrorCodes.Extension)
        return

    if not validFileSize(f):
        _reject(f, ErrorCodes.FileSize)
        return

    img = openImage(f)
    if img is None:
        _reject(f, ErrorCodes.InvalidImage)
        return

    # try/finally so the image handle is released on every exit path below.
    try:
        if not validImageShape(img):
            _reject(f, ErrorCodes.ImageShape)
            return

        if not validVariance(img):
            _reject(f, ErrorCodes.VarianceLZero)
            return

        # TODO: copy the image data, and check whether the image has already
        # been copied.
    finally:
        img.close()
|
||
|
|
||
|
|
||
|
def findFiles(path: str) -> "list[str]":
    """Recursively collect the files below ``path`` and validate each one.

    Directories matched by the glob pattern are skipped so that only regular
    files are passed to ``processFile``. Files are processed in sorted order.

    :param path: root directory to scan
    :return: the sorted list of file paths that were processed
    """
    pattern = os.path.join(path, '**', '*')
    files = sorted(
        candidate
        for candidate in glob(pattern, recursive=True)
        if os.path.isfile(candidate)
    )
    print(files)

    for f in files:
        processFile(f)

    return files
|
||
|
|
||
|
|
||
|
def validate_images(input_dir: str, output_dir: str, log_file: str, formatter: "str | None" = None):
    """Validate all files below ``input_dir`` and log every rejected file.

    Creates ``output_dir`` and the directory of ``log_file`` if they are
    missing, opens the log file, runs the validation over ``input_dir`` and
    closes the log file again, also when validation raises.

    :param input_dir: directory that is scanned recursively for files
    :param output_dir: output directory; it is only created here
    :param log_file: path of the log file that rejected files are appended to
    :param formatter: currently unused
    """
    mkdirIfNotExists(output_dir)

    # A bare file name has no directory part; there is nothing to create then.
    logdir = os.path.dirname(log_file)
    if logdir:
        mkdirIfNotExists(logdir)

    logger = FileLogger()
    logger.init(log_file)
    try:
        findFiles(input_dir)
    finally:
        # Flush and release the log file even if validation fails part-way.
        logger.close()
|
||
|
|
||
|
|
||
|
# Script entry point: validate the files under ./input, using ./output as the
# output directory and logs/1/log.txt as the log file.
if __name__ == '__main__':
    validate_images(
        input_dir="./input",
        output_dir="./output",
        log_file="logs/1/log.txt",
    )
|