2022-03-17 20:24:56 +00:00
|
|
|
"""
|
|
|
|
Author: Lukas Heiligenbrunner
|
|
|
|
Matr.Nr.: K12104785
|
|
|
|
Exercise 2
|
|
|
|
"""
|
|
|
|
|
2022-03-17 18:12:52 +00:00
|
|
|
import os.path
|
|
|
|
from enum import Enum
|
|
|
|
from glob import glob
|
|
|
|
from typing import TextIO
|
2022-03-17 20:24:56 +00:00
|
|
|
import hashlib
|
2022-03-17 18:12:52 +00:00
|
|
|
|
|
|
|
import PIL
|
|
|
|
from PIL import Image, ImageStat
|
|
|
|
|
|
|
|
|
|
|
|
def mkdirIfNotExists(path: str):
    """Create the directory ``path`` (including missing parents) unless it exists.

    An empty ``path`` is ignored. ``os.path.dirname`` returns ``""`` for a bare
    file name, and ``os.makedirs("")`` raises ``FileNotFoundError``.

    :param path: directory to create; may be empty.
    """
    if not path or os.path.isdir(path):
        return

    print(f"creating folder: {path}")
    # exist_ok covers the case where the directory appears between the check
    # above and the creation below
    os.makedirs(path, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
def validSuffix(file: str) -> bool:
    """Tell whether ``file`` ends with one of the accepted JPEG suffixes.

    The comparison is case-sensitive: only the all-lowercase and all-uppercase
    spellings listed below are accepted.
    """
    accepted = (".jpg", ".jpeg", ".JPG", ".JPEG")
    return any(file.endswith(suffix) for suffix in accepted)
|
2022-03-17 18:12:52 +00:00
|
|
|
|
|
|
|
|
|
|
|
def validFileSize(file: str) -> bool:
    """Tell whether the file at ``file`` is strictly smaller than 250 000 bytes."""
    limit = 250_000
    size = os.stat(file).st_size
    return size < limit
|
|
|
|
|
|
|
|
|
|
|
|
class ErrorCodes(Enum):
    """Reasons for rejecting a file; the numeric value is what gets written to the log file."""

    # file name does not end with an accepted suffix
    Extension = 1
    # file is too large
    FileSize = 2
    # file could not be opened as an image
    InvalidImage = 3
    # mode, channels, format or dimensions are not acceptable
    ImageShape = 4
    # the variance check failed
    VarianceLZero = 5
    # the same image data was already copied
    Duplicate = 6
|
|
|
|
|
|
|
|
|
|
|
|
def log(filename: str, errorCode: ErrorCodes):
    """Print ``errorCode`` for ``filename`` to standard output as ``[<filename>] :: <errorCode>``."""
    message = "[{}] :: {}".format(filename, errorCode)
    print(message)
|
|
|
|
|
|
|
|
|
|
|
|
class Singleton(type):
    """Metaclass that hands out one shared instance per class.

    The first call of a class constructs the instance with the given arguments;
    every later call returns that same object and ignores its arguments.
    """

    # maps each class to its single instance
    _instances = {}

    def __call__(cls, *args, **kwargs):
        registry = cls._instances
        if cls in registry:
            return registry[cls]

        created = super().__call__(*args, **kwargs)
        registry[cls] = created
        return created
|
|
|
|
|
|
|
|
|
|
|
|
class FileLogger(metaclass=Singleton):
    """Shared logger that appends ``<file name>;<error code>`` records to a log file.

    The ``Singleton`` metaclass makes every ``FileLogger()`` call return the
    same object. Call ``init`` before ``log`` and ``close`` when finished.
    """

    # handle of the log file; ``None`` until ``init`` has been called
    file: "TextIO | None" = None

    def init(self, log_file_path: str):
        """Open ``log_file_path`` in append mode.

        :param log_file_path: path of the log file; existing content is kept.
        """
        # the instance is shared, so a repeated init must not leak the handle
        # opened by an earlier one
        self.close()
        # newline='' switches off newline translation. Without it the explicit
        # "\r\n" written by log() becomes "\r\r\n" on platforms whose native
        # line separator is already CRLF.
        self.file = open(log_file_path, 'a', newline='')

    def close(self):
        """Close the log file; does nothing if ``init`` was never called."""
        if self.file is not None:
            self.file.close()

    def log(self, filename: str, errorcode: ErrorCodes):
        """Append one record for ``filename``.

        :param filename: path of the rejected file; only its base name is written.
        :param errorcode: reason for the rejection; its numeric value is written.
        """
        # enforce a CRLF line ending with \r\n !!
        self.file.write(f"{os.path.basename(filename)};{errorcode.value}\r\n")
|
2022-03-17 18:12:52 +00:00
|
|
|
|
|
|
|
|
|
|
|
def openImage(path: str) -> "PIL.Image.Image | None":
    """Open and fully decode the image stored at ``path``.

    :param path: path of the image file.
    :return: the loaded image, or ``None`` when the file cannot be opened or
        decoded as an image.
    """
    # OSError covers a missing file, a directory, a permission problem and
    # Pillow's UnidentifiedImageError (a subclass of OSError)
    unreadable = (OSError, ValueError, TypeError)

    try:
        img = Image.open(path)
    except unreadable:
        return None

    # Image.open only identifies the file. Decoding happens lazily, so force it
    # here to catch corrupt pixel data instead of failing later, outside any
    # exception handler.
    try:
        img.load()
    except unreadable:
        img.close()
        return None

    return img
|
|
|
|
|
|
|
|
|
|
|
|
def validImageShape(img: PIL.Image.Image) -> bool:
    """Tell whether ``img`` is an RGB JPEG whose height and width both exceed 96.

    The checks are evaluated in the order written and stop at the first one
    that fails.
    """
    return (
        # mode the image was read in
        img.mode == "RGB"
        # number and order of the channels
        and img.getbands() == ('R', 'G', 'B')
        # file format must be JPEG
        and img.format == "JPEG"
        # height and width must both be greater than 96
        and img.height > 96
        and img.width > 96
    )
|
|
|
|
|
|
|
|
|
|
|
|
def validVariance(img: PIL.Image.Image) -> bool:
    """Tell whether every band of ``img`` has a variance greater than zero."""
    stats = ImageStat.Stat(img)
    # a variance is never negative, so checking the smallest band against zero
    # is enough
    smallest = min(stats.var)
    return smallest > .0
|
|
|
|
|
|
|
|
|
2022-03-17 20:24:56 +00:00
|
|
|
class FileCopy(metaclass=Singleton):
    """Shared copier that stores each distinct image once under a running number.

    The ``Singleton`` metaclass makes every ``FileCopy()`` call return the same
    object. Call ``init`` before ``copyFile``.
    """

    # printf-style conversion without the leading '%', e.g. "05d"
    format: str
    # directory that receives the copied images
    out_dir: str

    # MD5 digests of all images copied so far (a set gives O(1) lookups)
    hashmap: "set[str]"
    # number given to the next copied image
    idx: int

    def init(self, out_dir: str, format: str):
        """Reset the copier: numbering restarts at 1 and no image counts as seen.

        :param out_dir: directory that receives the copied images.
        :param format: printf-style conversion (without '%') for the file number.
        """
        self.format = format
        self.out_dir = out_dir
        self.idx = 1
        self.hashmap = set()

    def copyFile(self, img: PIL.Image.Image) -> bool:
        """Store ``img`` in the output directory unless the same image was stored before.

        :param img: image to store.
        :return: ``True`` if the image was stored, ``False`` if it is a duplicate.
        """
        import shutil

        # Hash mode and size together with the raw pixel bytes, so that images
        # with equal byte streams but different dimensions are not mistaken
        # for duplicates.
        digest = hashlib.md5(repr((img.mode, img.size)).encode())
        digest.update(img.tobytes())
        hexhash = digest.hexdigest()

        if hexhash in self.hashmap:
            # element already copied
            return False
        self.hashmap.add(hexhash)

        # apply format string
        filename = f"%{self.format}" % self.idx
        self.idx += 1
        target = os.path.join(self.out_dir, f"{filename}.jpg")

        # Pillow sets ``filename`` on images opened from a path. Copy that file
        # byte-for-byte, because re-encoding a JPEG is lossy and would not be a
        # real copy.
        source = getattr(img, "filename", "")
        if source and os.path.isfile(source):
            shutil.copyfile(source, target)
        else:
            # no file behind the image: fall back to encoding the pixel data
            img.save(target, format="jpeg")
        return True
|
|
|
|
|
|
|
|
|
|
|
|
def _rejectFile(f: str, code: ErrorCodes) -> bool:
    """Report ``code`` for ``f`` on stdout and in the log file; always returns ``False``."""
    log(f, code)
    FileLogger().log(f, code)
    return False


def processFile(f: str) -> bool:
    """Validate the file ``f`` and copy it if it passes every check.

    The checks run in this order and the first failure is reported: suffix,
    file size, readable as image, image shape, variance, duplicate.

    :param f: path of the file to process.
    :return: ``True`` if the image was copied, ``False`` if it was rejected.
    """
    # check suffix
    if not validSuffix(f):
        return _rejectFile(f, ErrorCodes.Extension)

    # check file size
    if not validFileSize(f):
        return _rejectFile(f, ErrorCodes.FileSize)

    img = openImage(f)
    if img is None:
        return _rejectFile(f, ErrorCodes.InvalidImage)

    # From here on the image is open. The finally clause closes it on every
    # path, including the rejections below, which used to leak it.
    try:
        if not validImageShape(img):
            return _rejectFile(f, ErrorCodes.ImageShape)

        if not validVariance(img):
            return _rejectFile(f, ErrorCodes.VarianceLZero)

        # lets copy the image data
        if not FileCopy().copyFile(img):
            return _rejectFile(f, ErrorCodes.Duplicate)

        # reaching this point means the image was copied
        return True
    finally:
        img.close()
|
2022-03-17 18:12:52 +00:00
|
|
|
|
|
|
|
|
2022-03-17 20:24:56 +00:00
|
|
|
def validate_images(input_dir: str, output_dir: str, log_file: str, formatter: str = "05d") -> int:
    """Validate all files below ``input_dir`` and copy the accepted images.

    :param input_dir: directory that is searched recursively for files.
    :param output_dir: directory receiving the copied images; created if missing.
    :param log_file: file that rejected files are appended to; its directory is
        created if missing.
    :param formatter: printf-style conversion (without '%') used to number the
        copied images.
    :return: number of files for which ``processFile`` returned ``True``.
    """
    # make sure the output directory exists
    mkdirIfNotExists(output_dir)

    # make sure the log file's directory exists; a bare file name has no
    # directory part and must not be passed on as an empty path
    logdir = os.path.dirname(log_file)
    if logdir:
        mkdirIfNotExists(logdir)

    # init file logger and copier
    FileLogger().init(log_file)
    FileCopy().init(output_dir, formatter)

    try:
        # Scan the input dir for files. '*' instead of '*.*' also finds files
        # without an extension so they get reported, and the isfile filter
        # drops directories, which would otherwise be treated as files.
        candidates = glob(os.path.join(input_dir, '**', '*'), recursive=True)
        files = sorted(path for path in candidates if os.path.isfile(path))

        return sum(1 for f in files if processFile(f))
    finally:
        # close the log even when processing a file raises
        FileLogger().close()
|
2022-03-17 18:12:52 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: run validate_images on the hard-coded unittest_input_0
# paths and print the value it returns.
if __name__ == '__main__':
    copied = validate_images(
        "unittest/unittest_input_0",
        "unittest/outputs/unittest_input_0",
        "unittest/outputs/unittest_input_0.log",
        "06d",
    )
    print(copied)
|