Compare commits
2 Commits: de688e4a1e ... bcbb9bb9de

| SHA1 |
|---|
| bcbb9bb9de |
| 7ef0bb21b2 |
@@ -42,10 +42,10 @@
 #show: jku-thesis.with(
   thesis-type: "Bachelor",
   degree: "Bachelor of Science",
-  program: "Artifical Intelligence Studies",
+  program: "Artifical Intelligence",
   supervisor: "Professor Scharinger Josef",
   advisors: (), // singular advisor like this: ("Dr. Felix Pawsworth",) and no supervisor: ""
-  department: "Department of Image processing",
+  department: "Institute of Computational Perception",
   author: "Lukas Heiligenbrunner",
   date: date,
   place-of-submission: "Linz",
Two binary image files changed (sizes unchanged: 1.4 MiB and 66 KiB).
src/conclusionandoutlook.tex (5 lines, deleted)
@@ -1,5 +0,0 @@
\section{Conclusion and Outlook}\label{sec:conclusion-and-outlook}

\subsection{Conclusion}\label{subsec:conclusion}

\subsection{Outlook}\label{subsec:outlook}
src/experimentalresults.tex (16 lines, deleted)
@@ -1,16 +0,0 @@
\section{Experimental Results}\label{sec:experimental-results}

\subsubsection{Is Few-Shot learning a suitable fit for anomaly detection?}

Should Few-Shot learning be used for anomaly detection tasks?
How does it compare to well-established algorithms such as PatchCore or EfficientAD?

\subsubsection{How does imbalancing the shot number affect performance?}
Does giving the Few-Shot learner more good than bad samples improve the model performance?

\subsubsection{How do the 3 methods (ResNet, CAML, \pmf) perform in detecting only the anomaly class?}
How much does the performance improve if only detecting whether an anomaly is present or not?
How does it compare to PatchCore and EfficientAD?

\subsubsection{Extra: How does Euclidean distance compare to cosine similarity when using ResNet as a feature extractor?}
Different distance measures were tried, but the results are largely the same.
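Editorial note: similar results for the two measures are expected for normalized embeddings, since for L2-normalized vectors squared Euclidean distance and cosine similarity are monotonically related and therefore induce the same nearest-neighbor ranking. A minimal sketch (not part of the thesis sources; the embeddings and shapes are illustrative assumptions):

import torch
import torch.nn.functional as F

torch.manual_seed(0)
prototypes = torch.randn(3, 512)  # hypothetical class prototypes from a ResNet feature extractor
query = torch.randn(1, 512)       # hypothetical query embedding

# For L2-normalized vectors: ||a - b||^2 = 2 - 2 * cos(a, b),
# so both measures rank the prototypes identically.
q = F.normalize(query, dim=1)
p = F.normalize(prototypes, dim=1)
sq_euclid = torch.cdist(q, p).squeeze(0) ** 2   # squared Euclidean distance, shape (3,)
cosine = F.cosine_similarity(q, p)              # cosine similarity, shape (3,)
print(sq_euclid)                                # equals 2 - 2 * cosine (up to float error)
print(2 - 2 * cosine)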
src/implementation.tex (17 lines, deleted)
@@ -1,17 +0,0 @@
\section{Implementation}\label{sec:implementation}

\subsection{Experiment Setup}\label{subsec:experiment-setup}
% todo
todo setup of experiments, which classes used, nr of samples
kinds of experiments which lead to graphs

\subsection{Jupyter}\label{subsec:jupyter}

To get accurate performance measures, the active-learning process was first implemented in a Jupyter notebook.
This helps to choose which of the methods performs best and which one to use in the final Dagster pipeline.
A straightforward machine-learning pipeline was implemented with the help of PyTorch and ResNet-18.

Moreover, the dataset was imported manually with a custom torch dataloader and preprocessed with random augmentations.
After each loop iteration the Area Under the Curve (AUC) was calculated over the validation set to obtain a performance measure.
All those AUC values were visualized in a line plot; see section~\ref{sec:experimental-results} for the results.
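Editorial note: a hedged sketch of the per-iteration AUC evaluation described above (model and val_loader are hypothetical placeholders; the AUC itself comes from scikit-learn's roc_auc_score):

import torch
from sklearn.metrics import roc_auc_score

def validation_auc(model, val_loader, device="cpu"):
    # Collect anomaly probabilities and labels over the validation set,
    # then compute the Area Under the ROC Curve for this loop iteration.
    model.eval()
    scores, labels = [], []
    with torch.no_grad():
        for x, y in val_loader:               # y: 1 = anomalous, 0 = good
            logits = model(x.to(device))      # (B, 2) for a two-class head
            scores += torch.softmax(logits, dim=1)[:, 1].cpu().tolist()
            labels += y.tolist()
    return roc_auc_score(labels, scores)

# One value per loop iteration yields the line plot referred to above:
# aucs.append(validation_auc(model, val_loader))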
src/introduction.tex (31 lines, deleted)
@@ -1,31 +0,0 @@
\section{Introduction}\label{sec:introduction}
\subsection{Motivation}\label{subsec:motivation}
Anomaly detection is of essential importance, especially in the industrial and automotive fields.
Many assembly lines need visual inspection to find defects, often with the help of camera systems.
Machine learning has helped the field advance a lot in the past.
PatchCore and EfficientAD are state-of-the-art algorithms that are trained only on good data and then detect anomalies within unseen (but similar) data.
One of their problems is the need for lots of training data and training time.
Few-Shot learning might be a suitable alternative with substantially lower training time.

In this thesis the performance of 3 Few-Shot learning algorithms is compared in the field of anomaly detection.
Moreover, few-shot learning might be able not only to detect anomalies but also to identify the anomaly class.

\subsection{Research Questions}\label{subsec:research-questions}

\subsubsection{Is Few-Shot learning a suitable fit for anomaly detection?}

Should Few-Shot learning be used for anomaly detection tasks?
How does it compare to well-established algorithms such as PatchCore or EfficientAD?

\subsubsection{How does imbalancing the shot number affect performance?}
Does giving the Few-Shot learner more good than bad samples improve the model performance?

\subsubsection{How do the 3 methods (ResNet, CAML, \pmf) perform in detecting only the anomaly class?}
How much does the performance improve if only detecting whether an anomaly is present or not?
How does it compare to PatchCore and EfficientAD?

\subsubsection{Extra: How does Euclidean distance compare to cosine similarity when using ResNet as a feature extractor?}
Different distance measures were tried, but the results are largely the same.

\subsection{Outline}\label{subsec:outline}
todo
src/llncs.cls (1189 lines, deleted): file diff suppressed because it is too large.

src/main.tex (160 lines, deleted)
@@ -1,160 +0,0 @@
\def\ieee{0}

\if\ieee1
\documentclass[sigconf]{acmart}
\else
\documentclass{llncs}
\fi
\usepackage{amsmath}
\usepackage{mathtools}
\usepackage{hyperref}
\usepackage{listings}

\usepackage{xcolor}

\usepackage{subfig}

\usepackage[inline]{enumitem}
\usepackage{color}

\usepackage{tikz}
\usetikzlibrary{shapes.geometric, arrows}

\tikzstyle{startstop} = [rectangle, rounded corners, minimum width=3cm, minimum height=1cm, text centered, draw=black, fill=red!30]
\tikzstyle{io} = [rectangle, rounded corners, minimum width=3cm, minimum height=1cm, text centered, draw=black, fill=blue!30]
\tikzstyle{process} = [rectangle, minimum width=3cm, minimum height=1cm, text centered, draw=black, fill=orange!30]
\tikzstyle{decision} = [diamond, minimum width=3cm, minimum height=1cm, text centered, draw=black, fill=green!30]
\tikzstyle{arrow} = [thick,->,>=stealth]

\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
\definecolor{codepurple}{rgb}{0.58,0,0.82}
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}

\lstdefinestyle{mystyle}{
    backgroundcolor=\color{backcolour},
    commentstyle=\color{codegreen},
    keywordstyle=\color{magenta},
    numberstyle=\tiny\color{codegray},
    stringstyle=\color{codepurple},
    basicstyle=\ttfamily\scriptsize,
    breakatwhitespace=false,
    breaklines=true,
    captionpos=b,
    keepspaces=true,
    numbers=left,
    numbersep=5pt,
    showspaces=false,
    showstringspaces=false,
    showtabs=false,
    tabsize=2
}

\lstset{style=mystyle}

\newcommand{\pmf}{$P{>}M{>}F$}

%\lstset{basicstyle=\ttfamily, keywordstyle=\bfseries}

\if\ieee1
\settopmatter{printacmref=false} % Removes citation information below abstract
\renewcommand\footnotetextcopyrightpermission[1]{} % removes footnote with conference information in first column
\pagestyle{plain} % removes running headers
\fi

%%
%% \BibTeX command to typeset BibTeX logo in the docs
\if\ieee1
\AtBeginDocument{%
    \providecommand\BibTeX{{%
        \normalfont B\kern-0.5em{\scshape i\kern-0.25em b}\kern-0.8em\TeX}}}

\acmConference{Minimize labeling effort of Binary classification Tasks with Active learning}{2023}{Linz}
\fi

% Document
\begin{document}
%%
%% The "title" command has an optional parameter,
%% allowing the author to define a "short title" to be used in page headers.
\title{Few shot learning for anomaly detection\\ Bachelor Thesis for AI}

%%
%% The "author" command and its associated commands are used to define
%% the authors and their affiliations.
%% Of note is the shared affiliation of the first two authors, and the
%% "authornote" and "authornotemark" commands
%% used to denote shared contribution to the research.
\author{Lukas Heiligenbrunner}

\if\ieee1
\email{k12104785@students.jku.at}
\affiliation{%
    \institution{Johannes Kepler University Linz}
    \city{Linz}
    \state{Upperaustria}
    \country{Austria}
    \postcode{4020}
}
\else
\institute{Johannes Kepler University Linz}
\fi

%%
%% By default, the full list of authors will be used in the page
%% headers. Often, this list is too long, and will overlap
%% other information printed in the page headers. This command allows
%% the author to define a more concise list
%% of authors' names for this purpose.
% \renewcommand{\shortauthors}{Lukas Heilgenbrunner}

%%
%% The abstract is a short summary of the work to be presented in the
%% article.
\if\ieee0
\maketitle
\fi

\begin{abstract}
Todo abstract!!
\end{abstract}

%%
%% Keywords. The author(s) should pick words that accurately describe
%% the work being presented. Separate the keywords with commas.
\if\ieee1
\keywords{neural networks, ResNET, pseudo-labeling, active-learning}
\fi

%\received{20 February 2007}
%\received[revised]{12 March 2009}
%\received[accepted]{5 June 2009}

%%
%% This command processes the author and affiliation and title
%% information and builds the first part of the formatted document.
\if\ieee1
\maketitle
\fi
\input{introduction}
\input{materialandmethods}
\input{implementation}
\input{experimentalresults}
\input{conclusionandoutlook}

%% The next two lines define the bibliography style to be used, and
%% the bibliography file.
\bibliographystyle{ACM-Reference-Format}
\bibliography{../src/sources}

%%
%% If your work has an appendix, this is the place to put it.
\appendix

% appendix

\end{document}
\endinput
src/materialandmethods.tex (122 lines, deleted)
@@ -1,122 +0,0 @@
\section{Material and Methods}\label{sec:material-and-methods}

\subsection{Material}\label{subsec:material}

\subsubsection{MVTec AD}\label{subsubsec:mvtecad}
MVTec AD is a dataset for benchmarking anomaly detection methods with a focus on industrial inspection.
It contains over 5000 high-resolution images divided into fifteen different object and texture categories.
Each category comprises a set of defect-free training images and a test set of images with various kinds of defects as well as images without defects.

% todo source for https://www.mvtec.com/company/research/datasets/mvtec-ad

% todo example image
%\begin{figure}
%    \centering
%    \includegraphics[width=\linewidth/2]{../rsc/muffin_chiauaua_poster}
%    \caption{Sample images from dataset. \cite{muffinsvschiuahuakaggle_poster}}
%    \label{fig:roc-example}
%\end{figure}
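Editorial note: to make the dataset structure concrete, MVTec AD ships one folder per category, with defect-free training images under train/good and test images grouped by defect type. A minimal loader sketch (not part of the thesis sources; paths, the binary labeling and the class name are illustrative assumptions):

from pathlib import Path
from PIL import Image
from torch.utils.data import Dataset

class MVTecTest(Dataset):
    # Binary-labeled test images of one MVTec AD category (assumed layout:
    # <root>/<category>/test/<good or defect type>/*.png).
    def __init__(self, root, category, transform=None):
        self.items = []
        for p in sorted(Path(root, category, "test").glob("*/*.png")):
            label = 0 if p.parent.name == "good" else 1  # "good" vs. any defect type
            self.items.append((p, label))
        self.transform = transform

    def __len__(self):
        return len(self.items)

    def __getitem__(self, i):
        path, label = self.items[i]
        img = Image.open(path).convert("RGB")
        return self.transform(img) if self.transform else img, label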
\subsection{Methods}\label{subsec:methods}

\subsubsection{Few-Shot Learning}
Few-Shot learning is a subfield of machine learning which aims to train a classification model with just a few samples, or none at all.
This contrasts with traditional supervised learning, where a huge amount of labeled data is required to generalize well to unseen data.
As a consequence, the model is prone to overfitting on the few training samples.

Typically a few-shot learning task consists of a support set and a query set, where the support set contains the training data and the query set the evaluation data for real-world evaluation.
A common way to describe a few-shot learning problem is the n-way k-shot notation.
For example, 3 target classes with 5 training samples per class constitute a 3-way 5-shot classification problem.

A classical example of how such a model might work is a prototypical network.
These models learn a representation of each class and classify new examples based on proximity to these representations in an embedding space.

The first and easiest method of this bachelor thesis uses a simple ResNet to calculate those embeddings and is essentially a simple prototypical network.
See %todo link to this section
% todo proper source
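Editorial note: a minimal sketch of the prototypical classification step just described (hypothetical shapes and names; it assumes a torchvision ResNet-18 with the classification head removed as feature extractor, which matches the ResNet-based method only in spirit):

import torch
import torch.nn.functional as F
from torchvision.models import resnet18

backbone = resnet18(weights="IMAGENET1K_V1")
backbone.fc = torch.nn.Identity()   # keep the 512-d features as embeddings
backbone.eval()

@torch.no_grad()
def classify(support, support_labels, query, n_way):
    # support: (n_way * k, 3, H, W); query: (q, 3, H, W)
    emb_s = F.normalize(backbone(support), dim=1)
    emb_q = F.normalize(backbone(query), dim=1)
    # prototype = mean embedding of each class's support samples
    protos = torch.stack([emb_s[support_labels == c].mean(0) for c in range(n_way)])
    # assign each query to the nearest prototype (cosine similarity)
    return (emb_q @ protos.T).argmax(dim=1)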

\subsubsection{Generalisation from few samples}

\subsubsection{Patchcore}

%todo also show values how they perform on MVTec AD

\subsubsection{EfficientAD}
todo stuff~\cite{patchcorepaper}
% https://arxiv.org/pdf/2106.08265
todo stuff\cite{efficientADpaper}
% https://arxiv.org/pdf/2303.14535

\subsubsection{Jupyter Notebook}\label{subsubsec:jupyternb}

A Jupyter notebook is a shareable document which combines code and its output, text and visualizations.
The notebook along with the editor provides an environment for fast prototyping and data analysis.
It is widely used in the data science, mathematics and machine learning communities.

In the context of this practical work it can be used to test and evaluate the active learning loop before implementing it in a Dagster pipeline. \cite{jupyter}

\subsubsection{CNN}
Convolutional neural networks are especially well-suited model architectures for processing images, speech and audio signals.
A CNN typically consists of convolutional layers, pooling layers and fully connected layers.
Convolutional layers are a set of learnable kernels (filters).
Each filter performs a convolution operation by sliding a window over the image.
At each position the dot product of the filter and the underlying image patch produces one entry of a feature map.
Convolutional layers capture features like edges, textures or shapes.
Pooling layers downsample the feature maps created by the convolutional layers.
This reduces the computational complexity of the overall network and helps against overfitting.
Common pooling layers include average and max pooling.
Finally, after some convolution layers the feature map is flattened and passed to a network of fully connected layers to perform a classification or regression task.
Figure~\ref{fig:cnn-architecture} shows a typical binary classification task.
\cite{cnnintro}
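Editorial note: a minimal sketch of this structure (hypothetical layer sizes, assuming 224x224 RGB input; PyTorch is used since the implementation chapter mentions it):

import torch.nn as nn

# Convolution -> pooling blocks, then flatten + fully connected head,
# mirroring the layer types described above.
cnn = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1), nn.ReLU(),
    nn.MaxPool2d(2),                 # downsamples the feature maps (224 -> 112)
    nn.Conv2d(16, 32, kernel_size=3, padding=1), nn.ReLU(),
    nn.MaxPool2d(2),                 # 112 -> 56
    nn.Flatten(),
    nn.Linear(32 * 56 * 56, 2),      # two outputs for a binary classification task
)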

\begin{figure}
    \centering
    \includegraphics[width=\linewidth]{../rsc/cnn_architecture}
    \caption{Architecture of a convolutional neural network. \cite{cnnarchitectureimg}}
    \label{fig:cnn-architecture}
\end{figure}

\subsubsection{ResNet}

Residual neural networks are a special type of neural network architecture.
They are especially good for deep learning and have been used in many state-of-the-art computer vision tasks.
The main idea behind ResNet is the skip connection.
A skip connection is a direct connection from one layer to a later layer that is not its immediate successor.
This helps to avoid the vanishing gradient problem and eases the training of very deep networks.
ResNet has proven to be very successful in many computer vision tasks and is used in this practical work for the classification task.
There are several different ResNet architectures; the most common are ResNet-18, ResNet-34, ResNet-50, ResNet-101 and ResNet-152. \cite{resnet}

Since the dataset is relatively small and the two-class classification task is relatively easy (for such a large model), the ResNet-18 architecture is used in this practical work.
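Editorial note: a hedged sketch of the skip connection idea (a simplified residual block; the real ResNet blocks additionally use batch normalization and downsampling variants):

import torch.nn as nn

class ResidualBlock(nn.Module):
    # Simplified residual block: the input "skips" the two convolutions
    # and is added back to their output.
    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, 3, padding=1)
        self.conv2 = nn.Conv2d(channels, channels, 3, padding=1)
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.conv2(self.relu(self.conv1(x)))
        return self.relu(out + x)   # skip connection keeps gradients flowing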

\subsubsection{CAML}
Todo
\subsubsection{P$>$M$>$F}
Todo

\subsubsection{Softmax}

The Softmax function~\eqref{eq:softmax}\cite{liang2017soft} converts a vector of $K$ real numbers into a probability distribution.
It is a generalization of the Sigmoid function and is often used as an activation layer in neural networks.
\begin{equation}\label{eq:softmax}
    \sigma(\mathbf{z})_j = \frac{e^{z_j}}{\sum_{k=1}^K e^{z_k}} \quad \text{for } j \in \{1,\dots,K\}
\end{equation}

The softmax function is closely related to the Boltzmann distribution and was first introduced in the 19$^{\textrm{th}}$ century~\cite{Boltzmann}.
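Editorial note: a short numeric check of the equation above (a NumPy sketch; subtracting the maximum is a standard numerical-stability trick, not part of the equation):

import numpy as np

def softmax(z):
    # exp(z - max(z)) avoids overflow and leaves the result unchanged
    e = np.exp(z - np.max(z))
    return e / e.sum()

p = softmax(np.array([2.0, 1.0, 0.1]))
print(p)         # ~[0.659, 0.242, 0.099]
print(p.sum())   # 1.0 -- a valid probability distribution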

\subsubsection{Cross Entropy Loss}
Cross Entropy Loss is a well-established loss function in machine learning.
Equation~\eqref{eq:crelformal}\cite{crossentropy} shows the formal general definition of the Cross Entropy Loss.
Equation~\eqref{eq:crelbinary} is the special case of the general Cross Entropy Loss for binary classification tasks.

\begin{align}
    H(p,q) &= -\sum_{x\in\mathcal{X}} p(x)\, \log q(x)\label{eq:crelformal}\\
    H(p,q) &= - (p \log q + (1-p) \log(1-q))\label{eq:crelbinary}\\
    \mathcal{L}(p,q) &= - \frac{1}{\mathcal{B}} \sum_{i=1}^{\mathcal{B}} (p_i \log q_i + (1-p_i) \log(1-q_i))\label{eq:crelbinarybatch}
\end{align}

Equation~$\mathcal{L}(p,q)$~\eqref{eq:crelbinarybatch}\cite{handsonaiI} is the Binary Cross Entropy Loss for a batch of size $\mathcal{B}$ and is used for model training in this practical work.
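Editorial note: a quick sanity check of the batch formula (a sketch; the probabilities and labels are made up, and PyTorch's built-in binary_cross_entropy computes the same batch mean):

import torch
import torch.nn.functional as F

q = torch.tensor([0.9, 0.2, 0.7])   # predicted probabilities q_i
p = torch.tensor([1.0, 0.0, 1.0])   # ground-truth labels p_i

manual = -(p * q.log() + (1 - p) * (1 - q).log()).mean()
builtin = F.binary_cross_entropy(q, p)
print(manual.item(), builtin.item())  # both ~0.2284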

\subsubsection{Mathematical modeling of problem}\label{subsubsec:mathematicalmodeling}
src/sources.bib (37 lines, deleted)
@@ -1,37 +0,0 @@
%! Author = lukas
%! Date = 4/9/24

@InProceedings{crossentropy,
    author    = {I. J. Good},
    title     = {Rational Decisions},
    journal   = {Journal of the Royal Statistical Society. Series B (Methodological)},
    volume    = {14},
    number    = {1},
    pages     = {107--114},
    year      = {1952},
    publisher = {[Royal Statistical Society, Wiley]},
    ISSN      = {00359246},
    URL       = {http://www.jstor.org/stable/2984087},
    urldate   = {2024-05-23},
    abstract  = {This paper deals first with the relationship between the theory of probability and the theory of rational behaviour. A method is then suggested for encouraging people to make accurate probability estimates, a connection with the theory of information being mentioned. Finally Wald's theory of statistical decision functions is summarised and generalised and its relation to the theory of rational behaviour is discussed.},
}

@misc{efficientADpaper,
    title         = {EfficientAD: Accurate Visual Anomaly Detection at Millisecond-Level Latencies},
    author        = {Kilian Batzner and Lars Heckler and Rebecca König},
    year          = {2024},
    eprint        = {2303.14535},
    archivePrefix = {arXiv},
    primaryClass  = {cs.CV},
    url           = {https://arxiv.org/abs/2303.14535},
}

@misc{patchcorepaper,
    title         = {Towards Total Recall in Industrial Anomaly Detection},
    author        = {Karsten Roth and Latha Pemula and Joaquin Zepeda and Bernhard Schölkopf and Thomas Brox and Peter Gehler},
    year          = {2022},
    eprint        = {2106.08265},
    archivePrefix = {arXiv},
    primaryClass  = {cs.CV},
    url           = {https://arxiv.org/abs/2106.08265},
}
Binary image file deleted (94 KiB), not shown.