Compare commits
No commits in common. "a71dc30822f04f6d127d82bb60340c5f713fc716" and "de27f954c11652bd2c987719266a14d161762228" have entirely different histories.
a71dc30822
...
de27f954c1
@ -1,31 +0,0 @@
|
|||||||
= Introduction
|
|
||||||
== Motivation
|
|
||||||
Anomaly detection is of essential importance, especially in the industrial and automotive fields.
|
|
||||||
Many assembly lines need visual inspection to find errors, often with the help of camera systems.
|
|
||||||
Machine learning helped the field to advance a lot in the past.
|
|
||||||
PatchCore and EfficientAD are state-of-the-art algorithms that are trained only on good data and then detect anomalies within unseen (but similar) data.
|
|
||||||
One of their problems is the need for large amounts of training data and long training times.
|
|
||||||
Few-Shot learning might be a suitable alternative with substantially reduced training time.
|
|
||||||
|
|
||||||
In this thesis, the performance of three Few-Shot learning algorithms is compared in the field of anomaly detection.
|
|
||||||
Moreover, few-shot learning might be able not only to detect anomalies but also to detect the anomaly class.
|
|
||||||
|
|
||||||
== Research Questions
|
|
||||||
|
|
||||||
=== Is Few-Shot learning a suitable fit for anomaly detection?
|
|
||||||
|
|
||||||
Should Few-Shot learning be used for anomaly detection tasks?
|
|
||||||
How does it compare to well-established algorithms such as PatchCore or EfficientAD?
|
|
||||||
|
|
||||||
=== How does an imbalanced number of shots affect performance?
|
|
||||||
Does giving the Few-Shot learner more good than bad samples improve the model performance?
|
|
||||||
|
|
||||||
=== How do the three methods (ResNet, CAML, P$>$M$>$F) perform in only detecting the anomaly class?
|
|
||||||
How much does the performance improve if only detecting an anomaly or not?
|
|
||||||
How does it compare to PatchCore and EfficientAD?
|
|
||||||
|
|
||||||
=== Extra: How does Euclidean distance compare to Cosine-similarity when using ResNet as a feature-extractor?
|
|
||||||
Different distance measures were tried, but the results are pretty much the same.
|
|
||||||
|
|
||||||
== Outline
|
|
||||||
todo
|
|
@ -1,82 +0,0 @@
|
|||||||
#import "@preview/springer-spaniel:0.1.0"
|
|
||||||
#import springer-spaniel.ctheorems: * // provides "proof", "theorem", "lemma"
|
|
||||||
|
|
||||||
// Set citation style
|
|
||||||
#set cite(style: "iso-690-author-date") // page info visible
|
|
||||||
//#set cite(style: "iso-690-numeric") // page info visible
|
|
||||||
//#set cite(style: "springer-basic")// no additional info visible (page number in square brackets)
|
|
||||||
//#set cite(style: "alphanumeric")// page info not visible
|
|
||||||
|
|
||||||
|
|
||||||
#show: springer-spaniel.template(
|
|
||||||
title: [Few shot learning for anomaly detection Bachelor Thesis for AI],
|
|
||||||
authors: (
|
|
||||||
(
|
|
||||||
name: "Lukas Heiligenbrunner",
|
|
||||||
institute: "Johannes Kepler University",
|
|
||||||
address: "Linz, Austria",
|
|
||||||
email: "lukas.heiligenbrunner@gmail.com"
|
|
||||||
),
|
|
||||||
// ... and so on
|
|
||||||
),
|
|
||||||
abstract: lorem(75),
|
|
||||||
|
|
||||||
// debug: true, // Highlights structural elements and links
|
|
||||||
// frame: 1pt, // A border around the page for white on white display
|
|
||||||
// printer-test: true, // Suitably placed CMYK printer tests
|
|
||||||
)
|
|
||||||
|
|
||||||
#let date = datetime.today() // not today: datetime(year: 1969, month: 9, day: 6,)
|
|
||||||
#let k-number = "k12345678"
|
|
||||||
|
|
||||||
// set equation and heading numbering
|
|
||||||
#set math.equation(numbering: "(1)")
|
|
||||||
#set heading(numbering: "1.1")
|
|
||||||
|
|
||||||
|
|
||||||
// Pagebreak after level 1 headings
|
|
||||||
#show heading.where(level: 1): it => [
|
|
||||||
#pagebreak(weak: true)
|
|
||||||
#it
|
|
||||||
]
|
|
||||||
|
|
||||||
// show reference targets in brackets
|
|
||||||
#show ref: it => {
|
|
||||||
let el = it.element
|
|
||||||
if el != none and el.func() == heading {
|
|
||||||
|
|
||||||
[#it (#el.body)]
|
|
||||||
} else [#it]
|
|
||||||
}
|
|
||||||
|
|
||||||
// style table-of-contents
|
|
||||||
#show outline.entry.where(
|
|
||||||
level: 1
|
|
||||||
): it => {
|
|
||||||
v(1em, weak: true)
|
|
||||||
strong(it)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Table of contents.
|
|
||||||
#outline(
|
|
||||||
title: {
|
|
||||||
text(1.3em, weight: 700, "Contents")
|
|
||||||
v(10mm)
|
|
||||||
},
|
|
||||||
indent: 2em,
|
|
||||||
depth: 3
|
|
||||||
)<outline>
|
|
||||||
#pagebreak(weak: false)
|
|
||||||
|
|
||||||
#include "introduction.typ"
|
|
||||||
#include "materialandmethods.typ"
|
|
||||||
|
|
||||||
= Section Heading
|
|
||||||
#cite(<efficientADpaper>)
|
|
||||||
== Subsection Heading
|
|
||||||
=== Subsubsection Heading
|
|
||||||
==== Paragraph Heading
|
|
||||||
===== Subparagraph Heading
|
|
||||||
|
|
||||||
#set par(leading: 0.7em, first-line-indent: 0em, justify: true)
|
|
||||||
#bibliography("sources.bib", style: "apa")
|
|
@ -1,121 +0,0 @@
|
|||||||
= Material and Methods
|
|
||||||
|
|
||||||
== Material
|
|
||||||
|
|
||||||
=== MVTec AD
|
|
||||||
MVTec AD is a dataset for benchmarking anomaly detection methods with a focus on industrial inspection.
|
|
||||||
It contains over 5000 high-resolution images divided into fifteen different object and texture categories.
|
|
||||||
Each category comprises a set of defect-free training images and a test set of images with various kinds of defects as well as images without defects.
|
|
||||||
|
|
||||||
// todo source for https://www.mvtec.com/company/research/datasets/mvtec-ad
|
|
||||||
|
|
||||||
// todo example image
|
|
||||||
//\begin{figure}
|
|
||||||
// \centering
|
|
||||||
// \includegraphics[width=\linewidth/2]{../rsc/muffin_chiauaua_poster}
|
|
||||||
// \caption{Sample images from dataset. \cite{muffinsvschiuahuakaggle_poster}}
|
|
||||||
// \label{fig:roc-example}
|
|
||||||
//\end{figure}
|
|
||||||
|
|
||||||
|
|
||||||
== Methods
|
|
||||||
|
|
||||||
=== Few-Shot Learning
|
|
||||||
Few-Shot learning is a subfield of machine-learning which aims to train a classification-model with just a few or no samples at all.
|
|
||||||
This is in contrast to traditional supervised learning, where a huge amount of labeled data is required to generalize well to unseen data.
|
|
||||||
With so little data available, the model is prone to overfitting to the few training samples.
|
|
||||||
|
|
||||||
Typically, a few-shot learning task consists of a support set and a query set.
|
|
||||||
The support set contains the training data and the query set contains the evaluation data for real-world evaluation.
|
|
||||||
A common way to formulate a few-shot learning problem is the n-way k-shot notation.
|
|
||||||
For example, 3 target classes with 5 training samples per class form a 3-way 5-shot few-shot classification problem.
|
|
||||||
|
|
||||||
A classical example of how such a model might work is a prototypical network.
|
|
||||||
These models learn a representation of each class and classify new examples based on proximity to these representations in an embedding space.
|
|
||||||
|
|
||||||
The first and simplest method of this bachelor thesis uses a simple ResNet to calculate those embeddings and is basically a simple prototypical network.
|
|
||||||
See //%todo link to this section
|
|
||||||
// todo proper source
|
|
||||||
|
|
||||||
=== Generalisation from few samples
|
|
||||||
|
|
||||||
=== PatchCore
|
|
||||||
|
|
||||||
// todo also show values how they perform on MVTec AD
|
|
||||||
|
|
||||||
=== EfficientAD
|
|
||||||
todo stuff #cite(<patchcorepaper>)
|
|
||||||
// https://arxiv.org/pdf/2106.08265
|
|
||||||
todo stuff #cite(<efficientADpaper>)
|
|
||||||
// https://arxiv.org/pdf/2303.14535
|
|
||||||
|
|
||||||
=== Jupyter Notebook
|
|
||||||
|
|
||||||
A Jupyter notebook is a shareable document which combines code and its output, text and visualizations.
|
|
||||||
The notebook, along with the editor, provides an environment for fast prototyping and data analysis.
|
|
||||||
It is widely used in the data science, mathematics and machine learning community.
|
|
||||||
|
|
||||||
In the context of this practical work it can be used to test and evaluate the active learning loop before implementing it in a Dagster pipeline. #cite(<jupyter>)
|
|
||||||
|
|
||||||
=== CNN
|
|
||||||
Convolutional neural networks are especially good model architectures for processing images, speech and audio signals.
|
|
||||||
A CNN typically consists of convolutional layers, pooling layers and fully connected layers.
|
|
||||||
Convolutional layers are a set of learnable kernels (filters).
|
|
||||||
Each filter performs a convolution operation by sliding a window over every pixel of the image.
|
|
||||||
On each pixel a dot product creates a feature map.
|
|
||||||
Convolutional layers capture features like edges, textures or shapes.
|
|
||||||
Pooling layers sample down the feature maps created by the convolutional layers.
|
|
||||||
This helps to reduce the computational complexity of the overall network and helps to prevent overfitting.
|
|
||||||
Common pooling layers include average- and max pooling.
|
|
||||||
Finally, after some convolution layers the feature map is flattened and passed to a network of fully connected layers to perform a classification or regression task.
|
|
||||||
@cnnarchitecture shows a typical binary classification task.
|
|
||||||
#cite(<cnnintro>)
|
|
||||||
|
|
||||||
#figure(
|
|
||||||
image("rsc/cnn_architecture.png", width: 80%),
|
|
||||||
caption: [Architecture of a convolutional neural network. #cite(<cnnarchitectureimg>)],
|
|
||||||
) <cnnarchitecture>
|
|
||||||
|
|
||||||
=== ResNet
|
|
||||||
|
|
||||||
Residual neural networks are a special type of neural network architecture.
|
|
||||||
They are especially good for deep learning and have been used in many state-of-the-art computer vision tasks.
|
|
||||||
The main idea behind ResNet is the skip connection.
|
|
||||||
The skip connection is a direct connection from one layer to another layer which is not the next layer.
|
|
||||||
This helps to avoid the vanishing gradient problem and helps with the training of very deep networks.
|
|
||||||
ResNet has proven to be very successful in many computer vision tasks and is used in this practical work for the classification task.
|
|
||||||
There are several different ResNet architectures, the most common are ResNet-18, ResNet-34, ResNet-50, ResNet-101 and ResNet-152. #cite(<resnet>)
|
|
||||||
|
|
||||||
Since the dataset is relatively small and the two class classification task is relatively easy (for such a large model) the ResNet-18 architecture is used in this practical work.
|
|
||||||
|
|
||||||
=== CAML
|
|
||||||
Todo
|
|
||||||
=== P$>$M$>$F
|
|
||||||
Todo
|
|
||||||
|
|
||||||
=== Softmax
|
|
||||||
|
|
||||||
The Softmax function @softmax #cite(<liang2017soft>) converts $n$ numbers of a vector into a probability distribution.
|
|
||||||
It is a generalization of the Sigmoid function and is often used as an activation layer in neural networks.
|
|
||||||
|
|
||||||
$
|
|
||||||
sigma(bold(z))_j = (e^(z_j)) / (sum_(k=1)^n e^(z_k)) "for" j=(1,...,n)
|
|
||||||
$ <softmax>
|
|
||||||
|
|
||||||
The softmax function has high similarities with the Boltzmann distribution and was first introduced in the 19th century #cite(<Boltzmann>).
|
|
||||||
|
|
||||||
|
|
||||||
=== Cross Entropy Loss
|
|
||||||
Cross Entropy Loss is a well established loss function in machine learning.
|
|
||||||
The first line of the equation below #cite(<crossentropy>) shows the formal general definition of the Cross Entropy Loss.
|
|
||||||
The second line is the special case of the general Cross Entropy Loss for binary classification tasks.
|
|
||||||
|
|
||||||
$
|
|
||||||
H(p,q) &= -sum_(x in cal(X)) p(x) log q(x)\
|
|
||||||
H(p,q) &= -(p log(q) + (1-p) log(1-q))\
|
|
||||||
cal(L)(p,q) &= -1/cal(B) sum_(i=1)^(cal(B)) (p_i log(q_i) + (1-p_i) log(1-q_i))
|
|
||||||
$
|
|
||||||
|
|
||||||
The last line of the equation above, $cal(L)(p,q)$, is the Binary Cross Entropy Loss for a batch of size $cal(B)$ and is used for model training in this practical work. // todo: cite handsonaiI (no such entry in sources.bib)
|
|
||||||
|
|
||||||
=== Mathematical modeling of problem
|
|
Binary file not shown.
Before Width: | Height: | Size: 94 KiB |
@ -1,92 +0,0 @@
|
|||||||
%! Author = lukas
|
|
||||||
%! Date = 4/9/24
|
|
||||||
|
|
||||||
@article{crossentropy,
|
|
||||||
ISSN = {00359246},
|
|
||||||
URL = {http://www.jstor.org/stable/2984087},
|
|
||||||
abstract = {This paper deals first with the relationship between the theory of probability and the theory of rational behaviour. A method is then suggested for encouraging people to make accurate probability estimates, a connection with the theory of information being mentioned. Finally Wald's theory of statistical decision functions is summarised and generalised and its relation to the theory of rational behaviour is discussed.},
|
|
||||||
author = {I. J. Good},
|
|
||||||
journal = {Journal of the Royal Statistical Society. Series B (Methodological)},
|
|
||||||
number = {1},
|
|
||||||
pages = {107--114},
|
|
||||||
publisher = {[Royal Statistical Society, Wiley]},
|
|
||||||
title = {Rational Decisions},
|
|
||||||
urldate = {2024-05-23},
|
|
||||||
volume = {14},
|
|
||||||
year = {1952}
|
|
||||||
}
|
|
||||||
|
|
||||||
@misc{efficientADpaper,
|
|
||||||
title={EfficientAD: Accurate Visual Anomaly Detection at Millisecond-Level Latencies},
|
|
||||||
author={Kilian Batzner and Lars Heckler and Rebecca König},
|
|
||||||
year={2024},
|
|
||||||
eprint={2303.14535},
|
|
||||||
archivePrefix={arXiv},
|
|
||||||
primaryClass={cs.CV},
|
|
||||||
url={https://arxiv.org/abs/2303.14535},
|
|
||||||
}
|
|
||||||
|
|
||||||
@misc{patchcorepaper,
|
|
||||||
title={Towards Total Recall in Industrial Anomaly Detection},
|
|
||||||
author={Karsten Roth and Latha Pemula and Joaquin Zepeda and Bernhard Schölkopf and Thomas Brox and Peter Gehler},
|
|
||||||
year={2022},
|
|
||||||
eprint={2106.08265},
|
|
||||||
archivePrefix={arXiv},
|
|
||||||
primaryClass={cs.CV},
|
|
||||||
url={https://arxiv.org/abs/2106.08265},
|
|
||||||
}
|
|
||||||
|
|
||||||
@misc{jupyter,
|
|
||||||
author = {},
|
|
||||||
title = {{Project Jupyter Documentation}},
|
|
||||||
howpublished = "\url{https://docs.jupyter.org/en/latest/}",
|
|
||||||
year = {2024},
|
|
||||||
note = "[Online; accessed 13-May-2024]"
|
|
||||||
}
|
|
||||||
|
|
||||||
@misc{cnnintro,
|
|
||||||
title={An Introduction to Convolutional Neural Networks},
|
|
||||||
author={Keiron O'Shea and Ryan Nash},
|
|
||||||
year={2015},
|
|
||||||
eprint={1511.08458},
|
|
||||||
archivePrefix={arXiv},
|
|
||||||
primaryClass={cs.NE}
|
|
||||||
}
|
|
||||||
|
|
||||||
@misc{cnnarchitectureimg,
|
|
||||||
author = {},
|
|
||||||
title = {{What are convolutional neural networks?}},
|
|
||||||
howpublished = "\url{https://cointelegraph.com/explained/what-are-convolutional-neural-networks}",
|
|
||||||
year = {2024},
|
|
||||||
note = "[Online; accessed 12-April-2024]"
|
|
||||||
}
|
|
||||||
|
|
||||||
@inproceedings{liang2017soft,
|
|
||||||
title={Soft-margin softmax for deep classification},
|
|
||||||
author={Liang, Xuezhi and Wang, Xiaobo and Lei, Zhen and Liao, Shengcai and Li, Stan Z},
|
|
||||||
booktitle={International Conference on Neural Information Processing},
|
|
||||||
pages={413--421},
|
|
||||||
year={2017},
|
|
||||||
organization={Springer}
|
|
||||||
}
|
|
||||||
|
|
||||||
@inbook{Boltzmann,
|
|
||||||
place = {Cambridge},
|
|
||||||
series = {Cambridge Library Collection - Physical Sciences},
|
|
||||||
title = {Studien über das Gleichgewicht der lebendigen Kraft zwischen bewegten materiellen Punkten},
|
|
||||||
booktitle = {Wissenschaftliche Abhandlungen},
|
|
||||||
publisher = {Cambridge University Press},
|
|
||||||
author = {Boltzmann, Ludwig},
|
|
||||||
editor = {Hasenöhrl, Friedrich},
|
|
||||||
year = {2012},
|
|
||||||
pages = {49--96},
|
|
||||||
collection = {Cambridge Library Collection - Physical Sciences}
}
|
|
||||||
|
|
||||||
@misc{resnet,
|
|
||||||
title={Deep Residual Learning for Image Recognition},
|
|
||||||
author={Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun},
|
|
||||||
year={2015},
|
|
||||||
eprint={1512.03385},
|
|
||||||
archivePrefix={arXiv},
|
|
||||||
primaryClass={cs.CV}
|
|
||||||
}
|
|
Loading…
Reference in New Issue
Block a user