add typst alt impl
This commit is contained in:
parent
de27f954c1
commit
c518044f59
31
typstalt/introduction.typ
Normal file
@@ -0,0 +1,31 @@
= Introduction
== Motivation
Anomaly detection is of essential importance, especially in the industrial and automotive fields.
Many assembly lines require visual inspection to find defects, often with the help of camera systems.
Machine learning has helped the field advance considerably in the past.
PatchCore and EfficientAD are state-of-the-art algorithms that are trained only on defect-free data and then detect anomalies within unseen (but similar) data.
One of their problems is the need for large amounts of training data and a long training time.
Few-Shot learning might be a suitable alternative with substantially reduced training time.

In this thesis the performance of three Few-Shot learning algorithms is compared in the field of anomaly detection.
Moreover, Few-Shot learning might be able not only to detect anomalies but also to identify the anomaly class.

== Research Questions

=== Is Few-Shot learning a suitable fit for anomaly detection?

Should Few-Shot learning be used for anomaly detection tasks?
How does it compare to well-established algorithms such as PatchCore or EfficientAD?

=== How does an imbalanced shot count affect performance?
Does giving the Few-Shot learner more good than bad samples improve the model performance?

=== How do the three methods (ResNet, CAML, P$>$M$>$F) perform in detecting only the anomaly class?
How much does the performance improve if only detecting the presence of an anomaly?
How does it compare to PatchCore and EfficientAD?

=== Extra: How does Euclidean distance compare to Cosine-similarity when using ResNet as a feature-extractor?
Different distance measures were tried, but the results are largely the same.
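As a minimal illustration of the two measures on embedding vectors (a sketch, not the implementation used in the experiments):

```python
import torch
import torch.nn.functional as F

def euclidean_distance(a, b):
    # pairwise L2 distances between rows of a and b; smaller = more similar
    return torch.cdist(a, b)

def cosine_similarity(a, b):
    # pairwise cosine similarities; larger = more similar
    return F.normalize(a, dim=1) @ F.normalize(b, dim=1).T
```

For unit-normalized vectors the squared Euclidean distance is a monotone function of the cosine similarity, so both induce the same nearest-neighbor ranking; this would explain the similar results.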

== Outline
todo
BIN
typstalt/main.pdf
Normal file
Binary file not shown.
82
typstalt/main.typ
Normal file
@@ -0,0 +1,82 @@
#import "@preview/springer-spaniel:0.1.0"
#import springer-spaniel.ctheorems: * // provides "proof", "theorem", "lemma"

// Set citation style
#set cite(style: "iso-690-author-date") // page info visible
//#set cite(style: "iso-690-numeric") // page info visible
//#set cite(style: "springer-basic") // no additional info visible (page number in square brackets)
//#set cite(style: "alphanumeric") // page info not visible


#show: springer-spaniel.template(
  title: [Few-Shot Learning for Anomaly Detection: Bachelor Thesis for AI],
  authors: (
    (
      name: "Lukas Heiligenbrunner",
      institute: "Johannes Kepler University",
      address: "Linz, Austria",
      email: "lukas.heiligenbrunner@gmail.com"
    ),
    // ... and so on
  ),
  abstract: lorem(75),

  // debug: true, // Highlights structural elements and links
  // frame: 1pt, // A border around the page for white on white display
  // printer-test: true, // Suitably placed CMYK printer tests
)

#let date = datetime.today() // not today: datetime(year: 1969, month: 9, day: 6,)
#let k-number = "k12345678"

// set equation and heading numbering
#set math.equation(numbering: "(1)")
#set heading(numbering: "1.1")


// Pagebreak after level 1 headings
#show heading.where(level: 1): it => [
  #pagebreak(weak: true)
  #it
]

// show reference targets in brackets
#show ref: it => {
  let el = it.element
  if el != none and el.func() == heading {
    [#it (#el.body)]
  } else [#it]
}

// style table-of-contents
#show outline.entry.where(
  level: 1
): it => {
  v(1em, weak: true)
  strong(it)
}

// Table of contents.
#outline(
  title: {
    text(1.3em, weight: 700, "Contents")
    v(10mm)
  },
  indent: 2em,
  depth: 3
)<outline>
#pagebreak(weak: false)

#include "introduction.typ"
#include "materialandmethods.typ"

= Section Heading
#cite(<efficientADpaper>)
== Subsection Heading
=== Subsubsection Heading
==== Paragraph Heading
===== Subparagraph Heading

#set par(leading: 0.7em, first-line-indent: 0em, justify: true)
#bibliography("sources.bib", style: "apa")
121
typstalt/materialandmethods.typ
Normal file
@@ -0,0 +1,121 @@
= Material and Methods

== Material

=== MVTec AD
MVTec AD is a dataset for benchmarking anomaly detection methods with a focus on industrial inspection.
It contains over 5000 high-resolution images divided into fifteen different object and texture categories.
Each category comprises a set of defect-free training images and a test set of images with various kinds of defects as well as images without defects.

// todo source for https://www.mvtec.com/company/research/datasets/mvtec-ad

// todo example image
//\begin{figure}
//  \centering
//  \includegraphics[width=\linewidth/2]{../rsc/muffin_chiauaua_poster}
//  \caption{Sample images from dataset. \cite{muffinsvschiuahuakaggle_poster}}
//  \label{fig:roc-example}
//\end{figure}


== Methods

=== Few-Shot Learning
Few-Shot learning is a subfield of machine learning that aims to train a classification model with just a few samples, or none at all.
In contrast to traditional supervised learning, where a huge amount of labeled data is required to generalize well to unseen data, only a handful of samples is available.
The model is therefore prone to overfitting to the few training samples.

Typically, a few-shot learning task consists of a support set and a query set.
The support set contains the training data, while the query set contains the evaluation data used for real-world evaluation.
A common way to describe a few-shot learning problem is the n-way k-shot notation.
For example, 3 target classes with 5 samples per class for training constitute a 3-way 5-shot classification problem.

A classical example of how such a model might work is a prototypical network.
These models learn a representation of each class and classify new examples based on proximity to these representations in an embedding space.

The first and easiest method of this bachelor thesis uses a simple ResNet to calculate those embeddings and is essentially a simple prototypical network.
See // todo link to this section
// todo proper source
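As a minimal sketch of this prototypical approach (assuming embeddings already computed by a backbone such as a ResNet with its classification head removed; all names are illustrative):

```python
import torch

def prototypical_classify(support, support_labels, query, n_way):
    # support: (n_way * k_shot, d) embeddings of the support set
    # support_labels: (n_way * k_shot,) class index per support embedding
    # query: (m, d) embeddings of the query set
    # one prototype per class: the mean of its support embeddings
    prototypes = torch.stack(
        [support[support_labels == c].mean(dim=0) for c in range(n_way)]
    )
    # assign each query sample to the nearest prototype (Euclidean distance)
    return torch.cdist(query, prototypes).argmin(dim=1)
```

In an n-way k-shot episode, `support` holds the n_way times k_shot embedded support samples and `query` the embedded query samples.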

=== Generalisation from few samples

=== PatchCore
// todo also show values how they perform on MVTec AD
todo stuff #cite(<patchcorepaper>)
// https://arxiv.org/pdf/2106.08265

=== EfficientAD
todo stuff #cite(<efficientADpaper>)
// https://arxiv.org/pdf/2303.14535

=== Jupyter Notebook

A Jupyter notebook is a shareable document which combines code and its output, text and visualizations.
The notebook along with the editor provides an environment for fast prototyping and data analysis.
It is widely used in the data science, mathematics and machine learning community.

In the context of this practical work it can be used to test and evaluate the active learning loop before implementing it in a Dagster pipeline. #cite(<jupyter>)

=== CNN
Convolutional neural networks are especially well-suited model architectures for processing images, speech and audio signals.
A CNN typically consists of convolutional layers, pooling layers and fully connected layers.
Convolutional layers are a set of learnable kernels (filters).
Each filter performs a convolution operation by sliding a window over the input image.
At each position, the dot product between the filter and the underlying image patch yields one entry of the resulting feature map.
Convolutional layers capture features like edges, textures or shapes.
Pooling layers downsample the feature maps created by the convolutional layers.
This reduces the computational complexity of the overall network and helps against overfitting.
Common pooling layers include average- and max-pooling.
Finally, after some convolutional layers the feature map is flattened and passed to a network of fully connected layers to perform a classification or regression task.
@cnnarchitecture shows a typical binary classification task.
#cite(<cnnintro>)

#figure(
  image("rsc/cnn_architecture.png", width: 80%),
  caption: [Architecture of a convolutional neural network. #cite(<cnnarchitectureimg>)],
) <cnnarchitecture>
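To make the layer structure concrete, here is a minimal sketch of such a network (a hypothetical binary classifier for 224x224 RGB inputs, not the architecture shown in @cnnarchitecture):

```python
import torch.nn as nn

# convolution -> pooling twice, then flatten and classify
model = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1),   # 16 learnable 3x3 filters
    nn.ReLU(),
    nn.MaxPool2d(2),                              # halve spatial resolution
    nn.Conv2d(16, 32, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Flatten(),                                 # flatten final feature maps
    nn.Linear(32 * 56 * 56, 2),                   # two output classes
)
```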

=== ResNet

Residual neural networks are a special type of neural network architecture.
They are especially well-suited for deep learning and have been used in many state-of-the-art computer vision tasks.
The main idea behind ResNet is the skip connection.
A skip connection is a direct connection from one layer to a later layer that is not the immediately following one.
This helps to avoid the vanishing gradient problem and eases the training of very deep networks.
ResNet has proven to be very successful in many computer vision tasks and is used in this practical work for the classification task.
There are several different ResNet architectures, the most common being ResNet-18, ResNet-34, ResNet-50, ResNet-101 and ResNet-152. #cite(<resnet>)
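The skip connection can be sketched as a simplified residual block (real ResNet blocks additionally use batch normalization and handle channel changes):

```python
import torch.nn as nn

class ResidualBlock(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.conv2(self.relu(self.conv1(x)))
        return self.relu(out + x)  # skip connection adds the input back
```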

Since the dataset is relatively small and the two-class classification task is relatively easy (for such a large model), the ResNet-18 architecture is used in this practical work.
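As an illustration, a pretrained ResNet-18 can be turned into an embedding backbone with torchvision by replacing its classification head (a sketch, assuming the torchvision package is available):

```python
import torch.nn as nn
from torchvision.models import resnet18, ResNet18_Weights

backbone = resnet18(weights=ResNet18_Weights.DEFAULT)
backbone.fc = nn.Identity()  # drop the classification head -> 512-d embeddings
```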

=== CAML
Todo
=== P$>$M$>$F
Todo

=== Softmax

The Softmax function @softmax #cite(<liang2017soft>) converts a vector of $n$ real numbers into a probability distribution.
It is a generalization of the Sigmoid function and is often used as an activation layer in neural networks.

$
sigma(bold(z))_j = (e^(z_j)) / (sum_(k=1)^n e^(z_k)) "for" j=1,...,n
$ <softmax>

The softmax function is closely related to the Boltzmann distribution and was first introduced in the 19th century #cite(<Boltzmann>).
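A direct implementation of @softmax (a minimal sketch; subtracting the maximum before exponentiating is a common trick for numerical stability and does not change the result):

```python
import torch

def softmax(z):
    e = torch.exp(z - z.max())  # shift by max(z) for numerical stability
    return e / e.sum()

softmax(torch.tensor([1.0, 2.0, 3.0]))  # -> tensor([0.0900, 0.2447, 0.6652])
```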


=== Cross Entropy Loss
Cross Entropy Loss is a well-established loss function in machine learning.
@crelformal #cite(<crossentropy>) shows the formal general definition of the Cross Entropy Loss,
and @crelbinary is the special case of the general Cross Entropy Loss for binary classification tasks.

$
H(p,q) = -sum_(x in cal(X)) p(x) log q(x)
$ <crelformal>

$
H(p,q) = -(p log(q) + (1-p) log(1-q))
$ <crelbinary>

$
cal(L)(p,q) = -1/cal(B) sum_(i=1)^(cal(B)) (p_i log(q_i) + (1-p_i) log(1-q_i))
$ <crelbinarybatch>

Equation @crelbinarybatch #cite(<handsonaiI>) is the Binary Cross Entropy Loss for a batch of size $cal(B)$ and is used for model training in this practical work.
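A direct translation of @crelbinarybatch (a sketch; in practice the equivalent `torch.nn.BCELoss` would be used):

```python
import torch

def binary_cross_entropy(p, q):
    # p: (B,) ground-truth labels in {0, 1}
    # q: (B,) predicted probabilities in (0, 1)
    return -(p * torch.log(q) + (1 - p) * torch.log(1 - q)).mean()
```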

=== Mathematical modeling of the problem
BIN
typstalt/rsc/cnn_architecture.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 94 KiB |
92
typstalt/sources.bib
Normal file
@@ -0,0 +1,92 @@
%! Author = lukas
%! Date = 4/9/24

@InProceedings{crossentropy,
  ISSN = {00359246},
  URL = {http://www.jstor.org/stable/2984087},
  abstract = {This paper deals first with the relationship between the theory of probability and the theory of rational behaviour. A method is then suggested for encouraging people to make accurate probability estimates, a connection with the theory of information being mentioned. Finally Wald's theory of statistical decision functions is summarised and generalised and its relation to the theory of rational behaviour is discussed.},
  author = {I. J. Good},
  journal = {Journal of the Royal Statistical Society. Series B (Methodological)},
  number = {1},
  pages = {107--114},
  publisher = {[Royal Statistical Society, Wiley]},
  title = {Rational Decisions},
  urldate = {2024-05-23},
  volume = {14},
  year = {1952}
}

@misc{efficientADpaper,
  title = {EfficientAD: Accurate Visual Anomaly Detection at Millisecond-Level Latencies},
  author = {Kilian Batzner and Lars Heckler and Rebecca König},
  year = {2024},
  eprint = {2303.14535},
  archivePrefix = {arXiv},
  primaryClass = {cs.CV},
  url = {https://arxiv.org/abs/2303.14535}
}

@misc{patchcorepaper,
  title = {Towards Total Recall in Industrial Anomaly Detection},
  author = {Karsten Roth and Latha Pemula and Joaquin Zepeda and Bernhard Schölkopf and Thomas Brox and Peter Gehler},
  year = {2022},
  eprint = {2106.08265},
  archivePrefix = {arXiv},
  primaryClass = {cs.CV},
  url = {https://arxiv.org/abs/2106.08265}
}

@misc{jupyter,
  author = {},
  title = {{Project Jupyter Documentation}},
  howpublished = {\url{https://docs.jupyter.org/en/latest/}},
  year = {2024},
  note = {[Online; accessed 13-May-2024]}
}

@misc{cnnintro,
  title = {An Introduction to Convolutional Neural Networks},
  author = {Keiron O'Shea and Ryan Nash},
  year = {2015},
  eprint = {1511.08458},
  archivePrefix = {arXiv},
  primaryClass = {cs.NE}
}

@misc{cnnarchitectureimg,
  author = {},
  title = {{What are convolutional neural networks?}},
  howpublished = {\url{https://cointelegraph.com/explained/what-are-convolutional-neural-networks}},
  year = {2024},
  note = {[Online; accessed 12-April-2024]}
}

@inproceedings{liang2017soft,
  title = {Soft-margin softmax for deep classification},
  author = {Liang, Xuezhi and Wang, Xiaobo and Lei, Zhen and Liao, Shengcai and Li, Stan Z},
  booktitle = {International Conference on Neural Information Processing},
  pages = {413--421},
  year = {2017},
  organization = {Springer}
}

@inbook{Boltzmann,
  place = {Cambridge},
  series = {Cambridge Library Collection - Physical Sciences},
  title = {Studien über das Gleichgewicht der lebendigen Kraft zwischen bewegten materiellen Punkten},
  booktitle = {Wissenschaftliche Abhandlungen},
  publisher = {Cambridge University Press},
  author = {Boltzmann, Ludwig},
  editor = {Hasenöhrl, Friedrich},
  year = {2012},
  pages = {49--96},
  collection = {Cambridge Library Collection - Physical Sciences}
}

@misc{resnet,
  title = {Deep Residual Learning for Image Recognition},
  author = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun},
  year = {2015},
  eprint = {1512.03385},
  archivePrefix = {arXiv},
  primaryClass = {cs.CV}
}