From 155faa6e807ce8c1122946c3762df77be6f86fc7 Mon Sep 17 00:00:00 2001
From: lukas-heiligenbrunner
Date: Mon, 30 Dec 2024 18:34:43 +0100
Subject: [PATCH] correct eq numbering, add impl of resnet50

---
 implementation.typ     | 85 ++++++++++++++++++++++++++++++++++++++++++
 main.typ               |  6 ++-
 materialandmethods.typ | 14 +++----
 sources.bib            |  9 +++++
 4 files changed, 106 insertions(+), 8 deletions(-)

diff --git a/implementation.typ b/implementation.typ
index 0cbde7d..c3ad1d8 100644
--- a/implementation.typ
+++ b/implementation.typ
@@ -1,4 +1,89 @@
+#import "@preview/fletcher:0.5.3" as fletcher: diagram, node, edge
+#import fletcher.shapes: rect, diamond
+#import "utils.typ": todo
+
 = Implementation
+The three methods described (ResNet50, CAML, P>M>F) were implemented in a Jupyter notebook and compared with each other.
+
+== Experiments
+For all three methods we test the following use cases:#todo[maybe write more to each test]
+- Detection of the anomaly class (1, 3, 5 shots)
+- Imbalanced target class prediction (5, 10, 15, 30 good shots, 5 bad shots)
+- 2-way classification (1, 3, 5 shots)
+- Imbalanced 2-way classification (5, 10, 15, 30 good shots, 5 bad shots)
+- Detection of only the anomaly classes (1, 3, 5 shots)
+
+These experiments were conducted on the bottle and cable classes of the MVTec AD dataset.
+
+== ResNet50
+=== Approach
+The simplest approach is to use a pre-trained ResNet50 model as a feature extractor.
+Features are extracted from both the support and the query set to obtain a down-projected representation of the images.
+The support set embeddings are then compared with the query set embeddings.
+To predict the class of a query sample, the class whose support embedding has the smallest distance to the query embedding is chosen.
+If there is more than one support embedding for a class, the mean of those embeddings is used (the class center).
+This approach is similar to a prototypical network @snell2017prototypicalnetworksfewshotlearning.
+
+In this bachelor thesis a pre-trained ResNet50 (IMAGENET1K_V2) PyTorch model was used.
+It is pre-trained on the ImageNet dataset and consists of 50 layers organized in residual blocks.
+
+To obtain the embeddings, the last layer of the model was removed and the output of the second-to-last layer was used as the embedding.
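+A minimal sketch of this embedding and nearest-class-center classification is shown below (assuming torchvision >= 0.13; `support_imgs`, `support_labels` and `query_imgs` are illustrative placeholders, not the actual notebook code):
+
+```python
+import torch
+from torchvision.models import resnet50, ResNet50_Weights
+
+# Load the pre-trained ResNet50 and replace the final fully connected
+# layer with an identity, so the network returns the 2048-dimensional
+# output of the second-to-last layer as the embedding.
+weights = ResNet50_Weights.IMAGENET1K_V2
+model = resnet50(weights=weights)
+model.fc = torch.nn.Identity()
+model.eval()
+preprocess = weights.transforms()
+
+@torch.no_grad()
+def embed(imgs):
+    # imgs: list of PIL images -> (n, 2048) embedding matrix
+    return model(torch.stack([preprocess(img) for img in imgs]))
+
+support = embed(support_imgs)                     # (n_support, 2048)
+classes = sorted(set(support_labels))
+labels = torch.tensor([classes.index(l) for l in support_labels])
+
+# Class center: mean of all support embeddings of the same class.
+centers = torch.stack([support[labels == c].mean(dim=0)
+                       for c in range(len(classes))])
+
+# Predict the class whose center is closest in Euclidean distance.
+dists = torch.cdist(embed(query_imgs), centers)   # (n_query, n_classes)
+predictions = [classes[i] for i in dists.argmin(dim=1)]
+```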
+In the following diagram the ResNet50 architecture is visualized and the cut point is marked.
+
+#diagram(
+  spacing: (5mm, 5mm),
+  node-stroke: 1pt,
+  node-fill: eastern,
+  edge-stroke: 1pt,
+
+  // Input
+  node((1, 1), "Input", shape: rect, width: 30mm, height: 10mm, name: <input>),
+
+  // Conv1
+  node((1, 0), "Conv1\n7x7, 64", shape: rect, width: 30mm, height: 15mm, name: <conv1>),
+  edge(<input>, <conv1>, "->"),
+
+  // MaxPool
+  node((1, -1), "MaxPool\n3x3", shape: rect, width: 30mm, height: 15mm, name: <maxpool>),
+  edge(<conv1>, <maxpool>, "->"),
+
+  // Residual Blocks
+  node((3, -1), "Residual Block 1\n3x [64, 64, 256]", shape: rect, width: 40mm, height: 15mm, name: <block1>),
+  edge(<maxpool>, <block1>, "->"),
+
+  node((3, 0), "Residual Block 2\n4x [128, 128, 512]", shape: rect, width: 40mm, height: 15mm, name: <block2>),
+  edge(<block1>, <block2>, "->"),
+
+  node((3, 1), "Residual Block 3\n6x [256, 256, 1024]", shape: rect, width: 40mm, height: 15mm, name: <block3>),
+  edge(<block2>, <block3>, "->"),
+
+  node((3, 2), "Residual Block 4\n3x [512, 512, 2048]", shape: rect, width: 40mm, height: 15mm, name: <block4>),
+  edge(<block3>, <block4>, "->"),
+
+  // Cutting Line
+  edge(<block4>, <avgpool>, marks: "..|..>", stroke: 1pt, label: "Cut here", label-pos: 0.5, label-side: left),
+
+  // AvgPool + FC
+  node((7, 2), "AvgPool\n1x1", shape: rect, width: 30mm, height: 10mm, name: <avgpool>),
+  //edge(<block4>, <avgpool>, "->"),
+
+  node((7, 1), "Fully Connected\n1000 classes", shape: rect, width: 40mm, height: 10mm, name: <fc>),
+  edge(<avgpool>, <fc>, "->"),
+
+  // Output
+  node((7, 0), "Output", shape: rect, width: 30mm, height: 10mm, name: <output>),
+  edge(<fc>, <output>, "->")
+)
+
+After creating the embeddings for the support and query sets, the Euclidean distances between them are calculated.
+The class with the smallest distance is chosen as the predicted class.
+
+=== Results
+
+
+== CAML
+
+== P>M>F
 
 == Experiment Setup
 % todo
diff --git a/main.typ b/main.typ
index 9d5c397..712b7d7 100644
--- a/main.typ
+++ b/main.typ
@@ -3,6 +3,7 @@
 #import "utils.typ": inwriting, draft, todo, flex-caption, flex-caption-styles
 #import "glossary.typ": glossary
 #import "@preview/glossarium:0.2.6": make-glossary, print-glossary, gls, glspl
+#import "@preview/equate:0.2.1": equate
 
 #show: make-glossary
 #show: flex-caption-styles
@@ -72,9 +73,12 @@ To everyone who contributed to this thesis, directly or indirectly, I offer my h
 )
 
 // set equation and heading numbering
-#set math.equation(numbering: "(1)")
+#show: equate.with(breakable: true, sub-numbering: true)
+#set math.equation(numbering: "(1.1)")
 #set heading(numbering: "1.1")
 
+// Allow references to a line of the equation.
+//#show ref: equate
 
 // Set font size
 #show heading.where(level: 3): set text(size: 1.05em)
diff --git a/materialandmethods.typ b/materialandmethods.typ
index 755eb32..130b513 100644
--- a/materialandmethods.typ
+++ b/materialandmethods.typ
@@ -1,5 +1,6 @@
 #import "@preview/subpar:0.1.1"
 #import "utils.typ": todo
+#import "@preview/equate:0.2.1": equate
 
 = Material and Methods
 
@@ -283,17 +284,16 @@ The softmax function has high similarities with the Boltzmann distribution and w
 === Cross Entropy Loss
 #todo[Maybe remove this section]
 Cross Entropy Loss is a well established loss function in machine learning.
-@crel #cite(<handsonaiI>) shows the formal general definition of the Cross Entropy Loss.
-And @crel is the special case of the general Cross Entropy Loss for binary classification tasks.
+@crelformal #cite(<handsonaiI>) shows the formal general definition of the Cross Entropy Loss.
+@crelbinary is the special case of the general Cross Entropy Loss for binary classification tasks.
 
 $
-H(p,q) &= -sum_(x in cal(X)) p(x) log q(x)\
-H(p,q) &= -(p log(q) + (1-p) log(1-q))\
-cal(L)(p,q) &= -1/N sum_(i=1)^(cal(B)) (p_i log(q_i) + (1-p_i) log(1-q_i))
+H(p,q) &= -sum_(x in cal(X)) p(x) log q(x) #<crelformal>\
+H(p,q) &= -(p log(q) + (1-p) log(1-q)) #<crelbinary>\
+cal(L)(p,q) &= -1/cal(B) sum_(i=1)^(cal(B)) (p_i log(q_i) + (1-p_i) log(1-q_i)) #<crelbatched>
 $ <crel>
 
-#todo[Check how multiline equation refs work]
-Equation~$cal(L)(p,q)$ @crel #cite(<handsonaiI>) is the Binary Cross Entropy Loss for a batch of size $cal(B)$ and used for model training in this Practical Work.
+Equation~$cal(L)(p,q)$ @crelbatched #cite(<handsonaiI>) is the Binary Cross Entropy Loss for a batch of size $cal(B)$ and is used for model training in this Practical Work.
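+
+As a quick sanity check, the batched form can be evaluated by hand; the following minimal sketch uses two made-up samples (all values are purely illustrative):
+
+```python
+import math
+
+# Binary Cross Entropy for a batch of size B = 2.
+targets = [1.0, 0.0]        # p_i: ground-truth labels
+predictions = [0.9, 0.2]    # q_i: predicted probabilities
+
+loss = -sum(p * math.log(q) + (1 - p) * math.log(1 - q)
+            for p, q in zip(targets, predictions)) / len(targets)
+
+print(loss)  # = -(log(0.9) + log(0.8)) / 2, approximately 0.164
+```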
 
 === Cosine Similarity
 To measure the distance between two vectors some common distance measures are used.
diff --git a/sources.bib b/sources.bib
index 7b30e17..a14db4d 100644
--- a/sources.bib
+++ b/sources.bib
@@ -118,3 +118,12 @@
     primaryClass={cs.LG},
     url={https://arxiv.org/abs/2310.10971},
 }
+
+
+@misc{handsonaiI,
+  author = {Andreas Schörgenhumer and Bernhard Schäfl and Michael Widrich},
+  title = {Lecture notes in Hands On AI I, Unit 4 \& 5},
+  month = {October},
+  year = {2021},
+  publisher = {Johannes Kepler Universität Linz}
+}