outsource text in separate files
This commit is contained in:
parent
1ea1fc763f
commit
0566ff4626
5
src/conclusionandoutlook.tex
Normal file
@@ -0,0 +1,5 @@
\section{Conclusion and Outlook}\label{sec:conclusion-and-outlook}

\subsection{Conclusion}\label{subsec:conclusion}

\subsection{Outlook}\label{subsec:outlook}
1
src/experimentalresults.tex
Normal file
@@ -0,0 +1 @@
\section{Experimental Results}\label{sec:experimental-results}
15
src/implementation.tex
Normal file
@@ -0,0 +1,15 @@
\section{Implementation}\label{sec:implementation}
The model is defined as $g(\pmb{x};\pmb{w})$, where $\pmb{w}$ denotes the model weights and $\pmb{x}$ the input samples.
We define two hyperparameters: the batch size $\mathcal{B}$ and the sample size $\mathcal{S}$, where $\mathcal{B} < \mathcal{S}$.
In every active-learning loop iteration we draw $\mathcal{S}$ random samples $\mathcal{X}_S \subset \mathcal{X}_U \subset \mathcal{X}$ from the unlabeled pool and pass them through the model:

\begin{equation}\label{eq:equation2}
z = g(\mathcal{X}_S;\pmb{w})
\end{equation}

The outputs $z$ are turned into a class distribution by the softmax function (Equation~\ref{eq:softmax}), from which the ranking metric or pseudo-label is derived:
\begin{align}
S(z) = | 0.5 - \sigma(\mathbf{z})_0| \; \text{or} \; \arg\max_j \sigma(\mathbf{z})
\end{align}

\cite{activelearning}
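The selection step described above can be sketched in a few lines of PyTorch. This is only a minimal illustration with assumed placeholder names (\texttt{model}, \texttt{unlabeled\_pool}, \texttt{SAMPLE\_SIZE} for $\mathcal{S}$, \texttt{BATCH\_SIZE} for $\mathcal{B}$), not the project's actual loop implementation:
\begin{verbatim}
import torch

SAMPLE_SIZE = 512  # S: candidates drawn per iteration
BATCH_SIZE = 32    # B: samples sent for labeling, B < S

def select_for_labeling(model, unlabeled_pool):
    # draw S random candidates X_S from the unlabeled pool X_U
    idx = torch.randperm(len(unlabeled_pool))[:SAMPLE_SIZE]
    x_s = unlabeled_pool[idx]

    with torch.no_grad():
        z = model(x_s)               # z = g(X_S; w), raw logits
    probs = torch.softmax(z, dim=1)  # sigma(z), class distribution

    # ranking metric: distance of the class-0 probability from 0.5
    # (small value = uncertain); alternatively, the argmax of
    # sigma(z) can serve as a pseudo-label
    scores = torch.abs(0.5 - probs[:, 0])
    pseudo_labels = probs.argmax(dim=1)

    # hand the B most uncertain candidates to the annotator
    most_uncertain = scores.argsort()[:BATCH_SIZE]
    return idx[most_uncertain], pseudo_labels
\end{verbatim}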
21
src/introduction.tex
Normal file
@@ -0,0 +1,21 @@
\section{Introduction}\label{sec:introduction}
\subsection{Motivation}\label{subsec:motivation}
For most supervised learning tasks, a large number of training samples is essential.
With too little training data the model will not generalize well and will not fit the real-world task.
Labeling datasets is commonly seen as an expensive task that should be avoided as much as possible.
This is the motivation for the machine-learning field of active learning.
The general approach is to train a model that, in every iteration, predicts a ranking metric or pseudo-labels, which can then be used to rank how important it is to label each sample.

The goal of this practical work is to test active learning on a simple classification task and evaluate its performance.
\subsection{Research Questions}\label{subsec:research-questions}

\subsubsection{Does Active-Learning benefit the learning process?}

Should active learning be used for classification tasks to improve learning performance?
Furthermore, how does the sample-selection process impact the learning?
\subsubsection{Are Dagster and Label-Studio proper tooling to build an AL loop?}
Is combining Dagster with Label-Studio a good match for building scalable and reliable active-learning loops?
\subsubsection{Does balancing the learning samples improve performance?}
The sample-selection metric might, by chance, select samples from only one class.
Does balancing this distribution improve the model's performance?
\subsection{Outline}\label{subsec:outline}
55
src/main.tex
@@ -69,56 +69,11 @@
%% This command processes the author and affiliation and title
%% information and builds the first part of the formatted document.
\maketitle

\section{Introduction}\label{sec:introduction}
\subsection{Motivation}
For most supervised learning tasks, a large number of training samples is essential.
With too little training data the model will not generalize well and will not fit the real-world task.
Labeling datasets is commonly seen as an expensive task that should be avoided as much as possible.
This is the motivation for the machine-learning field of active learning.
The general approach is to train a model that, in every iteration, predicts a ranking metric or pseudo-labels, which can then be used to rank how important it is to label each sample.

The goal of this practical work is to test active learning on a simple classification task and evaluate its performance.
\subsection{Research Questions}
\subsubsection{Does Active-Learning benefit the learning process?}

Should active learning be used for classification tasks to improve learning performance?
Furthermore, how does the sample-selection process impact the learning?
\subsubsection{Are Dagster and Label-Studio proper tooling to build an AL loop?}
Is combining Dagster with Label-Studio a good match for building scalable and reliable active-learning loops?
\subsubsection{Does balancing the learning samples improve performance?}
The sample-selection metric might, by chance, select samples from only one class.
Does balancing this distribution improve the model's performance?
\subsection{Outline}
\section{Material and Methods}
\subsection{Material}
\subsubsection{Dagster}
\subsubsection{Label-Studio}
\subsubsection{PyTorch}
\subsection{Methods}
\subsubsection{Active-Learning}
\subsubsection{ROC}
\subsubsection{ResNet}

\section{Implementation}
The model is defined as $g(\pmb{x};\pmb{w})$, where $\pmb{w}$ denotes the model weights and $\pmb{x}$ the input samples.
We define two hyperparameters: the batch size $\mathcal{B}$ and the sample size $\mathcal{S}$, where $\mathcal{B} < \mathcal{S}$.
In every active-learning loop iteration we draw $\mathcal{S}$ random samples $\mathcal{X}_S \subset \mathcal{X}_U \subset \mathcal{X}$ from the unlabeled pool and pass them through the model:
\begin{equation}
z = g(\mathcal{X}_S;\pmb{w})
\end{equation}
To get a class distribution that sums up to one, we apply a softmax to the resulting values.
\begin{equation}
\sigma(\mathbf{z})_j = \frac{e^{z_j}}{\sum_{k=1}^K e^{z_k}} \quad \text{for } j \in \{0,1\}\label{eq:equation}
\end{equation}

\begin{align}
S(z) = | 0.5 - \sigma(\mathbf{z})_0| \; \text{or} \; \arg\max_j \sigma(\mathbf{z})
\end{align}


\cite{activelearning}

\input{introduction}
\input{materialandmethods}
\input{implementation}
\input{experimentalresults}
\input{conclusionandoutlook}

\section{Semi-Supervised learning}\label{sec:semi-supervised-learning}
In traditional supervised learning we have a labeled dataset.
23
src/materialandmethods.tex
Normal file
@@ -0,0 +1,23 @@
\section{Material and Methods}\label{sec:material-and-methods}

\subsection{Material}\label{subsec:material}

\subsubsection{Dagster}
\subsubsection{Label-Studio}
\subsubsection{PyTorch}
\subsubsection{NVTec}
\subsubsection{ImageNet}

\subsection{Methods}\label{subsec:methods}

\subsubsection{Active-Learning}
\subsubsection{ROC and AUC}
\subsubsection{ResNet}
\subsubsection{CNN}
\subsubsection{Softmax}

The softmax function converts a vector of $K$ real numbers into a probability distribution.
It is a generalization of the sigmoid function and is often used as an activation layer in neural networks.
\begin{equation}\label{eq:softmax}
\sigma(\mathbf{z})_j = \frac{e^{z_j}}{\sum_{k=1}^K e^{z_k}} \quad \text{for } j \in \{1,\dots,K\}
\end{equation}
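As a small numerical illustration (the values are chosen arbitrarily and are not taken from the experiments): for a two-class output $\mathbf{z} = (2, 0)$ the softmax yields
\begin{equation*}
\sigma(\mathbf{z}) = \left(\frac{e^{2}}{e^{2}+e^{0}},\ \frac{e^{0}}{e^{2}+e^{0}}\right) \approx (0.88,\ 0.12),
\end{equation*}
a vector with positive entries that sum to one.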