diff --git a/src/implementation.tex b/src/implementation.tex
index 73f4008..a015ecd 100644
--- a/src/implementation.tex
+++ b/src/implementation.tex
@@ -27,4 +27,40 @@
 That means taking the absolute value of the prediction minus the class center re
 S(z) = | 0.5 - \sigma(\mathbf{z})_0| \; \textit{or} \; \arg\max_j \sigma(\mathbf{z})
 \end{align}
-\cite{activelearning}
\ No newline at end of file
+\cite{activelearning}
+
+We first define notation for selecting the $n$ smallest and largest elements of a score set $S$:
+
+\begin{equation}\label{eq:minnot}
+    \text{min}_n(S) \coloneqq a \subseteq S \text{ where } a \text{ are the } n \text{ smallest elements of } S
+\end{equation}
+
+\begin{equation}\label{eq:maxnot}
+    \text{max}_n(S) \coloneqq a \subseteq S \text{ where } a \text{ are the } n \text{ largest elements of } S
+\end{equation}
+
+\subsection{Low certainty first}
+We take the $\mathcal{B}$ samples with the lowest certainty scores and give them to the user for labeling, where $\mathcal{B}$ denotes the labeling batch size.
+
+\begin{equation}
+    \text{min}_\mathcal{B}(S(z))
+\end{equation}
+
+\subsection{High certainty first}
+We take the $\mathcal{B}$ samples with the highest certainty scores and give them to the user for labeling.
+
+\begin{equation}
+    \text{max}_\mathcal{B}(S(z))
+\end{equation}
+
+\subsection{Low and high certainty first}
+
+We fill half of the batch of size $\mathcal{B}$ with the lowest-certainty samples and the other half with the highest-certainty samples.
+
+\begin{equation}
+    \text{min}_{\mathcal{B}/2}(S(z)) \cup \text{max}_{\mathcal{B}/2}(S(z))
+\end{equation}
+
+\subsection{Mid certainty first}
+
+We take the samples in the middle of the certainty range by discarding the $\mathcal{S}/2 - \mathcal{B}/2$ lowest and highest scores, where $\mathcal{S}$ denotes the total number of scored samples; exactly $\mathcal{B}$ samples remain.
+
+\begin{equation}
+    S(z) \setminus (\text{min}_{\mathcal{S}/2 - \mathcal{B}/2}(S(z)) \cup \text{max}_{\mathcal{S}/2 - \mathcal{B}/2}(S(z)))
+\end{equation}
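+
+All four strategies reduce to sorting the certainty scores once. The following Python sketch is a minimal illustration (NumPy-based; the function names and the binary sigmoid head are assumptions of this example, not our exact implementation):
+
+\begin{verbatim}
+import numpy as np
+
+def certainty_scores(logits):
+    # S(z) = |0.5 - sigmoid(z)| for a binary head
+    probs = 1.0 / (1.0 + np.exp(-logits))
+    return np.abs(0.5 - probs)
+
+def low_certainty_first(scores, batch_size):
+    return np.argsort(scores)[:batch_size]        # min_B
+
+def high_certainty_first(scores, batch_size):
+    return np.argsort(scores)[-batch_size:]       # max_B
+
+def low_and_high_first(scores, batch_size):
+    order = np.argsort(scores)
+    half = batch_size // 2                        # assumes B is even
+    return np.concatenate([order[:half], order[-half:]])
+
+def mid_certainty_first(scores, batch_size):
+    order = np.argsort(scores)
+    cut = (len(scores) - batch_size) // 2         # = S/2 - B/2
+    return order[cut:cut + batch_size]            # the middle B samples
+\end{verbatim}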
\ No newline at end of file
diff --git a/src/main.tex b/src/main.tex
index ef613ae..142955e 100644
--- a/src/main.tex
+++ b/src/main.tex
@@ -75,19 +75,6 @@
     \input{experimentalresults}
     \input{conclusionandoutlook}

-    \section{Semi-Supervised learning}\label{sec:semi-supervised-learning}
-    In traditional supervised learning we have a labeled dataset.
-    Each datapoint is associated with a corresponding target label.
-    The goal is to fit a model to predict the labels from datapoints.
-
-    In traditional unsupervised learning there are also datapoints but no labels are known.
-    The goal is to find patterns or structures in the data.
-    Moreover, it can be used for clustering or downprojection.
-
-    Those two techniques combined yield semi-supervised learning.
-    Some of the labels are known, but for most of the data we have only the raw datapoints.
-    The basic idea is that the unlabeled data can significantly improve the model performance when used in combination with the labeled data.
-
     \section{FixMatch}\label{sec:fixmatch}
     There is an already existing approach called FixMatch.
     This was introduced in a Google Research paper from 2020~\cite{fixmatch}.
diff --git a/src/materialandmethods.tex b/src/materialandmethods.tex
index d0a78b0..575e291 100644
--- a/src/materialandmethods.tex
+++ b/src/materialandmethods.tex
@@ -13,6 +13,19 @@
 \subsection{Methods}\label{subsec:methods}
 \subsubsection{Active-Learning}
+\subsubsection{Semi-Supervised learning}
+In traditional supervised learning we have a labeled dataset.
+Each datapoint is associated with a corresponding target label.
+The goal is to fit a model that predicts the labels from the datapoints.
+
+In traditional unsupervised learning there are also datapoints, but no labels are known.
+The goal is to find patterns or structures in the data.
+It can be used, for example, for clustering or dimensionality reduction.
+
+Combining the two techniques yields semi-supervised learning.
+Some of the labels are known, but for most of the data we have only the raw datapoints.
+The basic idea is that the unlabeled data can significantly improve model performance when used in combination with the labeled data.
+
 \subsubsection{ROC and AUC}
 \subsubsection{ResNet}
 \subsubsection{CNN}
@@ -26,6 +39,7 @@
 Pooling layers downsample the feature maps created by the convolutional layers.
 This helps reduce the computational complexity of the overall network and mitigates overfitting.
 Common pooling layers include average and max pooling.
 Finally, after some convolution layers the feature map is flattened and passed to a network of fully connected layers to perform a classification or regression task.
+Figure~\ref{fig:cnn-architecture} shows a typical CNN architecture for a binary classification task; a minimal code sketch follows below.
 \begin{figure}[h]
     \centering
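+
+As a minimal illustration of the architecture described above, the following PyTorch sketch stacks two convolution/pooling blocks and a fully connected classifier head; the layer sizes and the $3 \times 32 \times 32$ input shape are assumptions of this example only, not the network used in our experiments:
+
+\begin{verbatim}
+import torch.nn as nn
+
+class SmallCNN(nn.Module):
+    """Conv/pool feature extractor followed by fully connected layers."""
+    def __init__(self):
+        super().__init__()
+        self.features = nn.Sequential(
+            nn.Conv2d(3, 16, kernel_size=3, padding=1), nn.ReLU(),
+            nn.MaxPool2d(2),              # downsamples the feature maps
+            nn.Conv2d(16, 32, kernel_size=3, padding=1), nn.ReLU(),
+            nn.MaxPool2d(2),
+        )
+        self.classifier = nn.Sequential(
+            nn.Flatten(),                 # flatten the final feature map
+            nn.Linear(32 * 8 * 8, 64), nn.ReLU(),
+            nn.Linear(64, 1),             # single logit for binary output
+        )
+
+    def forward(self, x):                 # x: (N, 3, 32, 32)
+        return self.classifier(self.features(x))
+\end{verbatim}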