From c0c51f8ecffe057f3b55aa2e10f1e29fc9de1fc6 Mon Sep 17 00:00:00 2001
From: lukas-heilgenbrunner
Date: Mon, 22 May 2023 18:28:41 +0200
Subject: [PATCH] add remaining loss formulas, fix some typos

---
 summary/main.tex | 78 +++++++++++++++++++++++++-----------------
 1 file changed, 40 insertions(+), 38 deletions(-)

diff --git a/summary/main.tex b/summary/main.tex
index 6dffcd2..78d2f3a 100644
--- a/summary/main.tex
+++ b/summary/main.tex
@@ -89,14 +89,14 @@
 %% information and builds the first part of the formatted document.
 \maketitle
 
-\section{Introduction}
+\section{Introduction}\label{sec:introduction}
 For most supervised learning tasks, a large number of training samples is essential.
 With too little training data the model will not generalize well and will not fit a real-world task.
 Labeling datasets is commonly seen as an expensive task and should be avoided as much as possible.
 That is why there is a machine-learning field called Semi-Supervised learning.
 The general approach is to train a model that predicts Pseudo-Labels, which can then be used to train the main model.
 
-\section{Semi-Supervised learning}
+\section{Semi-Supervised learning}\label{sec:semi-supervised-learning}
 In traditional supervised learning we have a labeled dataset.
 Each datapoint is associated with a corresponding target label.
 The goal is to fit a model to predict the labels from datapoints.
@@ -122,7 +122,7 @@ The quantity and quality of the obtained labels is crucial and they have an sign
 This means improving the pseudo-label framework as much as possible is important.
 
 \subsection{Math of FixMatch}\label{subsec:math-of-fixmatch}
-The equation~\ref{eq:fixmatch} defines the loss-function that trains the model.
+Equation~\ref{eq:fixmatch} defines the loss-function that trains the model.
 The sum over a batch of size $B_u$ takes the average loss of this batch and should be straightforward.
 The input data is augmented in two different ways.
 At first there is a weak augmentation $\mathcal{T}_{\text{weak}}(\cdot)$ which only applies basic transformations such as filtering and blurring.
@@ -136,14 +136,15 @@ Moreover, there is the strong augmentation $\mathcal{T}_{\text{strong}}(\cdot)$
 The interesting part is the indicator function $\mathbbm{1}(\cdot)$ which applies a principle called `confidence-based masking'.
 It retains a label only if its largest probability is above a threshold $\tau$,
 where $p_i \coloneqq F(\mathcal{T}_{\text{weak}}(u_i))$ is a model evaluation with a weakly augmented input.
-The second part $\mathcal{H}(\cdot, \cdot)$ is a standard Cross-entropy loss function which takes two inputs.
+The second part $\mathcal{H}(\cdot, \cdot)$ is a standard Cross-entropy loss function which takes two inputs, the predicted and the true label.
 Here these are $\hat{y}_i$, the obtained pseudo-label, and $F(\mathcal{T}_{\text{strong}}(u_i))$, a model evaluation with strong augmentation.
 The indicator function evaluates to $0$ if the pseudo prediction is not confident, and the current loss term is then dropped.
 Otherwise it is kept and trains the model further.
 
-\section{Cross-Model Pseudo-Labeling}
-The newly invented approach of this paper is called Cross-Model Pseudo-Labeling (CMPL).\cite{Xu_2022_CVPR}
+\section{Cross-Model Pseudo-Labeling}\label{sec:cross-model-pseudo-labeling}
+The approach newly introduced in this paper is called Cross-Model Pseudo-Labeling (CMPL)~\cite{Xu_2022_CVPR}.
 In Figure~\ref{fig:cmpl-structure} one can see its structure.
+We define two different models, a smaller and a larger one.
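+To make this concrete, the following is a minimal, hypothetical sketch (PyTorch-style, not taken from the paper) of how two such classifiers of different capacity, called $F$ and $A$ in the next subsection, could be set up; the architecture, the widths, the input size and the number of classes are placeholder assumptions.
+
+\begin{verbatim}
+# Hypothetical sketch: two classifiers of different capacity that will later
+# exchange pseudo-labels. The architecture and all sizes are placeholders.
+import torch.nn as nn
+
+def make_classifier(width: int, in_dim: int = 3 * 32 * 32,
+                    num_classes: int = 10) -> nn.Module:
+    # A deliberately simple model; only the hidden width differs.
+    return nn.Sequential(
+        nn.Flatten(),
+        nn.Linear(in_dim, width),
+        nn.ReLU(),
+        nn.Linear(width, num_classes),
+    )
+
+F = make_classifier(width=512)   # the larger model
+A = make_classifier(width=64)    # the smaller model
+\end{verbatim}
+
+Both models are then trained jointly with the losses defined in the following subsection.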
 
 \begin{figure}[h]
     \centering
@@ -153,12 +154,41 @@ In Figure~\ref{fig:cmpl-structure} one can see its structure.
 \end{figure}
 
 \subsection{Math of CMPL}\label{subsec:math}
+The loss function of CMPL is similar to the one explained above.
+But we have to distinguish between the supervised loss, computed on samples whose labels are known, and the unsupervised loss, where no labels are known.
+
+The two equations~\ref{eq:cmpl-losses1} and~\ref{eq:cmpl-losses2} are normal Cross-Entropy loss functions computed with the supervised labels for the two separate models.
+
+
+\begin{align}
+    \label{eq:cmpl-losses1}
+    \mathcal{L}_s^F &= \frac{1}{B_l} \sum_{i=1}^{B_l} \mathcal{H}(y_i,F(\mathcal{T}^F_{\text{standard}}(v_i)))\\
+    \label{eq:cmpl-losses2}
+    \mathcal{L}_s^A &= \frac{1}{B_l} \sum_{i=1}^{B_l} \mathcal{H}(y_i,A(\mathcal{T}^A_{\text{standard}}(v_i)))
+\end{align}
+
+Equations~\ref{eq:cmpl-loss3} and~\ref{eq:cmpl-loss4} are the unsupervised losses.
+They are very similar to FixMatch, but the pseudo-labels are crossed: each model is trained on the confident predictions of the respective other model.
+
+\begin{align}
+    \label{eq:cmpl-loss3}
+    \mathcal{L}_u^F &= \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i^A) \geq \tau) \mathcal{H}(\hat{y}_i^A,F(\mathcal{T}_{\text{strong}}(u_i)))\\
+    \label{eq:cmpl-loss4}
+    \mathcal{L}_u^A &= \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i^F) \geq \tau) \mathcal{H}(\hat{y}_i^F,A(\mathcal{T}_{\text{strong}}(u_i)))
+\end{align}
+
+Finally, to train the main objective, an overall loss is calculated by simply summing all the losses.
+The unsupervised part is weighted by a hyperparameter $\lambda$ that balances it against the supervised losses.
+
 \begin{equation}
-    \label{eq:equation}
-    \mathcal{L}_u = \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i) \geq \tau) \mathcal{H}(\hat{y}_i,F(\mathcal{T}_{\text{strong}}(u_i)))
+    \label{eq:loss-main-obj}
+    \mathcal{L} = (\mathcal{L}_s^F + \mathcal{L}_s^A) + \lambda(\mathcal{L}_u^F + \mathcal{L}_u^A)
 \end{equation}
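+
+To illustrate how these four losses and the overall objective in Equation~\ref{eq:loss-main-obj} could be computed in practice, here is a minimal PyTorch-style sketch; it is not the authors' implementation, and the augmentation callables, the confidence threshold and the weight $\lambda$ are placeholder assumptions.
+
+\begin{verbatim}
+# Minimal sketch of the CMPL objective. F and A are the two models, x_l/y_l a
+# labeled batch (assumed to be already standard-augmented), u an unlabeled
+# batch. weak/strong are placeholder augmentation callables; tau and lam are
+# placeholder hyperparameters -- none of this is the paper's actual code.
+import torch
+import torch.nn.functional as func
+
+def cmpl_loss(F, A, x_l, y_l, u, weak, strong, tau=0.95, lam=1.0):
+    # Supervised cross-entropy for both models on the labeled batch.
+    loss_s = func.cross_entropy(F(x_l), y_l) + func.cross_entropy(A(x_l), y_l)
+
+    # Pseudo-labels from weakly augmented unlabeled samples (no gradients).
+    with torch.no_grad():
+        conf_f, y_hat_f = torch.softmax(F(weak(u)), dim=-1).max(dim=-1)
+        conf_a, y_hat_a = torch.softmax(A(weak(u)), dim=-1).max(dim=-1)
+
+    # Cross-model unsupervised losses: each model is trained on the *other*
+    # model's confident pseudo-labels, evaluated on strongly augmented inputs.
+    u_s = strong(u)
+    loss_u_f = (func.cross_entropy(F(u_s), y_hat_a, reduction="none")
+                * (conf_a >= tau).float()).mean()
+    loss_u_a = (func.cross_entropy(A(u_s), y_hat_f, reduction="none")
+                * (conf_f >= tau).float()).mean()
+
+    # Overall objective: supervised part plus lambda-weighted unsupervised part.
+    return loss_s + lam * (loss_u_f + loss_u_a)
+\end{verbatim}
+
+Backpropagating the value returned by such a function would update both models at once, which mirrors the single summed objective above.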
 
-\section{Performance}
+\section{Performance}\label{sec:performance}
+
+In Figure~\ref{fig:results} a performance comparison is shown between training with only the supervised samples and training with several different pseudo-label frameworks.
+One can clearly see that the performance gain with the new CMPL framework is quite significant.
 
 \begin{figure}[h]
     \centering
@@ -178,35 +208,7 @@ In Figure~\ref{fig:cmpl-structure} one can see its structure.
 %% If your work has an appendix, this is the place to put it.
 \appendix
-\section{Research Methods}
-
-\subsection{Part One}
-
-Lorem ipsum dolor sit amet, consectetur adipiscing elit. Morbi
-malesuada, quam in pulvinar varius, metus nunc fermentum urna, id
-sollicitudin purus odio sit amet enim. Aliquam ullamcorper eu ipsum
-vel mollis. Curabitur quis dictum nisl. Phasellus vel semper risus, et
-lacinia dolor. Integer ultricies commodo sem nec semper.
-
-\subsection{Part Two}
-
-Etiam commodo feugiat nisl pulvinar pellentesque. Etiam auctor sodales
-ligula, non varius nibh pulvinar semper. Suspendisse nec lectus non
-ipsum convallis congue hendrerit vitae sapien. Donec at laoreet
-eros. Vivamus non purus placerat, scelerisque diam eu, cursus
-ante. Etiam aliquam tortor auctor efficitur mattis.
-
-\section{Online Resources}
-
-Nam id fermentum dui. Suspendisse sagittis tortor a nulla mollis, in
-pulvinar ex pretium. Sed interdum orci quis metus euismod, et sagittis
-enim maximus. Vestibulum gravida massa ut felis suscipit
-congue. Quisque mattis elit a risus ultrices commodo venenatis eget
-dui. Etiam sagittis eleifend elementum.
-
-Nam interdum magna at lectus dignissim, ac dignissim lorem
-rhoncus. Maecenas eu arcu ac neque placerat aliquam. Nunc pulvinar
-massa et mattis lacinia.
+% appendix
 \end{document}
 \endinput