add lots of stuff to materialandmethods and results
This commit is contained in: parent 74ed28a377, commit 841c8deb6d
@ -1,5 +1,6 @@
\section{Experimental Results}\label{sec:experimental-results}

\subsection{Does Active-Learning benefit the learning process?}\label{subsec:does-active-learning-benefit-the-learning-process?}

With the test setup described in Section~\ref{sec:implementation}, a test series was performed.
Several different batch sizes $\mathcal{B} = \left\{ 2,4,6,8 \right\}$ and sample sizes $\mathcal{S} = \left\{ 2\mathcal{B}_i,4\mathcal{B}_i,5\mathcal{B}_i,10\mathcal{B}_i \right\}$
@ -8,57 +9,57 @@ We define the baseline (passive learning) AUC curve as the supervised learning p
The following graphs are only a subselection of the test series, chosen because they give the most insight.

\begin{figure}
\centering
\hspace*{-0.1\linewidth}\includegraphics[width=1.2\linewidth]{../rsc/AUC_normal_lowcer_2_10}
\caption{AUC with $\mathcal{B} = 2$ and $\mathcal{S}=10$}
\label{fig:auc_normal_lowcer_2_10}
\end{figure}

\begin{figure}
\centering
\hspace*{-0.1\linewidth}\includegraphics[width=1.2\linewidth]{../rsc/AUC_normal_lowcer_2_20}
\caption{AUC with $\mathcal{B} = 2$ and $\mathcal{S}=20$}
\label{fig:auc_normal_lowcer_2_20}
\end{figure}

\begin{figure}
\centering
\hspace*{-0.1\linewidth}\includegraphics[width=1.2\linewidth]{../rsc/AUC_normal_lowcer_2_50}
\caption{AUC with $\mathcal{B} = 2$ and $\mathcal{S}=50$}
\label{fig:auc_normal_lowcer_2_50}
\end{figure}

\begin{figure}
\centering
\hspace*{-0.1\linewidth}\includegraphics[width=1.2\linewidth]{../rsc/AUC_normal_lowcer_4_16}
\caption{AUC with $\mathcal{B} = 4$ and $\mathcal{S}=16$}
\label{fig:auc_normal_lowcer_4_16}
\end{figure}

\begin{figure}
\centering
\hspace*{-0.1\linewidth}\includegraphics[width=1.2\linewidth]{../rsc/AUC_normal_lowcer_4_24}
\caption{AUC with $\mathcal{B} = 4$ and $\mathcal{S}=24$}
\label{fig:auc_normal_lowcer_4_24}
\end{figure}

\begin{figure}
\centering
\hspace*{-0.1\linewidth}\includegraphics[width=1.2\linewidth]{../rsc/AUC_normal_lowcer_8_16}
\caption{AUC with $\mathcal{B} = 8$ and $\mathcal{S}=16$}
\label{fig:auc_normal_lowcer_8_16}
\end{figure}

\begin{figure}
\centering
\hspace*{-0.1\linewidth}\includegraphics[width=1.2\linewidth]{../rsc/AUC_normal_lowcer_8_32}
\caption{AUC with $\mathcal{B} = 8$ and $\mathcal{S}=32$}
\label{fig:auc_normal_lowcer_8_32}
\end{figure}

Generally, a pattern can be seen: the lower the batch size $\mathcal{B}$, the more benefit is gained from active learning.
This may be caused by the fast model convergence.
The lower $\mathcal{B}$ is, the more pre-prediction decision points are required.
This helps to direct the learning with better samples of the selected metric.
When the batch size is higher, the model already converges to a good AUC value before the same number of pre-predictions is reached.

@ -66,17 +67,31 @@ Moreover, when increasing the sample-space $\mathcal{S}$ from where the pre-pred
This is because the selected subset $\pmb{x} \sim \mathcal{X}_U$ has a higher chance of containing relevant elements corresponding to the selected metric.
Keep in mind, however, that this improvement comes with a performance penalty, because more model evaluations are required to predict the ranking scores.

Figures~\ref{fig:auc_normal_lowcer_2_10},~\ref{fig:auc_normal_lowcer_2_20} and~\ref{fig:auc_normal_lowcer_2_50} show the AUC curves with a batch size of 2 and a sample size of 10, 20 and 50 respectively.
On all three graphs the active learning curve outperforms the passive learning curve in all four scenarios.
Generally, the larger the sample space $\mathcal{S}$, the better the performance.

Figures~\ref{fig:auc_normal_lowcer_4_16} and~\ref{fig:auc_normal_lowcer_4_24} show the AUC curves with a batch size of 4 and a sample size of 16 and 24 respectively.
The performance is already much worse compared to the results above with a batch size of 2.
Only the low-certainty-first approach outperforms passive learning in both cases.
The other methods are as good as or worse than the passive learning curve.

Figures~\ref{fig:auc_normal_lowcer_8_16} and~\ref{fig:auc_normal_lowcer_8_32} show the AUC curves with a batch size of 8 and a sample size of 16 and 32 respectively.
The performance is even worse compared to the results above with a batch size of 4.
This might be the case because the model already converges to a good AUC value before the same number of pre-predictions is reached.

\subsection{Are Dagster and Label-Studio proper tooling to build an AL loop?}\label{subsec:is-dagster-and-label-studio-a-proper-tooling-to-build-an-al-loop?}

The combination of Dagster and Label-Studio is a good choice for building an active-learning loop.
Dagster provides a clean way to build pipelines and to keep track of the data in its web UI\@.
Label-Studio provides a great API which can be used to update the predictions of the model from the Dagster pipeline.

% todo write stuff here

\subsection{Does balancing the learning samples improve performance?}\label{subsec:does-balancing-the-learning-samples-improve-performance?}

Not really.

% todo add img and add stuff
@ -1,29 +1,5 @@
\section{Implementation}\label{sec:implementation}

\subsection{Jupyter}\label{subsec:jupyter}

To get accurate performance measures, the active-learning process was first implemented in a Jupyter notebook.
@ -58,3 +34,48 @@ match predict_mode:
\end{lstlisting}

Moreover, the dataset was manually imported and preprocessed with random augmentations.
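The exact augmentation parameters are not listed here; a minimal sketch of such a preprocessing pipeline, assuming torchvision and hypothetical parameter values, could look as follows:

\begin{lstlisting}[language=Python]
from torchvision import transforms

# hypothetical augmentation pipeline; the augmentations and parameters
# actually used in the notebook may differ
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
])
\end{lstlisting}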

\subsection{Dagster with Label-Studio}\label{subsec:dagster-with-label-studio}

The main goal is to implement an active learning loop with the help of Dagster and Label-Studio.
The active learning loop was split as much as possible into assets and graph assets.
This helps to build reusable building blocks and to keep the code clean.

Most of the Python routines implemented in Section~\ref{subsec:jupyter} were reused here and only slightly modified to fit the Dagster pipeline.

% todo short this figure to half!
\begin{figure}
\centering
\includegraphics[width=\linewidth]{../rsc/dagster/assets}
\caption{Dagster asset graph}
\label{fig:dagster_assets}
\end{figure}

Figure~\ref{fig:dagster_assets} shows the implemented assets into which the task is split.
Whenever an asset materializes it is stored in the Dagster database.
This helps to keep track of the data and to rerun the pipeline with the same data.
\textit{train\_sup\_model} is the main asset that trains the model with the labeled samples.
\textit{inference\_unlabeled\_samples} is the asset that predicts the scores for the unlabeled samples and updates them via the Label-Studio API.
This pipeline is triggered by the machine learning backend defined within Label-Studio.
It runs every $\mathcal{B} - t$ samples, where $t$ is a buffer that compensates for the time the pipeline needs to run and avoids overlaps (e.g.\ $t=10$).
This way Label-Studio always has samples with scores produced by the current model to rank the samples presented to the user.
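To illustrate the structure, the following is a minimal sketch of how such assets can be declared with Dagster; the asset bodies are placeholders, not the actual implementation:

\begin{lstlisting}[language=Python]
from dagster import Definitions, asset


@asset
def labeled_samples() -> list:
    # placeholder: load the currently labeled samples (e.g. from Label-Studio)
    return []


@asset
def train_sup_model(labeled_samples: list) -> dict:
    # placeholder: train the supervised model on the labeled samples;
    # the returned object is materialized and persisted by Dagster
    return {"trained_on": len(labeled_samples)}


@asset
def inference_unlabeled_samples(train_sup_model: dict) -> dict:
    # placeholder: predict certainty scores for unlabeled samples and
    # push them to Label-Studio via its API
    return {}


defs = Definitions(assets=[labeled_samples, train_sup_model, inference_unlabeled_samples])
\end{lstlisting}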
\begin{figure}
\centering
\subfloat[Score prediction graph asset]{
\includegraphics[width=0.45\linewidth]{../rsc/dagster/predict_scores}
\label{fig:predict_scores}
}
\hfill
\subfloat[Model training graph asset]{
\includegraphics[width=0.45\linewidth]{../rsc/dagster/train_model}
\label{fig:train_model}
}
\caption{Dagster graph assets}
\end{figure}

Figure~\ref{fig:train_model} shows the train model asset in detail.
It loads the training data, trains the model and saves the model automatically thanks to the Dagster asset system.
Moreover, testing data is loaded and the model is evaluated on it to get a performance measure.
Figure~\ref{fig:predict_scores} shows the predict scores asset in detail.
It draws $\mathcal{S}$ samples from the unlabeled sample set $\mathcal{X}_U$ and predicts their scores.
Then it connects to the Label-Studio API with an API key and updates the scores of the samples.
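A sketch of how such an update could look is shown below; it assumes the \texttt{label-studio-sdk} Python client, and the URL, API key, project id, task ids and scores are placeholders:

\begin{lstlisting}[language=Python]
from label_studio_sdk import Client

# placeholders: URL, API key and project id of the Label-Studio instance
ls = Client(url="http://localhost:8080", api_key="<API_KEY>")
project = ls.get_project(1)

# certainty scores produced by the current model, keyed by task id (made up)
scores = {101: 0.42, 102: 0.97}

for task_id, score in scores.items():
    # store the score as a prediction so Label-Studio can rank the
    # labeling queue by model certainty
    project.create_prediction(task_id=task_id, result=[], score=score)
\end{lstlisting}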
@ -2,9 +2,10 @@
\subsection{Motivation}\label{subsec:motivation}
For most supervised learning tasks lots of training samples are essential.
With too little training data the model will not generalize well and not fit a real world task.
Labeling datasets is commonly seen as an expensive task and should be avoided as much as possible.\cite{generalAI}
That is why there is a machine-learning field called active learning.
The general approach is to train a model that predicts within every iteration a ranking metric or pseudo-labels, which can then be used to rank the importance of samples to be labeled by an oracle.
These labeled samples are then used to train the model.\cite{activelearning}

The goal of this practical work is to test active learning within a simple classification task and evaluate its performance.
\subsection{Research Questions}\label{subsec:research-questions}
@ -20,5 +21,11 @@ The sample-selection metric might select samples just from one class by chance.
Does balancing this distribution help the model performance?
\subsection{Outline}\label{subsec:outline}

In Section~\ref{sec:material-and-methods} we talk about the general methods and materials we use.
First the problem is modeled mathematically in~\ref{subsubsec:mathematicalmodeling} and then implemented and benchmarked in a Jupyter notebook in~\ref{subsubsec:jupyternb}.
Section~\ref{sec:implementation} gives deeper insights into the implementation for the interested reader.
The conclusion~\ref{subsec:conclusion} provides an overview of the findings, highlighting the benefits of active learning.
Additionally, the outlook section suggests avenues for future research which are not covered in this work.
The experimental results are presented in Section~\ref{sec:experimental-results} with figures illustrating the performance of active learning across different sample sizes and batch sizes.

% todo proper linking to sections
@ -18,9 +18,24 @@ An Op is a function that performs a task and can be used to split the code into
Dagster has a well-built web interface to monitor jobs and pipelines. \cite{dagster}

\subsubsection{Label-Studio}
Label-Studio is a data labeling tool that can be used to label images, text, audio and video data.
This makes it an excellent choice for incorporating human feedback into an active learning loop.

Label-Studio provides a wide range of annotation interfaces and can be extended with custom ones.
Arbitrary data can be passed to the labeling frontend using labeling tasks in the form of JSON files.
It is open-source and can be used for free.
Label-Studio offers a seamless integration with active learning pipelines by allowing the user to define custom machine-learning backends.
It is designed for scalability and can be easily deployed on a cloud infrastructure using Kubernetes or Helm.\cite{labelstudio}

\subsubsection{Jupyter Notebook}\label{subsubsec:jupyternb}

A Jupyter notebook is a shareable document which combines code and its output, text and visualizations.
The notebook along with the editor provides an environment for fast prototyping and data analysis.
It is widely used in the data science, mathematics and machine learning community.

In the case of this practical work it can be used to test and evaluate the active learning loop before implementing it in a Dagster pipeline. \cite{jupyter}

\subsubsection{Muffin vs chihuahua}
Muffin vs chihuahua is a free dataset available on Kaggle.
It consists of $\sim6000$ images of muffins and chihuahuas.
@ -34,15 +49,13 @@ It is used in this practical work for a binary classification task to evaluate t
Active learning is a subfield of supervised learning.
The key idea is that if the algorithm is allowed to choose the data it learns from, it can perform better with less data.
A supervised classifier requires hundreds or even thousands of labeled samples to perform well.
Those labeled samples must be manually labeled by an oracle (human expert).\cite{RubensRecSysHB2010}

Clearly this results in a huge bottleneck for the training procedure.
Active learning aims to overcome this bottleneck by selecting the most informative samples to be labeled.\cite{settles.tr09}

The active learning process can be modeled as a loop as shown in Figure~\ref{fig:active-learning-workflow}.
\begin{figure}
\centering
\begin{tikzpicture}[node distance=2cm]
\node (start) [startstop] {Start};
@ -64,8 +77,14 @@ Todo \cite{RubensRecSysHB2010} \cite{settles.tr09}
\draw [arrow] (pro4) -- (pro2);
\end{tikzpicture}
\caption{Basic active-learning workflow}
\label{fig:active-learning-workflow}
\end{figure}

The active learning loop starts with model inference on $\mathcal{S}$ samples.
The $\mathcal{B}$ most uncertain samples are selected and given to the oracle\footnote{Human annotator} for labeling.
Those labeled samples are then used to train the model.
The loop starts again with the new model and draws new samples from the unlabeled sample set $\mathcal{X}_U$.
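This loop can be summarized in a short, framework-agnostic sketch; the \texttt{train}, \texttt{certainty} and \texttt{oracle\_label} callables are placeholders for the concrete implementation:

\begin{lstlisting}[language=Python]
import random
from typing import Any, Callable, List, Tuple


def active_learning_loop(
    train: Callable,         # retrains the model on the labeled set
    certainty: Callable,     # certainty score of the model for one sample
    oracle_label: Callable,  # asks the oracle (human annotator) for a label
    labeled: List[Tuple[Any, Any]],
    unlabeled: List[Any],
    batch_size: int,         # B
    sample_size: int,        # S
    iterations: int,
) -> Any:
    model = train(labeled)
    for _ in range(iterations):
        # model inference on S randomly drawn unlabeled samples
        pool = random.sample(unlabeled, min(sample_size, len(unlabeled)))
        # select the B most uncertain samples and hand them to the oracle
        query = sorted(pool, key=lambda x: certainty(model, x))[:batch_size]
        for x in query:
            labeled.append((x, oracle_label(x)))
            unlabeled.remove(x)
        # retrain with the newly labeled samples and repeat
        model = train(labeled)
    return model
\end{lstlisting}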

\subsubsection{Semi-Supervised learning}
In traditional supervised learning we have a labeled dataset.
Each datapoint is associated with a corresponding target label.
@ -90,13 +109,24 @@ The more the curve ascents the upper-left or bottom-right corner the better the
\begin{figure}
\centering
\includegraphics[width=\linewidth/2]{../rsc/Roc_curve.svg}
\caption{ROC curve comparison of two classifiers. \cite{ROCWikipedia}}
\label{fig:roc-example}
\end{figure}

Furthermore, the area under this curve is called the AUC and is a useful metric to measure the performance of a binary classifier. \cite{suptechniques}
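As an illustration, both the ROC curve and its AUC can be computed from predicted probabilities, for example with scikit-learn (the values below are made up):

\begin{lstlisting}[language=Python]
from sklearn.metrics import roc_auc_score, roc_curve

# made-up ground-truth labels and predicted probabilities of the positive class
y_true = [0, 0, 1, 1, 1, 0, 1, 0]
y_prob = [0.1, 0.4, 0.35, 0.8, 0.9, 0.2, 0.65, 0.55]

fpr, tpr, thresholds = roc_curve(y_true, y_prob)  # points of the ROC curve
auc = roc_auc_score(y_true, y_prob)               # area under the ROC curve
print(f"AUC = {auc:.3f}")
\end{lstlisting}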
\subsubsection{ResNet}

Residual neural networks are a special type of neural network architecture.
They are especially well suited for deep learning and have been used in many state-of-the-art computer vision tasks.
The main idea behind ResNet is the skip connection.
A skip connection is a direct connection from one layer to a later layer that is not the next layer.
This helps to avoid the vanishing gradient problem and helps with the training of very deep networks.
ResNet has proven to be very successful in many computer vision tasks and is used in this practical work for the classification task.
There are several different ResNet architectures; the most common are ResNet-18, ResNet-34, ResNet-50, ResNet-101 and ResNet-152. \cite{resnet}

Since the dataset is relatively small and the two-class classification task is relatively easy, the ResNet-18 architecture is used in this practical work.
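A minimal sketch of how such a model can be set up in PyTorch is shown below; whether pretrained weights and one or two output neurons were used is an implementation detail that is not fixed here:

\begin{lstlisting}[language=Python]
import torch.nn as nn
from torchvision.models import resnet18

# ResNet-18 backbone; the final fully connected layer is replaced with a
# single output neuron for the binary muffin-vs-chihuahua classification
model = resnet18(weights=None)  # optionally ImageNet-pretrained weights
model.fc = nn.Linear(model.fc.in_features, 1)
\end{lstlisting}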

\subsubsection{CNN}
Convolutional neural networks are especially good model architectures for processing images, speech and audio signals.
A CNN typically consists of convolutional layers, pooling layers and fully connected layers.
@ -139,9 +169,7 @@ And~\eqref{eq:crelbinary} is the special case of the general Cross Entropy Loss

$\mathcal{L}(p,q)$~\eqref{eq:crelbinarybatch} is the Binary Cross Entropy Loss for a batch of size $\mathcal{B}$ and is used for model training in this PW.
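In PyTorch this batch loss corresponds, for example, to \texttt{BCEWithLogitsLoss}; the tensors below are made-up example values for a batch of size $\mathcal{B}=4$:

\begin{lstlisting}[language=Python]
import torch
import torch.nn as nn

# made-up raw model outputs (logits) and ground-truth labels for B = 4
logits = torch.tensor([0.8, -1.2, 2.1, 0.3])
targets = torch.tensor([1.0, 0.0, 1.0, 0.0])

# sigmoid + binary cross entropy, averaged over the batch
criterion = nn.BCEWithLogitsLoss()
loss = criterion(logits, targets)
print(loss.item())
\end{lstlisting}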

\subsubsection{Mathematical modeling of problem}\label{subsubsec:mathematicalmodeling}

Here the task is modeled as a mathematical problem to get a better understanding of how the problem is solved.
@ -188,10 +216,10 @@ We define $\text{min}_n(S)$ and $\text{max}_n(S)$ respectively in~\ref{eq:minnot
This notation helps to define which subsets of samples to give the user for labeling.
There are different ways in which this subset can be chosen.
In this PW we do the obvious experiments with High-Certainty first~\ref{par:high-certainty-first} and Low-Certainty first~\ref{par:low-certainty-first}.
Furthermore, we evaluate the two mixtures between them: half high- and half low-certainty samples, and only the middle section of the sorted certainty scores.

\paragraph{Low certainty first}\label{par:low-certainty-first}
We take the samples with the lowest certainty score first and give them to the user for labeling.
This is the most intuitive way to do active learning and might also be the most beneficial.
@ -199,7 +227,7 @@ This is the most intuitive way to do active learning and might also be the most
\mathcal{X}_t = \text{min}_\mathcal{B}(S(z))
\end{equation}

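In code, the $\text{min}_\mathcal{B}$ selection can be expressed, for instance, with \texttt{torch.topk}; the scores below are made-up example values:

\begin{lstlisting}[language=Python]
import torch

# made-up certainty scores S(z) for the sampled subset
scores = torch.tensor([0.93, 0.51, 0.88, 0.62, 0.99, 0.55])
B = 2  # batch size

# indices of the B samples with the lowest certainty scores
_, query_indices = torch.topk(scores, k=B, largest=False)
print(query_indices)  # these samples are handed to the oracle for labeling
\end{lstlisting}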
\paragraph{High certainty first}\label{par:high-certainty-first}
We take the samples with the highest certainty score first and give them to the user for labeling.
The idea behind this is that the model is already very certain about the prediction and the user can confirm it.
This might help to ignore labels which are irrelevant for the model.
@ -58,6 +58,22 @@ and Sardinha, Alberto",
note = "[Online; accessed 12-April-2024]"
}

@misc{labelstudio,
author = {},
title = {{Label Studio Documentation}},
howpublished = "\url{https://labelstud.io/guide/}",
year = {2024},
note = "[Online; accessed 13-May-2024]"
}

@misc{jupyter,
author = {},
title = {{Project Jupyter Documentation}},
howpublished = "\url{https://docs.jupyter.org/en/latest/}",
year = {2024},
note = "[Online; accessed 13-May-2024]"
}

@misc{muffinsvschiuahuakaggle,
author = {},
title = {{Muffin vs Chihuahua Kaggle Dataset}},
@ -85,3 +101,37 @@ doi = {10.1007/978-0-387-85820-3_23}
Type = {Computer Sciences Technical Report},
Year = {2009},
}

@misc{generalAI,
author = {Johannes Brandstetter},
title = {Lecture notes in Theoretical Concepts of Machine Learning},
month = {May},
year = {2024},
publisher = {Johannes Kepler Universität Linz}
}

@misc{suptechniques,
author = {Andreas Radler and Markus Holzleitner},
title = {Lecture notes in Machine Learning: Supervised Techniques},
month = {September},
year = {2022},
publisher = {Johannes Kepler Universität Linz}
}

@online{ROCWikipedia,
author = "Wikimedia Commons",
title = "Receiver operating characteristic",
year = "2024",
urlseen = "13-05-24",
url = "https://commons.wikimedia.org/wiki/File:Roc_curve.svg",
note = "File: {\ttfamily Roc curve.svg}",
}

@misc{resnet,
title = {Deep Residual Learning for Image Recognition},
author = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun},
year = {2015},
eprint = {1512.03385},
archivePrefix = {arXiv},
primaryClass = {cs.CV}
}