finish first round of presentation
This commit is contained in:
parent
88a55247d0
commit
25b84b5ed4
@ -5,14 +5,19 @@
|
|||||||
%----------------------------------------------------------------------------------------
|
%----------------------------------------------------------------------------------------
|
||||||
\usetheme{focus}
|
\usetheme{focus}
|
||||||
|
|
||||||
|
|
||||||
\usepackage[utf8]{inputenc}
|
\usepackage[utf8]{inputenc}
|
||||||
|
|
||||||
\usepackage{booktabs}
|
\usepackage{booktabs}
|
||||||
\usepackage{amsmath}
|
\usepackage{amsmath}
|
||||||
|
\usepackage{amssymb}
|
||||||
|
\usepackage{amsfonts}
|
||||||
|
\usepackage{bbm}
|
||||||
\usepackage{hyperref}
|
\usepackage{hyperref}
|
||||||
\usepackage{graphicx}
|
\usepackage{graphicx}
|
||||||
\usepackage{listings}
|
\usepackage{listings}
|
||||||
\usepackage{xcolor}
|
\usepackage{xcolor}
|
||||||
|
\usepackage{mathtools}
|
||||||
|
|
||||||
% Farbdefinitionen
|
% Farbdefinitionen
|
||||||
\definecolor{backgroundcoloreq}{RGB}{180,140,0}
|
\definecolor{backgroundcoloreq}{RGB}{180,140,0}
|
||||||
@ -21,6 +26,9 @@
|
|||||||
\definecolor{codepurple}{rgb}{0.58,0,0.82}
|
\definecolor{codepurple}{rgb}{0.58,0,0.82}
|
||||||
\definecolor{codeorange}{RGB}{190,100,0}
|
\definecolor{codeorange}{RGB}{190,100,0}
|
||||||
|
|
||||||
|
% we want to use the default calligraphic alphabet
|
||||||
|
\DeclareMathAlphabet{\mathcal}{OMS}{cmbrs}{m}{n}
|
||||||
|
|
||||||
|
|
||||||
\lstset{
|
\lstset{
|
||||||
language=C,
|
language=C,
|
||||||
@ -75,20 +83,20 @@
|
|||||||
%----------------------------------------------------------------------------------------
|
%----------------------------------------------------------------------------------------
|
||||||
% todo pic of action
|
% todo pic of action
|
||||||
|
|
||||||
\section{The goal}
|
\section{The Goal}
|
||||||
\begin{frame}{The goal}
|
\begin{frame}{The goal}
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item train model
|
\item Train model
|
||||||
\item recognize action of person
|
\item Recognize action of person
|
||||||
\item from video [$\approx$10sec]
|
\item From video [$\approx$10sec]
|
||||||
\item eg.:
|
\item E.g.:
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item brushing hair
|
\item brushing hair
|
||||||
\item riding bike
|
\item riding bike
|
||||||
\item dancing
|
\item dancing
|
||||||
\item playing violin
|
\item playing violin
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\item as generic as possible
|
\item As generic as possible
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{frame}
|
\end{frame}
|
||||||
|
|
||||||
@ -173,6 +181,7 @@
|
|||||||
% SECTION 2
|
% SECTION 2
|
||||||
%----------------------------------------------------------------------------------------
|
%----------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
\section{Cross-Model Pseudo-Labeling}
|
\section{Cross-Model Pseudo-Labeling}
|
||||||
|
|
||||||
\begin{frame}[allowframebreaks]{Paper's approach}
|
\begin{frame}[allowframebreaks]{Paper's approach}
|
||||||
@ -196,7 +205,7 @@
|
|||||||
|
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Cross-Model Pseudo-Labeling
|
\item Cross-Model Pseudo-Labeling
|
||||||
\item Primary backbone
|
\item Primary backbone (large model)
|
||||||
\item Supplemented by lightweight auxiliary network
|
\item Supplemented by lightweight auxiliary network
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Different structure
|
\item Different structure
|
||||||
@ -206,11 +215,128 @@
|
|||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{frame}
|
\end{frame}
|
||||||
|
|
||||||
|
\begin{frame}{Structure Visualization}
|
||||||
\begin{frame}{Performance glance}
|
\includegraphics[scale=.17]{rsc/structure}
|
||||||
todo the pic of the performance graph
|
|
||||||
\end{frame}
|
\end{frame}
|
||||||
|
|
||||||
|
\begin{frame}{Performance Perspectives}
|
||||||
|
\includegraphics[scale=.205]{rsc/performance_comparison}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
|
||||||
|
\section{Give me the math!}
|
||||||
|
|
||||||
|
\begin{frame}{Definitions}
|
||||||
|
\begin{itemize}
|
||||||
|
\item Labeled data set of size $N_l$\\
|
||||||
|
$\mathcal{V} = \{(v_1,y_1), \dots, (v_{N_l}, y_{N_l})\}$
|
||||||
|
\item Unlabeled data set of size $N_u$\\
|
||||||
|
$\mathcal{U} = \{u_1, \dots, u_{N_u}\}$
|
||||||
|
\item In general $\lvert\mathcal{U}\rvert \gg \lvert\mathcal{V}\rvert$
|
||||||
|
\end{itemize}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
\begin{frame}[allowframebreaks]{How the existing method \emph{FixMatch} works}
|
||||||
|
\begin{itemize}
|
||||||
|
\item $\mathbbm{1} \coloneqq \text{Indicator Function}$
|
||||||
|
\item $B_u \coloneqq \text{Batch size (unlabeled data)}$
|
||||||
|
\item $\mathcal{T} \coloneqq \text{Confidence Threshold}$
|
||||||
|
\item $F(\mathcal{T}_{\text{strong}}(u_i)) \coloneqq \text{Class distribution}$
|
||||||
|
\item $p_i \coloneqq F(\mathcal{T}_{\text{weak}}(u_i))$
|
||||||
|
\item $\hat{y}_i \coloneqq \arg \max(p_i) \coloneqq \text{Pseudo Label}$
|
||||||
|
\item $\mathcal{H} \coloneqq \text{Cross-entropy loss}$
|
||||||
|
\item $\mathcal{L}_u \coloneqq \text{Loss on the unlabeled data}$
|
||||||
|
\item $F \coloneqq \text{Model}$
|
||||||
|
\end{itemize}
|
||||||
|
\begin{align*}
|
||||||
|
\mathcal{L}_u = \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i) \geq \mathcal{T}) \mathcal{H}(\hat{y}_i,F(\mathcal{T}_{\text{strong}}(u_i)))
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
\framebreak
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item $\mathbbm{1}(\max(p_i) \geq \mathcal{T})$
|
||||||
|
\begin{itemize}
|
||||||
|
\item `confidence-based masking'
|
||||||
|
\item retain label only if largest probability is above threshold
|
||||||
|
\item keep only `high confidence' labels
|
||||||
|
\end{itemize}
|
||||||
|
\item $\mathcal{H}(\hat{y}_i,F(\mathcal{T}_{\text{strong}}(u_i)))$
|
||||||
|
\begin{itemize}
|
||||||
|
\item `consistency regularization'
|
||||||
|
\item cross-entropy loss between the pseudo label from the weakly augmented input and the prediction on the strongly augmented input
|
||||||
|
\end{itemize}
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
\begin{frame}[allowframebreaks]{CMPL (Cross-Model Pseudo-Labeling)}
|
||||||
|
\begin{itemize}
|
||||||
|
\item $F(\cdot) \coloneqq \text{Primary backbone}$
|
||||||
|
\item $A(\cdot) \coloneqq \text{Auxiliary network}$
|
||||||
|
\item Learning on labeled data
|
||||||
|
\begin{align*}
|
||||||
|
\mathcal{L}_s^F &= \frac{1}{B_l} \sum_{i=1}^{B_l} \mathcal{H}(y_i,F(\mathcal{T}^F_{\text{standard}}(v_i)))\\
|
||||||
|
\mathcal{L}_s^A &= \frac{1}{B_l} \sum_{i=1}^{B_l} \mathcal{H}(y_i,A(\mathcal{T}^F_{\text{standard}}(v_i)))
|
||||||
|
\end{align*}
|
||||||
|
\item $\mathcal{T}^F_{\text{standard}}(v_i) \coloneqq \text{standard augmentations for action recognition}$
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\framebreak
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item Learning on unlabeled data
|
||||||
|
\begin{align*}
|
||||||
|
\mathcal{L}_u^F &= \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i^A) \geq \mathcal{T}) \mathcal{H}(\hat{y}_i^A,F(\mathcal{T}_{\text{strong}}(u_i)))\\
|
||||||
|
\mathcal{L}_u^A &= \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i^F) \geq \mathcal{T}) \mathcal{H}(\hat{y}_i^F,A(\mathcal{T}_{\text{strong}}(u_i)))
|
||||||
|
\end{align*}
|
||||||
|
\item Complete training objective
|
||||||
|
\begin{align*}
|
||||||
|
\mathcal{L} = (\mathcal{L}_s^F + \mathcal{L}_s^A) + \lambda(\mathcal{L}_u^F + \mathcal{L}_u^A)
|
||||||
|
\end{align*}
|
||||||
|
\item $\lambda \coloneqq \text{Balancing coefficient for unsupervised loss}$
|
||||||
|
\end{itemize}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
\section{Implementation}
|
||||||
|
|
||||||
|
\begin{frame}{Networks}
|
||||||
|
\begin{itemize}
|
||||||
|
\item Auxiliary Network
|
||||||
|
\begin{itemize}
|
||||||
|
\item sub-network of primary model
|
||||||
|
\item 3D-ResNet18
|
||||||
|
\item \textbf{3D-ResNet50x1/4}
|
||||||
|
\end{itemize}
|
||||||
|
\item Backbone network
|
||||||
|
\begin{itemize}
|
||||||
|
\item larger version of aux-net
|
||||||
|
\item \textbf{3D-ResNet50}
|
||||||
|
\end{itemize}
|
||||||
|
\end{itemize}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
\begin{frame}{Dataset}
|
||||||
|
\begin{itemize}
|
||||||
|
\item Kinetics-400
|
||||||
|
\begin{itemize}
|
||||||
|
\item 400 categories
|
||||||
|
\item 240k/20k training/validation samples
|
||||||
|
\end{itemize}
|
||||||
|
\item UCF-101
|
||||||
|
\begin{itemize}
|
||||||
|
\item 101 classes
|
||||||
|
\item 9.5k/4k training/validation samples
|
||||||
|
\end{itemize}
|
||||||
|
\item $\approx$10\,s per video
|
||||||
|
\item 1\% or 10\% labeled subsets balanced sampled from distribution
|
||||||
|
\end{itemize}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
|
||||||
|
\begin{frame}{Performance Results}
|
||||||
|
\includegraphics[scale=.65]{rsc/results}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
|
||||||
% --- THE END
|
% --- THE END
|
||||||
|
BIN
presentation/rsc/performance_comparison.jpg
Normal file
BIN
presentation/rsc/performance_comparison.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 226 KiB |
BIN
presentation/rsc/results.jpg
Normal file
BIN
presentation/rsc/results.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 438 KiB |
BIN
presentation/rsc/structure.jpg
Normal file
BIN
presentation/rsc/structure.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 470 KiB |
@ -1,18 +1,16 @@
|
|||||||
@misc{structtutorialspoint,
|
@InProceedings{Xu_2022_CVPR,
|
||||||
Title = {struct basics},
|
author = {Xu, Yinghao and Wei, Fangyun and Sun, Xiao and Yang, Ceyuan and Shen, Yujun and Dai, Bo and Zhou, Bolei and Lin, Stephen},
|
||||||
howpublished = {\url{https://www.tutorialspoint.com/cprogramming/c_structures.htm}},
|
title = {Cross-Model Pseudo-Labeling for Semi-Supervised Action Recognition},
|
||||||
note = {Aufgerufen: 2020-04}
|
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
|
||||||
|
month = {June},
|
||||||
|
year = {2022},
|
||||||
|
pages = {2959--2968}
|
||||||
}
|
}
|
||||||
|
|
||||||
@misc{structchowto,
|
@online{knuthwebsite,
|
||||||
Title = {struct basics},
|
author = "Kihyuk Sohn and David Berthelot and Chun-Liang Li",
|
||||||
howpublished = {\url{http://www.c-howto.de/tutorial/strukturierte-datentypen/strukturen/}},
|
title = "FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence",
|
||||||
note = {Aufgerufen: 2020-04}
|
url = "https://arxiv.org/abs/2001.07685",
|
||||||
|
addendum = "(accessed: 20.03.2023)",
|
||||||
|
keywords = "FixMatch, semi-supervised"
|
||||||
}
|
}
|
||||||
|
|
||||||
@misc{pointertutorialspoint,
|
|
||||||
Title = {Pointer basics},
|
|
||||||
howpublished = {\url{https://www.tutorialspoint.com/cprogramming/c_pointers.htm}},
|
|
||||||
note = {Aufgerufen: 2020-04}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user