finish first round of presentation
This commit is contained in:
		@@ -5,14 +5,19 @@
 | 
				
			|||||||
%----------------------------------------------------------------------------------------
 | 
					%----------------------------------------------------------------------------------------
 | 
				
			||||||
\usetheme{focus}
 | 
					\usetheme{focus}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
\usepackage[utf8]{inputenc}
 | 
					\usepackage[utf8]{inputenc}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
\usepackage{booktabs}
 | 
					\usepackage{booktabs}
 | 
				
			||||||
\usepackage{amsmath}
 | 
					\usepackage{amsmath}
 | 
				
			||||||
 | 
					\usepackage{amssymb}
 | 
				
			||||||
 | 
					\usepackage{amsfonts}
 | 
				
			||||||
 | 
					\usepackage{bbm}
 | 
				
			||||||
\usepackage{hyperref}
 | 
					\usepackage{hyperref}
 | 
				
			||||||
\usepackage{graphicx}
 | 
					\usepackage{graphicx}
 | 
				
			||||||
\usepackage{listings}
 | 
					\usepackage{listings}
 | 
				
			||||||
\usepackage{xcolor}
 | 
					\usepackage{xcolor}
 | 
				
			||||||
 | 
					\usepackage{mathtools}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
% Farbdefinitionen
 | 
					% Farbdefinitionen
 | 
				
			||||||
\definecolor{backgroundcoloreq}{RGB}{180,140,0}
 | 
					\definecolor{backgroundcoloreq}{RGB}{180,140,0}
 | 
				
			||||||
@@ -21,6 +26,9 @@
 | 
				
			|||||||
\definecolor{codepurple}{rgb}{0.58,0,0.82}
 | 
					\definecolor{codepurple}{rgb}{0.58,0,0.82}
 | 
				
			||||||
\definecolor{codeorange}{RGB}{190,100,0}
 | 
					\definecolor{codeorange}{RGB}{190,100,0}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					% we want to use the default calligraphic alphabet
 | 
				
			||||||
 | 
					\DeclareMathAlphabet{\mathcal}{OMS}{cmbrs}{m}{n}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
\lstset{
 | 
					\lstset{
 | 
				
			||||||
    language=C,
 | 
					    language=C,
 | 
				
			||||||
@@ -75,20 +83,20 @@
 | 
				
			|||||||
%----------------------------------------------------------------------------------------
 | 
					%----------------------------------------------------------------------------------------
 | 
				
			||||||
% todo pic of action
 | 
					% todo pic of action
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    \section{The goal}
 | 
					    \section{The Goal}
 | 
				
			||||||
    \begin{frame}{The goal}
 | 
					    \begin{frame}{The goal}
 | 
				
			||||||
        \begin{itemize}
 | 
					        \begin{itemize}
 | 
				
			||||||
            \item train model
 | 
					            \item Train model
 | 
				
			||||||
            \item recognize action of person
 | 
					            \item Recognize action of person
 | 
				
			||||||
            \item from video [$\approx$10sec]
 | 
					            \item From video [$\approx$10sec]
 | 
				
			||||||
            \item eg.:
 | 
					            \item E.g.:
 | 
				
			||||||
            \begin{itemize}
 | 
					            \begin{itemize}
 | 
				
			||||||
                \item brushing hair
 | 
					                \item brushing hair
 | 
				
			||||||
                \item riding bike
 | 
					                \item riding bike
 | 
				
			||||||
                \item dancing
 | 
					                \item dancing
 | 
				
			||||||
                \item playing violin
 | 
					                \item playing violin
 | 
				
			||||||
            \end{itemize}
 | 
					            \end{itemize}
 | 
				
			||||||
            \item as generic as possible
 | 
					            \item As generic as possible
 | 
				
			||||||
        \end{itemize}
 | 
					        \end{itemize}
 | 
				
			||||||
    \end{frame}
 | 
					    \end{frame}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -173,6 +181,7 @@
 | 
				
			|||||||
%	 SECTION 2
 | 
					%	 SECTION 2
 | 
				
			||||||
%----------------------------------------------------------------------------------------
 | 
					%----------------------------------------------------------------------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    \section{Cross-Model Pseudo-Labeling}
 | 
					    \section{Cross-Model Pseudo-Labeling}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    \begin{frame}[allowframebreaks]{Paper's approach}
 | 
					    \begin{frame}[allowframebreaks]{Paper's approach}
 | 
				
			||||||
@@ -196,7 +205,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        \begin{itemize}
 | 
					        \begin{itemize}
 | 
				
			||||||
            \item Cross-Model Pseudo-Labeling
 | 
					            \item Cross-Model Pseudo-Labeling
 | 
				
			||||||
            \item Primary backbone
 | 
					            \item Primary backbone (large model)
 | 
				
			||||||
            \item Supplemented by lightweight auxiliary network
 | 
					            \item Supplemented by lightweight auxiliary network
 | 
				
			||||||
            \begin{itemize}
 | 
					            \begin{itemize}
 | 
				
			||||||
                \item Different structure
 | 
					                \item Different structure
 | 
				
			||||||
@@ -206,11 +215,128 @@
 | 
				
			|||||||
        \end{itemize}
 | 
					        \end{itemize}
 | 
				
			||||||
    \end{frame}
 | 
					    \end{frame}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    \begin{frame}{Structure Visualization}
 | 
				
			||||||
    \begin{frame}{Performance glance}
 | 
					        \includegraphics[scale=.17]{rsc/structure}
 | 
				
			||||||
        todo the pic of the performance graph
 | 
					 | 
				
			||||||
    \end{frame}
 | 
					    \end{frame}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    \begin{frame}{Performance Perspectives}
 | 
				
			||||||
 | 
					        \includegraphics[scale=.205]{rsc/performance_comparison}
 | 
				
			||||||
 | 
					    \end{frame}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    \section{Give me the math!}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    \begin{frame}{Definitions}
 | 
				
			||||||
 | 
					        \begin{itemize}
 | 
				
			||||||
 | 
					            \item Labeled data set of size $N_l$\\
 | 
				
			||||||
 | 
					            $\mathcal{V} = \{(v_1,y_1), \dots, (v_{N_l}, y_{N_l})\}$
 | 
				
			||||||
 | 
					            \item Unlabeled data set of size $N_u$\\
 | 
				
			||||||
 | 
					            $\mathcal{U} = \{u_1, \dots, u_{N_u}\}$
 | 
				
			||||||
 | 
					            \item In general $\lvert\mathcal{U}\rvert \gg \lvert\mathcal{V}\rvert$
 | 
				
			||||||
 | 
					        \end{itemize}
 | 
				
			||||||
 | 
					    \end{frame}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    \begin{frame}[allowframebreaks]{How existing method \textit{FixMatch} works}
 | 
				
			||||||
 | 
					        \begin{itemize}
 | 
				
			||||||
 | 
					            \item $\mathbbm{1} \coloneqq \text{Indicator Function}$
 | 
				
			||||||
 | 
					            \item $B_u \coloneqq \text{Batchsize}$
 | 
				
			||||||
 | 
					            \item $\mathcal{T} \coloneqq \text{Confidence Threshold}$
 | 
				
			||||||
 | 
					            \item $F(\mathcal{T}_{\text{strong}}(u_i)) \coloneqq \text{Class distribution}$
 | 
				
			||||||
 | 
					            \item $p_i \coloneqq F(\mathcal{T}_{\text{weak}}(u_i))$
 | 
				
			||||||
 | 
					            \item $\hat{y}_i \coloneqq \arg \max(p_i) \coloneqq \text{Pseudo Label}$
 | 
				
			||||||
 | 
					            \item $\mathcal{H} \coloneqq \text{Cross-entropy loss}$
 | 
				
			||||||
 | 
					            \item $\mathcal{L}_u \coloneqq \text{Loss on the unlabeled data}$
 | 
				
			||||||
 | 
					            \item $F \coloneqq \text{Model}$
 | 
				
			||||||
 | 
					        \end{itemize}
 | 
				
			||||||
 | 
					        \begin{align*}
 | 
				
			||||||
 | 
					            \mathcal{L}_u = \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i) \geq \mathcal{T}) \mathcal{H}(\hat{y}_i,F(\mathcal{T}_{\text{strong}}(u_i)))
 | 
				
			||||||
 | 
					        \end{align*}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        \framebreak
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        \begin{itemize}
 | 
				
			||||||
 | 
					            \item $\mathbbm{1}(\max(p_i) \geq \mathcal{T})$
 | 
				
			||||||
 | 
					            \begin{itemize}
 | 
				
			||||||
 | 
					                \item `confidence-based masking'
 | 
				
			||||||
 | 
					                \item retain label only if largest probability is above threshold
 | 
				
			||||||
 | 
					                \item keep only `high confidence' labels
 | 
				
			||||||
 | 
					            \end{itemize}
 | 
				
			||||||
 | 
					            \item $\mathcal{H}(\hat{y}_i,F(\mathcal{T}_{\text{strong}}(u_i)))$
 | 
				
			||||||
 | 
					            \begin{itemize}
 | 
				
			||||||
 | 
					                \item `consistency regularization'
 | 
				
			||||||
 | 
					                \item cross-entropy loss between strongly augmented and weakly augmented data
 | 
				
			||||||
 | 
					            \end{itemize}
 | 
				
			||||||
 | 
					        \end{itemize}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    \end{frame}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    \begin{frame}[allowframebreaks]{CMPL (Cross-Model Pseudo-Labeling)}
 | 
				
			||||||
 | 
					        \begin{itemize}
 | 
				
			||||||
 | 
					            \item $F(\cdot) \coloneqq \text{Primary backbone}$
 | 
				
			||||||
 | 
					            \item $A(\cdot) \coloneqq \text{Auxiliary network}$
 | 
				
			||||||
 | 
					            \item Learning on labeled data
 | 
				
			||||||
 | 
					            \begin{align*}
 | 
				
			||||||
 | 
					                \mathcal{L}_s^F &= \frac{1}{B_l} \sum_{i=1}^{B_l} \mathcal{H}(y_i,F(\mathcal{T}^F_{\text{standard}}(v_i)))\\
 | 
				
			||||||
 | 
					                \mathcal{L}_s^A &= \frac{1}{B_l} \sum_{i=1}^{B_l} \mathcal{H}(y_i,A(\mathcal{T}^F_{\text{standard}}(v_i)))
 | 
				
			||||||
 | 
					            \end{align*}
 | 
				
			||||||
 | 
					            \item $\mathcal{T}^F_{\text{standard}}(v_i) \coloneqq \text{standard augmentations for action recognition}$
 | 
				
			||||||
 | 
					        \end{itemize}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        \framebreak
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        \begin{itemize}
 | 
				
			||||||
 | 
					            \item Learning on unlabeled data
 | 
				
			||||||
 | 
					            \begin{align*}
 | 
				
			||||||
 | 
					                \mathcal{L}_u^F &= \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i^A) \geq \mathcal{T}) \mathcal{H}(\hat{y}_i^A,F(\mathcal{T}_{\text{strong}}(u_i)))\\
 | 
				
			||||||
 | 
					                \mathcal{L}_u^A &= \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i^F) \geq \mathcal{T}) \mathcal{H}(\hat{y}_i^F,A(\mathcal{T}_{\text{strong}}(u_i)))\\
 | 
				
			||||||
 | 
					            \end{align*}
 | 
				
			||||||
 | 
					            \item Complete training objective
 | 
				
			||||||
 | 
					            \begin{align*}
 | 
				
			||||||
 | 
					                \mathcal{L} = (\mathcal{L}_s^F + \mathcal{L}_s^A) + \lambda(\mathcal{L}_u^F + \mathcal{L}_u^A)
 | 
				
			||||||
 | 
					            \end{align*}
 | 
				
			||||||
 | 
					            \item $\lambda \coloneqq \text{Balancing coefficient for unsupervised loss}$
 | 
				
			||||||
 | 
					        \end{itemize}
 | 
				
			||||||
 | 
					    \end{frame}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    \section{Implementation}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    \begin{frame}{Networks}
 | 
				
			||||||
 | 
					        \begin{itemize}
 | 
				
			||||||
 | 
					            \item Auxiliary Network
 | 
				
			||||||
 | 
					            \begin{itemize}
 | 
				
			||||||
 | 
					                \item sub-network of primary model
 | 
				
			||||||
 | 
					                \item 3D-ResNet18
 | 
				
			||||||
 | 
					                \item \textbf{3D-ResNet50x1/4}
 | 
				
			||||||
 | 
					            \end{itemize}
 | 
				
			||||||
 | 
					            \item Backbone network
 | 
				
			||||||
 | 
					            \begin{itemize}
 | 
				
			||||||
 | 
					                \item larger version of aux-net
 | 
				
			||||||
 | 
					                \item \textbf{3D-ResNet50}
 | 
				
			||||||
 | 
					            \end{itemize}
 | 
				
			||||||
 | 
					        \end{itemize}
 | 
				
			||||||
 | 
					    \end{frame}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    \begin{frame}{Dataset}
 | 
				
			||||||
 | 
					        \begin{itemize}
 | 
				
			||||||
 | 
					            \item Kinetics-400
 | 
				
			||||||
 | 
					            \begin{itemize}
 | 
				
			||||||
 | 
					                \item 400 categories
 | 
				
			||||||
 | 
					                \item 240k/20k training/validation samples
 | 
				
			||||||
 | 
					            \end{itemize}
 | 
				
			||||||
 | 
					            \item UCF-101
 | 
				
			||||||
 | 
					            \begin{itemize}
 | 
				
			||||||
 | 
					                \item 101 classes
 | 
				
			||||||
 | 
					                \item 9.5k/4k training/validation samples
 | 
				
			||||||
 | 
					            \end{itemize}
 | 
				
			||||||
 | 
					            \item $\approx$10\,sec per video
 | 
				
			||||||
 | 
					            \item 1\% or 10\% labeled subsets, sampled in a balanced way from the distribution
 | 
				
			||||||
 | 
					        \end{itemize}
 | 
				
			||||||
 | 
					    \end{frame}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    \begin{frame}{Performance Results}
 | 
				
			||||||
 | 
					        \includegraphics[scale=.65]{rsc/results}
 | 
				
			||||||
 | 
					    \end{frame}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    % ---  THE END
 | 
					    % ---  THE END
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										
											BIN
										
									
								
								presentation/rsc/performance_comparison.jpg
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								presentation/rsc/performance_comparison.jpg
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 226 KiB  | 
							
								
								
									
										
											BIN
										
									
								
								presentation/rsc/results.jpg
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								presentation/rsc/results.jpg
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 438 KiB  | 
							
								
								
									
										
											BIN
										
									
								
								presentation/rsc/structure.jpg
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								presentation/rsc/structure.jpg
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 470 KiB  | 
@@ -1,18 +1,16 @@
 | 
				
			|||||||
@misc{structtutorialspoint,
 | 
					@InProceedings{Xu_2022_CVPR,
 | 
				
			||||||
    Title = {struct basics},
 | 
					    author    = {Xu, Yinghao and Wei, Fangyun and Sun, Xiao and Yang, Ceyuan and Shen, Yujun and Dai, Bo and Zhou, Bolei and Lin, Stephen},
 | 
				
			||||||
    howpublished = {\url{https://www.tutorialspoint.com/cprogramming/c_structures.htm}},
 | 
					    title     = {Cross-Model Pseudo-Labeling for Semi-Supervised Action Recognition},
 | 
				
			||||||
    note = {Aufgerufen: 2020-04}
 | 
					    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
 | 
				
			||||||
 | 
					    month     = {June},
 | 
				
			||||||
 | 
					    year      = {2022},
 | 
				
			||||||
 | 
					    pages     = {2959--2968}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@misc{structchowto,
 | 
					@online{knuthwebsite,
 | 
				
			||||||
    Title = {struct basics},
 | 
					    author = "Kihyuk Sohn and David Berthelot and Chun-Liang Li and others",
 | 
				
			||||||
    howpublished = {\url{http://www.c-howto.de/tutorial/strukturierte-datentypen/strukturen/}},
 | 
					    title = "FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence",
 | 
				
			||||||
    note = {Aufgerufen: 2020-04}
 | 
					    url  = "https://arxiv.org/abs/2001.07685",
 | 
				
			||||||
 | 
					    addendum = "(accessed: 20.03.2023)",
 | 
				
			||||||
 | 
					    keywords = "FixMatch, semi-supervised"
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					 | 
				
			||||||
@misc{pointertutorialspoint,
 | 
					 | 
				
			||||||
    Title = {Pointer basics},
 | 
					 | 
				
			||||||
    howpublished = {\url{https://www.tutorialspoint.com/cprogramming/c_pointers.htm}},
 | 
					 | 
				
			||||||
    note = {Aufgerufen: 2020-04}
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user