add basic presentation

2025-11-21 13:01:17 +01:00
parent 310ded1bd7
commit 905bad7af3


@@ -40,12 +40,9 @@
% TITLE SLIDE
%----------------------------------------------------------------------------------------
-\title{Cross-Model Pseudo-Labeling}
-\subtitle{for Semi-Supervised Action Recognition}
+\title{De-Cluttering Scatterplots}
+\subtitle{with Integral Images}
\author{Lukas Heiligenbrunner}
\date{\today}
%------------------------------------------------
@@ -59,274 +56,264 @@
\end{frame}
%----------------------------------------------------------------------------------------
-% SECTION 1
+% SECTION 1: INTRODUCTION
%----------------------------------------------------------------------------------------
-% todo pic of action
-\section{The Goal}
-\begin{frame}{The goal}
+\section{Introduction}
+\begin{frame}{Goal of the Paper}
\begin{itemize}
-\item Train model
-\item Recognize action of person
-\item From video [$\approx$10sec]
-\item E.g.:
+\item Scatterplots are fundamental for exploring multidimensional data
+\item But: with large datasets they suffer from \textbf{overplotting}
+\item Dense regions obscure structure, samples become inaccessible
+\item Goal:
\begin{itemize}
-\item brushing hair
-\item riding bike
-\item dancing
-\item playing violin
+\item Reduce clutter
+\item Preserve neighborhood relations
+\item Achieve uniform sample distribution
+\item Maintain interpretability
\end{itemize}
-\item As generic as possible
\end{itemize}
\end{frame}
%----------------------------------------------------------------------------------------
-% SECTION 2
+% SECTION 2: PROBLEM
%----------------------------------------------------------------------------------------
-\section{The Problem} % Section title slide, unnumbered
-%------------------------------------------------
-\begin{frame}{Missing Labels}
+\section{Problem: Overplotting}
+\begin{frame}{Why Scatterplots Clutter}
\begin{itemize}
-\item Supervised action recognition
+\item Modern datasets: millions of samples
+\item Pixel resolution fixed $\rightarrow$ many samples map to the same pixel
+\item Consequences:
\begin{itemize}
-\item lots of labeled samples necessary
-\item videos
+\item Occlusion of clusters + outliers
+\item Loss of density information
+\item Hard to select individual items
+\item Misleading visual perception
\end{itemize}
-\item Labeling samples is very expensive
-\begin{itemize}
-\item Avoid!
-\end{itemize}
-\item Tremendous amount of unlabeled data
-\begin{itemize}
-\item YouTube
-\end{itemize}
-\item Using semi-supervised learning might be beneficial
+\item A method is needed to \textbf{declutter} without losing structure
\end{itemize}
\end{frame}
-%------------------------------------------------
-\begin{frame}{What's semi-supervised learning all about?}
+\begin{frame}{Limitations of Traditional Approaches}
\begin{itemize}
-\item Supervised learning
+\item Transparency-based methods
\begin{itemize}
-\item Data samples
-\item Target labels
-\item Each sample is associated with a target label
+\item Improve density perception
+\item But still lose individual sample visibility
\end{itemize}
-\item Unsupervised learning
+\item Down-sampling
\begin{itemize}
-\item Data samples
-\item goal is to find patterns in the data
-\item without supervision
+\item Removes data $\rightarrow$ not acceptable for analysis
\end{itemize}
-\item Semi-supervised learning
+\item Local spatial distortions
\begin{itemize}
-\item combination of both
-\item have labeled \& unlabeled data
-\item labeled data guides the learning process
-\item unlabeled data helps to gain additional information
-\item goal is performance improvement
+\item Risk of collisions
+\item Often non-monotonic mappings
\end{itemize}
-\end{itemize}
-\end{frame}
-%------------------------------------------------
-\begin{frame}[allowframebreaks]{What's already been done}
-\begin{itemize}
-\item Pseudo-labeling
-\item Train model on labeled data
-\begin{itemize}
-\item E.g. 1\%/10\% of the data labeled
-\end{itemize}
-\item Predict pseudo-labels from unlabeled data
-\item Confidence of prediction [Threshold]
-\item Drop/Use prediction to train model further
-\item Finally use pseudo-labels + the 1\%/10\% labeled data to train the main model
-\end{itemize}
-\framebreak
-\begin{itemize}
-\item quantity and quality of pseudo-labels
-\item significant impact on main model accuracy!
-\item we want to improve the pseudo-label framework as much as possible
+\item Need a \textbf{global}, \textbf{smooth}, \textbf{monotonic}, \textbf{collision-free} method
\end{itemize}
\end{frame}
%----------------------------------------------------------------------------------------
-% SECTION 2
+% SECTION 3: BACKGROUND
%----------------------------------------------------------------------------------------
-\section{Cross-Model Pseudo-Labeling}
-\begin{frame}[allowframebreaks]{Paper's approach}
+\section{Background: Density Fields \& Integral Images}
+\begin{frame}{Density Estimation}
\begin{itemize}
-\item Based on complementary representations of the model
-\item Models of different size
-\item Different structural bias $\rightarrow$ different category-wise performance
-\item Small model
-\begin{itemize}
-\item lower capacity
-\item better captures temporal dynamics in recognizing actions
-\item scene changes/motion over time
-\end{itemize}
-\item Large model
-\begin{itemize}
-\item better learns spatial semantics
-\item to distinguish different action instances
-\item localize/identify objects in a specific scene
-\end{itemize}
-\end{itemize}
-\framebreak
-\begin{itemize}
-\item Cross-Model Pseudo-Labeling
-\item Primary backbone (large model)
-\item Supplemented by lightweight auxiliary network
-\begin{itemize}
-\item Different structure
-\item Fewer channels (smaller)
-\end{itemize}
-\item Different representation of data complements primary backbone
+\item Given samples $z_i = (x_i, y_i)$
+\item Build smoothed density:
+\[
+d_r(x,y) = \sum_{p=1}^n \varphi_r(x-x_p, y-y_p)
+\]
+\item Typically Gaussian kernel
+\item Add global constant $d_0$ for stability:
+\[
+d(i,j) = d_r(i,j) + d_0
+\]
+\item Ensures no empty regions $\rightarrow$ avoids singular mappings
\end{itemize}
\end{frame}
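To make the density estimation in the new frame above concrete, here is a minimal numpy/scipy sketch (not the authors' code): it rasterizes the samples onto a 2^k x 2^k grid, smooths with a Gaussian kernel standing in for $\varphi_r$, and adds the global offset $d_0$. Grid size, kernel width and the value of $d_0$ are illustrative assumptions.

```python
import numpy as np
from scipy.ndimage import gaussian_filter

def density_field(x, y, k=9, sigma=4.0, d0=1e-3):
    """Rasterize samples onto a 2^k x 2^k grid, smooth with a Gaussian kernel
    (standing in for phi_r) and add the global offset d0."""
    m = 2 ** k
    # map sample coordinates to integer grid indices in [0, m-1]
    xi = np.clip(((x - x.min()) / (np.ptp(x) + 1e-12) * (m - 1)).astype(int), 0, m - 1)
    yi = np.clip(((y - y.min()) / (np.ptp(y) + 1e-12) * (m - 1)).astype(int), 0, m - 1)
    counts = np.zeros((m, m))
    np.add.at(counts, (yi, xi), 1.0)            # raw samples per bin
    d_r = gaussian_filter(counts, sigma=sigma)  # smoothed density d_r(i, j)
    return d_r + d0                             # d(i, j) = d_r(i, j) + d0

# toy usage with random sample positions
rng = np.random.default_rng(0)
x, y = rng.normal(size=10_000), rng.normal(size=10_000)
d = density_field(x, y)
```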
-\begin{frame}{Structure Visualization}
-\includegraphics[scale=.17]{rsc/structure}
-\end{frame}
-\begin{frame}{Performance Perspectives}
-\begin{itemize}
-\item 1\% labeled data + 400 Labels
-\item Kinetics-400 dataset
-\end{itemize}
-\includegraphics[scale=.205]{rsc/performance_comparison}
-\end{frame}
-\section{Give me the math!}
-\begin{frame}{Definitions}
-\begin{itemize}
-\item Labeled data set of size $N_l$\\
-$\mathcal{V} = \{(v_1,y_1), \dots, (v_{N_l}, y_{N_l})\}$
-\item Unlabeled data set of size $N_u$\\
-$\mathcal{U} = \{u_1, \dots, u_{N_u}\}$
-\item in general $\lvert\mathcal{U}\rvert \gg \lvert\mathcal{V}\rvert$
-\end{itemize}
-\end{frame}
-\begin{frame}[allowframebreaks]{How the existing method \textit{FixMatch} works}
-\begin{itemize}
-\item $B_u \coloneqq \text{Batch size}$
-\item $\tau \coloneqq \text{Confidence Threshold (Hyperparameter)}$
-\item $F(\mathcal{T}_{\text{strong}}(u_i)) \coloneqq \text{Class distribution}$
-\item $p_i \coloneqq F(\mathcal{T}_{\text{weak}}(u_i))$
-\item $\hat{y}_i \coloneqq \arg \max(p_i) \coloneqq \text{Pseudo Label}$
-\item $\mathcal{H} \coloneqq \text{Cross-entropy loss}$
-\item $\mathcal{L}_u \coloneqq \text{Loss on the unlabeled data}$
-\item $F \coloneqq \text{Model}$
-\item $\mathbbm{1} \coloneqq \text{Indicator Function}$
-\end{itemize}
-\begin{align*}
-\mathcal{L}_u = \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i) \geq \tau) \mathcal{H}(\hat{y}_i,F(\mathcal{T}_{\text{strong}}(u_i)))
-\end{align*}
-\framebreak
-\begin{itemize}
-\item $\mathbbm{1}(\max(p_i) \geq \tau)$
-\begin{itemize}
-\item 'confidence-based masking'
-\item retain a label only if the largest probability is above the threshold
-\item keep only 'high-confidence' labels
-\end{itemize}
-\item $\mathcal{H}(\hat{y}_i,F(\mathcal{T}_{\text{strong}}(u_i)))$
-\begin{itemize}
-\item 'consistency regularization'
-\item cross-entropy between the pseudo-label from the weakly augmented view and the prediction on the strongly augmented view
-\end{itemize}
-\end{itemize}
-\end{frame}
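To illustrate the removed FixMatch loss $\mathcal{L}_u$ above with numbers, here is a small numpy sketch of the confidence-masked cross-entropy. `probs_weak` and `probs_strong` stand in for the model outputs $F(\mathcal{T}_{\text{weak}}(u_i))$ and $F(\mathcal{T}_{\text{strong}}(u_i))$; they are assumed arrays here, not outputs of a real network.

```python
import numpy as np

def fixmatch_unlabeled_loss(probs_weak, probs_strong, tau=0.95):
    """L_u from the slide: keep only confident weak predictions (max p_i >= tau),
    use their argmax as pseudo-label and take the cross-entropy against the
    prediction on the strongly augmented view. Shapes: (B_u, num_classes)."""
    pseudo = probs_weak.argmax(axis=1)            # \hat{y}_i
    mask = probs_weak.max(axis=1) >= tau          # indicator 1(max p_i >= tau)
    ce = -np.log(probs_strong[np.arange(len(pseudo)), pseudo] + 1e-12)
    return (mask * ce).sum() / len(pseudo)        # average over the batch size B_u

# toy usage with random "class distributions"
rng = np.random.default_rng(0)
pw = rng.dirichlet(np.ones(10), size=8)
ps = rng.dirichlet(np.ones(10), size=8)
print(fixmatch_unlabeled_loss(pw, ps))
```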
-\begin{frame}[allowframebreaks]{CMPL (Cross-Model Pseudo-Labeling)}
-\begin{itemize}
-\item $F(\cdot) \coloneqq \text{Primary backbone}$
-\item $A(\cdot) \coloneqq \text{Auxiliary network}$
-\item Learning on labeled data
-\begin{align*}
-\mathcal{L}_s^F &= \frac{1}{B_l} \sum_{i=1}^{B_l} \mathcal{H}(y_i,F(\mathcal{T}^F_{\text{standard}}(v_i)))\\
-\mathcal{L}_s^A &= \frac{1}{B_l} \sum_{i=1}^{B_l} \mathcal{H}(y_i,A(\mathcal{T}^F_{\text{standard}}(v_i)))
-\end{align*}
-\item $\mathcal{T}^F_{\text{standard}}(v_i) \coloneqq \text{standard augmentations for action recognition}$
-\end{itemize}
-\framebreak
-\begin{itemize}
-\item Learning on unlabeled data
-\begin{align*}
-\mathcal{L}_u^F &= \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i^A) \geq \tau) \mathcal{H}(\hat{y}_i^A,F(\mathcal{T}_{\text{strong}}(u_i)))\\
-\mathcal{L}_u^A &= \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i^F) \geq \tau) \mathcal{H}(\hat{y}_i^F,A(\mathcal{T}_{\text{strong}}(u_i)))
-\end{align*}
-\item Complete training objective
-\begin{align*}
-\mathcal{L} = (\mathcal{L}_s^F + \mathcal{L}_s^A) + \lambda(\mathcal{L}_u^F + \mathcal{L}_u^A)
-\end{align*}
-\item $\lambda \coloneqq \text{Balancing coefficient for unsupervised loss}$
-\end{itemize}
-\end{frame}
-\section{Implementation}
-\begin{frame}{Networks}
-\begin{itemize}
-\item Auxiliary Network
-\begin{itemize}
-\item sub-network of primary model
-\item 3D-ResNet18
-\item \textbf{3D-ResNet50x1/4}
-\end{itemize}
-\item Backbone network
-\begin{itemize}
-\item larger version of aux-net
-\item \textbf{3D-ResNet50}
+\begin{frame}{Integral Images (InIms)}
+\begin{itemize}
+\item Integral images compute cumulative sums over regions
+\item Four standard tables:
+\[
+\alpha,\beta,\gamma,\delta
+\]
+\item Four tilted ($45^\circ$) tables:
+\[
+\alpha_t, \beta_t, \gamma_t, \delta_t
+\]
+\item Each encodes global density distribution
+\item Key advantage:
+\begin{itemize}
+\item Displacements depend on \textbf{global density}, not local neighborhood
+\item Avoids collisions
\end{itemize}
\end{itemize}
\end{frame}
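The four standard tables are ordinary summed-area tables that differ only in the corner from which they accumulate; below is a minimal numpy sketch of one such table and of a rectangle query against it (the tilted tables follow the same idea in a 45-degree rotated frame and are omitted). This is an illustration, not the paper's GPU implementation.

```python
import numpy as np

def integral_image(d):
    """Standard summed-area table: I[i, j] = sum of d over all rows <= i, cols <= j."""
    return d.cumsum(axis=0).cumsum(axis=1)

def rect_sum(I, i0, j0, i1, j1):
    """Sum of the underlying grid over [i0..i1] x [j0..j1] via four lookups."""
    total = I[i1, j1]
    if i0 > 0:
        total -= I[i0 - 1, j1]
    if j0 > 0:
        total -= I[i1, j0 - 1]
    if i0 > 0 and j0 > 0:
        total += I[i0 - 1, j0 - 1]
    return total

d = np.arange(16, dtype=float).reshape(4, 4)
I = integral_image(d)
assert np.isclose(rect_sum(I, 1, 1, 2, 3), d[1:3, 1:4].sum())
```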
-\begin{frame}{Dataset}
+%----------------------------------------------------------------------------------------
+% SECTION 4: METHOD
+%----------------------------------------------------------------------------------------
+\section{Density-Equalizing Mapping}
+\begin{frame}{Original Mapping (Molchanov \& Linsen)}
\begin{itemize}
-\item Kinetics-400
+\item Prior work defined mapping:
+\[
+t(x,y; d) = \frac{
+\alpha q_1 + \beta q_2 + \gamma q_3 + \delta q_4
++ \alpha_t (x,1) + \beta_t (1,y) + \gamma_t (x,0) + \delta_t (0,y)
+}{2C}
+\]
+\item But:
\begin{itemize}
-\item 400 categories
-\item 240k/20k training/validation samples
+\item Not identity for uniform density
+\item Iteration unstable
+\item Does not converge to equalized distribution
\end{itemize}
-\item UCF-101
-\begin{itemize}
-\item 101 classes
-\item 9.5k/4k training/validation samples
-\end{itemize}
-\item $\approx$10sec per video
-\item 1\% or 10\% labeled subsets, sampled class-balanced from the distribution
\end{itemize}
\end{frame}
+\begin{frame}{Corrected Mapping (This Paper)}
+\begin{itemize}
+\item Compute deformation for true density $d$
+\item Compute deformation for constant density $d_0$
+\item Subtract:
+\[
+t(x,y) = (x,y) + t(x,y; d) - t(x,y; d_0)
+\]
+\item This ensures:
+\begin{itemize}
+\item Identity for uniform density
+\item Smooth monotonic deformation
+\item Progressive convergence to equalization
+\item No overlap of regions
+\end{itemize}
+\end{itemize}
+\end{frame}
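A small sketch of the correction step, assuming a placeholder `deformation(density)` that evaluates the integral-image based displacement field $t(\cdot\,; d)$ on the grid. The placeholder hides the actual formula from the previous frame, so the sketch only shows how subtracting the constant-density deformation yields the identity for uniform data.

```python
import numpy as np

def corrected_deformation(deformation, d, d0):
    """t(x, y) = (x, y) + t(x, y; d) - t(x, y; d0): subtracting the deformation
    computed for the constant density d0 makes the mapping the identity
    whenever the data density is already uniform."""
    raw = deformation(d)                           # t(.; d), assumed shape (m, m, 2)
    baseline = deformation(np.full_like(d, d0))    # t(.; d0) on a constant field
    m = d.shape[0]
    ys, xs = np.meshgrid(np.arange(m), np.arange(m), indexing="ij")
    identity = np.stack([xs, ys], axis=-1).astype(float)
    return identity + raw - baseline

# sanity check with a stand-in deformation model (NOT the formula from the slides)
dummy = lambda dens: np.stack([dens, dens], axis=-1)
d_uniform = np.full((4, 4), 0.25)
t = corrected_deformation(dummy, d_uniform, d0=0.25)
ys, xs = np.meshgrid(np.arange(4), np.arange(4), indexing="ij")
assert np.allclose(t, np.stack([xs, ys], axis=-1))
```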
-\begin{frame}{Performance Results}
-\includegraphics[scale=.65]{rsc/results}
+\begin{frame}{Iterative Algorithm Overview}
+\begin{enumerate}
+\item Rasterize and smooth density
+\item Compute integral images
+\item Compute corrected deformation $t(x,y)$
+\item Apply bi-linear interpolation to sample positions
+\item Iterate until:
+\begin{itemize}
+\item Time budget reached
+\item Uniformity threshold reached
+\end{itemize}
+\end{enumerate}
+\end{frame}
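Assembled into a loop, the iteration could look like the following Python sketch. `density_field` and `corrected_deformation` are the earlier sketches, `deformation` remains a placeholder for the integral-image formula, and the bilinear lookup of step 4 uses `scipy.ndimage.map_coordinates` with `order=1`. Bin count and stopping threshold are assumptions, not the paper's settings.

```python
import numpy as np
from scipy.ndimage import map_coordinates

def declutter(x, y, deformation, iters=10, k=9, d0=1e-3, target_std=0.5):
    """Iteratively move samples toward a uniform distribution."""
    m = 2 ** k
    for _ in range(iters):
        d = density_field(x, y, k=k, d0=d0)            # 1. rasterize and smooth
        t = corrected_deformation(deformation, d, d0)  # 2.+3. corrected t(x, y)
        # 4. bilinear interpolation of the deformed grid at each sample position
        gx = (x - x.min()) / (np.ptp(x) + 1e-12) * (m - 1)
        gy = (y - y.min()) / (np.ptp(y) + 1e-12) * (m - 1)
        x = map_coordinates(t[..., 0], [gy, gx], order=1)
        y = map_coordinates(t[..., 1], [gy, gx], order=1)
        # 5. stop once the samples-per-bin distribution is uniform enough
        counts, _, _ = np.histogram2d(x, y, bins=64)
        if counts.std() <= target_std:
            break
    return x, y
```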
+%----------------------------------------------------------------------------------------
+% SECTION 5: IMPLEMENTATION
+%----------------------------------------------------------------------------------------
+\section{GPU Implementation}
+\begin{frame}{Efficient GPU Computation}
+\begin{itemize}
+\item All major steps implemented on GPU:
+\begin{itemize}
+\item Density accumulation
+\item Gaussian smoothing
+\item Integral image computation
+\end{itemize}
+\item Fast multi-pass reduction for InIms
+\item Complexity:
+\[
+O(n + m)
+\]
+where $m = 2^k \times 2^k$ is the texture resolution
+\item Achieves interactive rates for millions of samples
+\end{itemize}
+\end{frame}
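The slides only name a fast multi-pass reduction for the integral images. One common GPU-style scheme for the required prefix sums is a log-step (Hillis-Steele) scan; the numpy sketch below mimics it on the CPU and is an assumption about what such passes can look like, not the paper's shader code.

```python
import numpy as np

def log_step_row_scan(d):
    """Inclusive prefix sum along each row in ceil(log2(width)) passes.
    On a GPU, each pass is one full-texture render/compute pass."""
    out = d.copy()
    step = 1
    while step < out.shape[1]:
        shifted = np.zeros_like(out)
        shifted[:, step:] = out[:, :-step]   # read the value `step` texels to the left
        out = out + shifted                  # add it for all texels "in parallel"
        step *= 2
    return out

d = np.arange(8, dtype=float).reshape(2, 4)
assert np.allclose(log_step_row_scan(d), d.cumsum(axis=1))
# repeating the same scan along the columns yields the full integral image
```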
+%----------------------------------------------------------------------------------------
+% SECTION 6: VISUAL ENCODING
+%----------------------------------------------------------------------------------------
+\section{Visual Encoding of Deformation}
+\begin{frame}{Problem After Deformation}
+\begin{itemize}
+\item After equalization:
+\begin{itemize}
+\item Local densities lost
+\item Cluster shapes distorted
+\item Distances no longer meaningful
+\end{itemize}
+\item Need additional encodings to preserve structure
+\end{itemize}
+\end{frame}
+\begin{frame}{Three Proposed Encodings}
+\begin{itemize}
+\item \textbf{Deformed grid lines}
+\begin{itemize}
+\item Show local expansion / contraction
+\end{itemize}
+\item \textbf{Background density texture}
+\begin{itemize}
+\item Shows cluster cores after deformation
+\end{itemize}
+\item \textbf{Contour lines}
+\begin{itemize}
+\item Reveal subcluster structure
+\end{itemize}
+\end{itemize}
+\end{frame}
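As an illustration of the first encoding, a short matplotlib sketch that pushes a regular grid through the deformation and draws the resulting grid lines; `t` is assumed to be the (m, m, 2) array produced by the corrected-mapping sketch above.

```python
import matplotlib.pyplot as plt

def draw_deformed_grid(t, every=32, ax=None):
    """Draw deformed horizontal and vertical grid lines; the local size of the
    grid cells encodes how much the deformation expanded or contracted space."""
    if ax is None:
        ax = plt.gca()
    for i in range(0, t.shape[0], every):   # horizontal grid lines
        ax.plot(t[i, :, 0], t[i, :, 1], color="lightgray", linewidth=0.5)
    for j in range(0, t.shape[1], every):   # vertical grid lines
        ax.plot(t[:, j, 0], t[:, j, 1], color="lightgray", linewidth=0.5)
    return ax
```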
+%----------------------------------------------------------------------------------------
+% SECTION 7: RESULTS
+%----------------------------------------------------------------------------------------
+\section{Results}
+\begin{frame}{Performance}
+\begin{itemize}
+\item Runs at interactive frame rates:
+\begin{itemize}
+\item e.g. 4M samples in $\approx 28$ ms per iteration
+\end{itemize}
+\item Standard deviation of samples/bin decreases monotonically
+\item Overplotting fraction also decreases monotonically
+\end{itemize}
+\centering
+\includegraphics[scale=0.4]{rsc/results}
+\end{frame}
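Both reported curves are cheap to recompute per iteration. The numpy sketch below uses assumed definitions: 64x64 bins, and the overplotting fraction approximated as the share of samples that land in a bin together with at least one other sample; the paper's exact definitions may differ.

```python
import numpy as np

def uniformity_metrics(x, y, bins=64):
    """Std of samples per bin (uniformity) and a proxy for the overplotting fraction."""
    counts, _, _ = np.histogram2d(x, y, bins=bins)
    std_per_bin = counts.std()
    overplot_fraction = counts[counts > 1].sum() / max(counts.sum(), 1)
    return std_per_bin, overplot_fraction
```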
+\begin{frame}{User Study}
+\begin{itemize}
+\item 25 participants, 3 tasks:
+\begin{enumerate}
+\item Estimate cluster size
+\item Sort clusters by size
+\item Select clusters (lasso)
+\end{enumerate}
+\item Findings:
+\begin{itemize}
+\item Size estimation (T1): regularized significantly better
+\item Sorting (T2): regularized significantly better
+\item Cluster selection (T3):
+\begin{itemize}
+\item Grid encoding: worst
+\item Background texture: better
+\item Original scatterplot: best
+\end{itemize}
+\end{itemize}
+\end{itemize}
\end{frame}