Compare commits

5 Commits
main ... main

Author SHA1 Message Date
29715ff95d moved slides a bit around 2025-11-23 19:50:57 +01:00
74ae33c267 reorder some slides 2025-11-23 15:29:42 +01:00
a586a15f8c improve presentation 2025-11-23 13:50:20 +01:00
1384d2036e add plots 2025-11-23 00:40:19 +01:00
905bad7af3 add basic presentation 2025-11-21 13:01:17 +01:00
17 changed files with 367 additions and 243 deletions

View File

@@ -1,4 +1,4 @@
\documentclass[usenames,dvipsnames]{beamer}
\documentclass[usenames,dvipsnames, aspectratio=169]{beamer}
%----------------------------------------------------------------------------------------
% Struktur und Pointer Referat
% 20.04.2020
@@ -40,12 +40,9 @@
% TITLE SLIDE
%----------------------------------------------------------------------------------------
\title{Cross-Model Pseudo-Labeling}
\subtitle{for Semi-Supervised Action Recognition}
\title{De-Cluttering Scatterplots}
\subtitle{with Integral Images}
\author{Lukas Heiligenbrunner}
\date{\today}
%------------------------------------------------
@@ -59,276 +56,351 @@
\end{frame}
%----------------------------------------------------------------------------------------
% SECTION 1
%----------------------------------------------------------------------------------------
% todo pic of action
\section{The Goal}
\begin{frame}{The goal}
\begin{itemize}
\item Train model
\item Recognize action of person
\item From video [$\approx$10sec]
\item E.g.:
\begin{itemize}
\item brushing hair
\item riding bike
\item dancing
\item playing violin
\end{itemize}
\item As generic as possible
\end{itemize}
\end{frame}
%----------------------------------------------------------------------------------------
% SECTION 2
% SECTION 1: INTRODUCTION
%----------------------------------------------------------------------------------------
\section{The Problem} % Section title slide, unnumbered
%------------------------------------------------
\begin{frame}{Missing Labels}
\begin{itemize}
\item Supervised action recognition
\begin{itemize}
\item lots of labeled samples necessary
\item videos
\end{itemize}
\item Labeling Samples very expensive
\begin{itemize}
\item Avoid!
\end{itemize}
\item Tremendous amount of unlabeled data
\begin{itemize}
\item YouTube
\end{itemize}
\item Using semi-supervised learning might be beneficial
\end{itemize}
\end{frame}
%------------------------------------------------
\begin{frame}{What is Semi-Supervised Learning all about?}
\begin{itemize}
\item Supervised learning
\begin{itemize}
\item Data samples
\item Target labels
\item Each sample is associated to target label
\end{itemize}
\item Unsupervised learning
\begin{itemize}
\item Data samples
\item target is to find patterns in data
\item without supervision
\end{itemize}
\item Semi-Supervised learning
\begin{itemize}
\item combination of both
\item have labeled \& unlabeled data
\item labeled data guides learning process
\item unlabeled data helps to gain additional information
\item goal is performance improvement
\end{itemize}
\end{itemize}
\end{frame}
%------------------------------------------------
\begin{frame}[allowframebreaks]{What's already been done}
\begin{itemize}
\item Pseudo-labeling
\item Train model on labeled data
\begin{itemize}
\item E.g.\ 1\%/10\% of data labeled
\end{itemize}
\item Predict pseudo-labels from unlabeled data
\item Confidence of prediction [Threshold]
\item Drop/Use prediction to train model further
\item Finally use pseudo-labels + 1/10\% to train main model
\end{itemize}
\framebreak
\begin{itemize}
\item quantity and quality of pseudo-labels
\item significant impact on main model accuracy!
\item we want to improve pseudo-label framework as much as possible
\end{itemize}
\end{frame}
%----------------------------------------------------------------------------------------
% SECTION 2
% SECTION 2: PROBLEM
%----------------------------------------------------------------------------------------
\section{Introduction}
\section{Cross-Model Pseudo-Labeling}
\begin{frame}[allowframebreaks]{Paper's approach}
\begin{frame}{Problem: Scatterplots Clutter}
\begin{itemize}
\item Based on complementary-representations of model
\item Models of different size
\item Different structural-bias $\rightarrow$ different category-wise performance
\item Small model
\item Scatterplots are fundamental for exploring multidimensional data
\item Modern datasets: millions of samples
\item Pixel resolution fixed $\rightarrow$ many samples map to the same pixel
\item This results in \textbf{overplotting}
\item Consequences:
\begin{itemize}
\item lower capacity
\item better captures temporal dynamics in recognizing actions
\item scene changes/motion over time
\item Occlusion of clusters
\item Loss of density information
\item Hard to select and see individual items
%\item Misleading visual perception
\end{itemize}
\item Large model
\item A method is needed to \textbf{declutter} without losing structure
\end{itemize}
\end{frame}
\begin{frame}
\centering
\includegraphics[scale=0.8]{rsc/overplotting}
\footnotesize\text{Source: \cite{statisticsglobe_overplotting_r}}
\end{frame}
\begin{frame}{Goal of the Paper}
\begin{itemize}
\item Goal:
\begin{itemize}
\item better learns spatial semantics
\item to distinguish different action instances
\item localize/identify objects in specific scene
\item Reduce clutter
\item Preserve neighborhood relations
\item Achieve uniform sample distribution
\item Maintain interpretability
\end{itemize}
\end{itemize}
\end{frame}
\framebreak
\begin{frame}{Limitations of Traditional Approaches}
\begin{itemize}
\item Cross-Model Pseudo-Labeling
\item Primary backbone (large model)
\item Supplemented by lightweight auxiliary network
\item Transparency-based methods
\begin{itemize}
\item Different structure
\item Fewer channels (smaller)
\item Improve density perception
\item But still lose individual sample visibility
\end{itemize}
\item Different representation of data complements primary backbone
\item Down-sampling
\begin{itemize}
\item Removes data $\rightarrow$ not acceptable for analysis
\end{itemize}
\item Local spatial distortions
\begin{itemize}
\item Risk of collisions
\item Often non-monotonic mappings
\end{itemize}
\item Need a \textbf{global}, \textbf{smooth}, \textbf{monotonic}, \textbf{collision-free} method
\end{itemize}
\end{frame}
%----------------------------------------------------------------------------------------
% SECTION 3: BACKGROUND
%----------------------------------------------------------------------------------------
\section{Background:\\Density Fields \& Integral Images}
\begin{frame}{Density Estimation}
\begin{itemize}
\item Given samples $z_i = (x_i, y_i)$
\item Build smoothed density:
\[
d_r(x,y) = \sum_{p=1}^n \varphi_r(x-x_p, y-y_p)
\]
\item Typically Gaussian kernel
\item Add global constant $d_0$ for stability:
\[
d(i,j) = d_r(i,j) + d_0
\]
\item Ensures no empty regions $\rightarrow$ avoids singular mappings
\end{itemize}
\end{frame}
\begin{frame}{Structure Visualization}
\includegraphics[scale=.17]{rsc/structure}
\end{frame}
\begin{frame}{Performance Perspectives}
\begin{frame}{Integral Images (InIms) I}
\begin{itemize}
\item 1\% labeled data + 400 Labels
\item Kinetics-400 dataset
\end{itemize}
\includegraphics[scale=.205]{rsc/performance_comparison}
\end{frame}
\section{Give me the math!}
\begin{frame}{Definitions}
\begin{itemize}
\item Labeled data set of size $N_l$\\
$\mathcal{V} = \{(v_1,y_1), \dots, (v_{N_l}, y_{N_l})\}$
\item Unlabeled data set of size $N_u$\\
$\mathcal{U} = \{u_1, \dots, u_{N_u}\}$
\item in general $\lvert\mathcal{U}\rvert \gg \lvert\mathcal{V}\rvert$\\
\item Integral images compute cumulative sums over regions
\item Four standard tables:
\[
\alpha,\beta,\gamma,\delta
\]
\item Four tilted ($45^\circ$) tables:
\[
\alpha_t, \beta_t, \gamma_t, \delta_t
\]
\item Each encodes global density distribution
\item Key advantage:
\begin{itemize}
\item Displacements depend on \textbf{global density}, not local neighborhood
\item Avoids collisions
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}[allowframebreaks]{How existing method \textit{FixMatch} works}
\begin{frame}{Integral Images (InIms) II}
\centering
\includegraphics[scale=0.3]{rsc/2408.06513v1_page_6_5}\\
\footnotesize\text{Source: \cite{Rave_2025}}
\end{frame}
\begin{frame}{Integral Images (InIms) III}
\centering
\includegraphics[scale=0.3]{rsc/2408.06513v1_page_6_6}\\
\footnotesize\text{Source: \cite{Rave_2025}}
\end{frame}
\begin{frame}{Integral Images (InIms) IV}
\centering
\includegraphics[scale=0.3]{rsc/2408.06513v1_page_6_7}\\
\footnotesize\text{Source: \cite{Rave_2025}}
\end{frame}
%----------------------------------------------------------------------------------------
% SECTION 4: METHOD
%----------------------------------------------------------------------------------------
\section{Density-Equalizing Mapping}
\begin{frame}{Goal of the Mapping}
\begin{itemize}
\item $B_u \coloneqq \text{Batchsize}$
\item $\tau \coloneqq \text{Confidence Threshold (Hyperparameter)}$
\item $F(\mathcal{T}_{\text{strong}}(u_i)) \coloneqq \text{Class distribution}$
\item $p_i \coloneqq F(\mathcal{T}_{\text{weak}}(u_i))$
\item $\hat{y}_i \coloneqq \arg \max(p_i) \coloneqq \text{Pseudo Label}$
\item $\mathcal{H} \coloneqq \text{Cross-entropy loss}$
\item $\mathcal{L}_u \coloneqq \text{Loss on the unlabeled data}$
\item $F \coloneqq \text{Model}$
\item $\mathbbm{1} \coloneqq \text{Indicator Function}$
\item We want to transform the scatterplot domain so that:
\begin{itemize}
\item dense regions expand
\item sparse regions contract
\item overall density becomes approximately uniform
\end{itemize}
\item The deformation must be:
\begin{itemize}
\item smooth
\item globally consistent
\item monotonic (no point order swaps)
\item free of collisions
\end{itemize}
\item To achieve this, we compute a \textbf{density-driven displacement field}.
\end{itemize}
\end{frame}
\begin{frame}{Corrected Mapping: Key Idea}
\begin{itemize}
\item Let $t(x,y; d)$ be the deformation computed from the
\textbf{actual density field} $d(x,y)$.
\item This deformation is built from cumulative sums of density
through the integral images.
\item Problem: even for \textbf{constant density}, $t(x,y; d_0)$
is \emph{not} zero (due to construction of the integral tables).
\item Therefore:\\
We subtract the deformation caused by constant density.
\end{itemize}
\begin{align*}
\mathcal{L}_u = \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i) \geq \tau) \mathcal{H}(\hat{y}_i,F(\mathcal{T}_{\text{strong}}(u_i)))
T(x,y) = (x,y) \;+\; t(x,y; d) \;-\; t(x,y; d_0) \;
\end{align*}
\framebreak
\begin{itemize}
\item $\mathbbm{1}(\max(p_i) \geq \tau)$
\begin{itemize}
\item 'confidence-based masking'
\item retain label only if largest probability is above threshold
\item keep only 'high confidence' labels
\end{itemize}
\item $\mathcal{H}(\hat{y}_i,F(\mathcal{T}_{\text{strong}}(u_i)))$
\begin{itemize}
\item 'consistency regularization'
\item cross-entropy loss of strong augmented and weak augmented data
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}[allowframebreaks]{CMPL (Cross-Model Pseudo-Labeling)}
\begin{itemize}
\item $F(\cdot) \coloneqq \text{Primary backbone}$
\item $A(\cdot) \coloneqq \text{Auxiliary network}$
\item Learning on labeled data
\begin{align*}
\mathcal{L}_s^F &= \frac{1}{B_l} \sum_{i=1}^{B_l} \mathcal{H}(y_i,F(\mathcal{T}^F_{\text{standard}}(v_i)))\\
\mathcal{L}_s^A &= \frac{1}{B_l} \sum_{i=1}^{B_l} \mathcal{H}(y_i,A(\mathcal{T}^F_{\text{standard}}(v_i)))
\end{align*}
\item $\mathcal{T}^F_{\text{standard}}(v_i) \coloneqq \text{standard augmentations for action recognition}$
\end{itemize}
\framebreak
\begin{itemize}
\item Learning on unlabeled data
\begin{align*}
\mathcal{L}_u^F &= \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i^A) \geq \tau) \mathcal{H}(\hat{y}_i^A,F(\mathcal{T}_{\text{strong}}(u_i)))\\
\mathcal{L}_u^A &= \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i^F) \geq \tau) \mathcal{H}(\hat{y}_i^F,A(\mathcal{T}_{\text{strong}}(u_i)))\\
\end{align*}
\item Complete training objective
\begin{align*}
\mathcal{L} = (\mathcal{L}_s^F + \mathcal{L}_s^A) + \lambda(\mathcal{L}_u^F + \mathcal{L}_u^A)
\end{align*}
\item $\lambda \coloneqq \text{Balancing coefficient for unsupervised loss}$
\item $T(x,y)$ is the \textbf{corrected mapping}.
\item For uniform density: $t(x,y; d) = t(x,y; d_0)$ $\rightarrow$ identity mapping.
\end{itemize}
\end{frame}
% \begin{frame}{Why the Corrected Mapping Works}
% \begin{itemize}
% \item \textbf{Identity on uniform density}
% \begin{itemize}
% \item Without correction: the old mapping distorted even uniform fields.
% \item With correction: uniform density $\rightarrow$ no deformation.
% \end{itemize}
% \item \textbf{Monotonicity}
% \begin{itemize}
% \item The corrected mapping guarantees no coordinate inversions.
% \item Order of points is preserved along both axes.
% \end{itemize}
% \item \textbf{Smoothness}
% \begin{itemize}
% \item The mapping is built from integral images (global cumulative fields),
% \item yielding slow, continuous changes.
% \end{itemize}
% \item \textbf{Stability in iteration}
% \begin{itemize}
% \item As the density becomes more equalized, $t(x,y;d)$ approaches $t(x,y;d_0)$.
% \item Mapping naturally converges toward identity.
% \end{itemize}
% \item \textbf{No collisions}
% \begin{itemize}
% \item Global, monotonic deformation prevents points from crossing paths.
% \end{itemize}
% \end{itemize}
% \end{frame}
\section{Implementation}
\begin{frame}{Networks}
\begin{itemize}
\item Auxiliary Network
\begin{frame}{Iterative Algorithm Overview}
\begin{enumerate}
\item Rasterize and smooth density
\item Compute integral images
\item Compute corrected deformation $t(x,y)$
\item Apply bi-linear interpolation to sample positions
\item Iterate until:
\begin{itemize}
\item sub-network of primary model
\item 3D-ResNet18
\item \textbf{3D-ResNet50x1/4}
\item Time budget reached
\item Uniformity threshold reached
\end{itemize}
\item Backbone network
\end{enumerate}
\end{frame}
\begin{frame}
\centering
\begin{figure}
\centering
\begin{minipage}{0.4\textwidth}
\centering
\includegraphics[width=\textwidth]{rsc/2408.06513v1_page_7_1}
\vspace{4pt}
\footnotesize MNIST Dataset (UMAP)~\cite{Rave_2025}
\end{minipage}
\begin{minipage}{0.15\textwidth}
\centering
$\Longrightarrow$
\end{minipage}
\begin{minipage}{0.4\textwidth}
\centering
\includegraphics[width=\textwidth]{rsc/2408.06513v1_page_7_2}
\vspace{4pt}
\footnotesize Visual encoding of the density-equalizing transform (32 Iterations)~\cite{Rave_2025}
\end{minipage}
\label{fig:figure}
\end{figure}
\end{frame}
%----------------------------------------------------------------------------------------
% SECTION 6: VISUAL ENCODING
%----------------------------------------------------------------------------------------
\section{Visual Encoding of Deformation}
\begin{frame}{Problem After Deformation}
\begin{itemize}
\item After equalization:
\begin{itemize}
\item larger version of aux-net
\item \textbf{3D-ResNet50}
\item Local densities lost
\item Cluster shapes distorted
\item Distances no longer meaningful
\end{itemize}
\item Need additional encodings to preserve structure
\end{itemize}
\end{frame}
\begin{frame}{Three Proposed Encodings I}
\begin{itemize}
\item \textbf{Deformed grid lines}
\begin{itemize}
\item Show local expansion / contraction
\end{itemize}
\item \textbf{Background density texture}
\begin{itemize}
\item Shows cluster cores after deformation
\end{itemize}
\item \textbf{Contour lines}
\begin{itemize}
\item Reveal subcluster structure
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{Dataset}
\begin{itemize}
\item Kinetics-400
\begin{itemize}
\item 400 categories
\item 240k/20k training/validation samples
\end{itemize}
\item UCF-101
\begin{itemize}
\item 101 classes
\item 9.5k/4k training/validation samples
\end{itemize}
\item $\approx$10sec every video
\item 1\% or 10\% labeled subsets balanced sampled from distribution
\end{itemize}
\begin{frame}{Three Proposed Encodings II}
\centering
\begin{figure}
\centering
\begin{minipage}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{rsc/2408.06513v1_page_7_2}
\vspace{4pt}
\footnotesize Deformed grid lines~\cite{Rave_2025}
\end{minipage}
\begin{minipage}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{rsc/2408.06513v1_page_7_3}
\vspace{4pt}
\footnotesize Background density texture~\cite{Rave_2025}
\end{minipage}
\begin{minipage}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{rsc/2408.06513v1_page_7_4}
\vspace{4pt}
\footnotesize Contour lines~\cite{Rave_2025}
\end{minipage}
\label{fig:figure2}
\end{figure}
\end{frame}
%----------------------------------------------------------------------------------------
% SECTION 5: IMPLEMENTATION
%----------------------------------------------------------------------------------------
\begin{frame}{Performance Results}
\includegraphics[scale=.65]{rsc/results}
\begin{frame}{Example I}
\centering
\includegraphics[scale=0.1]{rsc/2408.06513v1_page_8_1}\\
\footnotesize\text{Source: \cite{Rave_2025}}
\end{frame}
\begin{frame}{Example II}
\centering
\includegraphics[scale=0.1]{rsc/2408.06513v1_page_8_2}\\
\footnotesize\text{Source: \cite{Rave_2025}}
\end{frame}
\begin{frame}{Example III}
\centering
\includegraphics[scale=0.1]{rsc/2408.06513v1_page_8_3}\\
\footnotesize\text{Source: \cite{Rave_2025}}
\end{frame}
\begin{frame}{Example IV}
\centering
\includegraphics[scale=0.1]{rsc/2408.06513v1_page_8_4}\\
\footnotesize\text{Source: \cite{Rave_2025}}
\end{frame}
\begin{frame}{Example V}
\centering
\includegraphics[scale=0.1]{rsc/2408.06513v1_page_8_5}\\
\footnotesize\text{Source: \cite{Rave_2025}}
\end{frame}
\begin{frame}{Example VI}
\centering
\includegraphics[scale=0.1]{rsc/2408.06513v1_page_8_6}\\
\footnotesize\text{Source: \cite{Rave_2025}}
\end{frame}
% --- THE END
@@ -342,10 +414,58 @@
\appendix
\section{Backup Slides}\label{sec:backup}
\begin{frame}{Efficient GPU Computation}
\begin{itemize}
\item All major steps implemented on GPU:
\begin{itemize}
\item Density accumulation $\rightarrow$ vertex + fragment shader
\item Gaussian smoothing $\rightarrow$ 2 compute-shader passes
\item Integral image computation $\rightarrow$ fragment shader
\end{itemize}
\item Achieves interactive rates for millions of samples
\end{itemize}
\end{frame}
\begin{frame}{Performance}
\begin{itemize}
\item Runs at interactive frame rates:
\begin{itemize}
\item e.g. 4M samples in $\approx 3$ ms per iteration
\end{itemize}
%\item Standard deviation of samples/bin decreases monotonically
%\item Overplotting fraction also decreases monotonically
\end{itemize}
\centering
\includegraphics[scale=0.4]{rsc/img}\\
Source:~\cite{Rave_2025}
\end{frame}
\section{Math: Domain Transformation}
\begin{frame}{Domain Transformation (Molchanov \& Linsen)}
\begin{itemize}
\item Integral Images $\rightarrow$ Transformation mapping
\item Definition:
\[
t(x,y; d) = \frac{
\alpha q_1 + \beta q_2 + \gamma q_3 + \delta q_4
+ \alpha_t (x,1) + \beta_t (1,y) + \gamma_t (x,0) + \delta_t (0,y)
}{2C}
\]
\item Problems:
\begin{itemize}
\item Not identity for uniform density
\item Iteration unstable
\item Does not converge to equalized distribution
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{Sources}
\nocite{*} % Display all references regardless of if they were cited
\bibliography{sources}
\bibliographystyle{plain}
\end{frame}
\end{document}

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 39 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 196 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1001 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 279 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 403 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 548 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 746 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 927 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 MiB

BIN
presentation/rsc/img.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 188 KiB

View File

@@ -1,16 +1,20 @@
@InProceedings{Xu_2022_CVPR,
author = {Xu, Yinghao and Wei, Fangyun and Sun, Xiao and Yang, Ceyuan and Shen, Yujun and Dai, Bo and Zhou, Bolei and Lin, Stephen},
title = {Cross-Model Pseudo-Labeling for Semi-Supervised Action Recognition},
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2022},
pages = {2959-2968}
}
@article{Rave_2025,
title={De-Cluttering Scatterplots With Integral Images},
volume={31},
ISSN={2160-9306},
url={http://dx.doi.org/10.1109/TVCG.2024.3381453},
DOI={10.1109/tvcg.2024.3381453},
number={4},
journal={IEEE Transactions on Visualization and Computer Graphics},
publisher={Institute of Electrical and Electronics Engineers (IEEE)},
author={Rave, Hennes and Molchanov, Vladimir and Linsen, Lars},
year={2025},
month=apr, pages={2114--2126} }
@online{knuthwebsite,
author = "Kihyuk Sohn, David Berthelot, Chun-Liang Li",
title = "FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence",
url = "https://arxiv.org/abs/2001.07685",
addendum = "(accessed: 20.03.2023)",
keywords = "FixMatch, semi-supervised"
@online{statisticsglobe_overplotting_r,
author = {Statistics Globe},
title = {Avoid Overplotting in R (4 Examples) | Point Size, Opacity \& Color},
year = {2025},
url = {https://statisticsglobe.com/avoid-overplotting-r},
note = {Accessed: 2025-11-23}
}