add basic presentation

2025-11-21 13:01:17 +01:00
parent 310ded1bd7
commit 905bad7af3


@@ -40,12 +40,9 @@
% TITLE SLIDE
%----------------------------------------------------------------------------------------
-\title{Cross-Model Pseudo-Labeling}
-\subtitle{for Semi-Supervised Action Recognition}
+\title{De-Cluttering Scatterplots}
+\subtitle{with Integral Images}
\author{Lukas Heiligenbrunner}
\date{\today}
%------------------------------------------------
@@ -59,274 +56,264 @@
\end{frame}
%----------------------------------------------------------------------------------------
-% SECTION 1
+% SECTION 1: INTRODUCTION
%----------------------------------------------------------------------------------------
-% todo pic of action
-\section{The Goal}
-\begin{frame}{The goal}
+\section{Introduction}
+\begin{frame}{Goal of the Paper}
\begin{itemize}
-\item Train model
-\item Recognize action of person
-\item From video [$\approx$10sec]
-\item E.g.:
+\item Scatterplots are fundamental for exploring multidimensional data
+\item But: with large datasets they suffer from \textbf{overplotting}
+\item Dense regions obscure structure, samples become inaccessible
+\item Goal:
\begin{itemize}
-\item brushing hair
-\item riding bike
-\item dancing
-\item playing violin
+\item Reduce clutter
+\item Preserve neighborhood relations
+\item Achieve uniform sample distribution
+\item Maintain interpretability
\end{itemize}
-\item As generic as possible
\end{itemize}
\end{frame}
%----------------------------------------------------------------------------------------
-% SECTION 2
+% SECTION 2: PROBLEM
%----------------------------------------------------------------------------------------
-\section{The Problem} % Section title slide, unnumbered
-%------------------------------------------------
-\begin{frame}{Missing Labels}
+\section{Problem: Overplotting}
+\begin{frame}{Why Scatterplots Clutter}
\begin{itemize}
-\item Supervised action recognition
+\item Modern datasets: millions of samples
+\item Pixel resolution fixed $\rightarrow$ many samples map to the same pixel
+\item Consequences:
\begin{itemize}
-\item lots of labeled samples necessary
-\item videos
+\item Occlusion of clusters + outliers
+\item Loss of density information
+\item Hard to select individual items
+\item Misleading visual perception
\end{itemize}
-\item Labeling samples is very expensive
-\begin{itemize}
-\item Avoid!
-\end{itemize}
-\item Tremendous amount of unlabeled data
-\begin{itemize}
-\item YouTube
-\end{itemize}
-\item Using semi-supervised learning might be beneficial
+\item A method is needed to \textbf{declutter} without losing structure
\end{itemize}
\end{frame}
-%------------------------------------------------
-\begin{frame}{What's semi-supervised learning all about?}
+\begin{frame}{Limitations of Traditional Approaches}
\begin{itemize}
-\item Supervised learning
+\item Transparency-based methods
\begin{itemize}
-\item Data samples
-\item Target labels
-\item Each sample is associated with a target label
+\item Improve density perception
+\item But still lose individual sample visibility
\end{itemize}
-\item Unsupervised learning
+\item Down-sampling
\begin{itemize}
-\item Data samples
-\item goal is to find patterns in the data
-\item without supervision
+\item Removes data $\rightarrow$ not acceptable for analysis
\end{itemize}
-\item Semi-supervised learning
+\item Local spatial distortions
\begin{itemize}
-\item combination of both
-\item have labeled \& unlabeled data
-\item labeled data guides the learning process
-\item unlabeled data helps to gain additional information
-\item goal is performance improvement
+\item Risk of collisions
+\item Often non-monotonic mappings
\end{itemize}
-\end{itemize}
-\end{frame}
-%------------------------------------------------
-\begin{frame}[allowframebreaks]{What's already been done}
-\begin{itemize}
-\item Pseudo-labeling
-\item Train model on labeled data
-\begin{itemize}
-\item E.g. 1\%/10\% of the data labeled
-\end{itemize}
-\item Predict pseudo-labels from unlabeled data
-\item Confidence of prediction [Threshold]
-\item Drop/Use prediction to train model further
-\item Finally use pseudo-labels + the 1\%/10\% labeled data to train the main model
-\end{itemize}
-\framebreak
-\begin{itemize}
-\item quantity and quality of pseudo-labels
-\item significant impact on main model accuracy!
-\item we want to improve the pseudo-label framework as much as possible
+\item Need a \textbf{global}, \textbf{smooth}, \textbf{monotonic}, \textbf{collision-free} method
\end{itemize}
\end{frame}
%----------------------------------------------------------------------------------------
-% SECTION 2
+% SECTION 3: BACKGROUND
%----------------------------------------------------------------------------------------
-\section{Cross-Model Pseudo-Labeling}
-\begin{frame}[allowframebreaks]{Paper's approach}
+\section{Background: Density Fields \& Integral Images}
+\begin{frame}{Density Estimation}
\begin{itemize}
-\item Based on complementary representations of the model
-\item Models of different size
-\item Different structural bias $\rightarrow$ different category-wise performance
-\item Small model
-\begin{itemize}
-\item lower capacity
-\item better captures temporal dynamics in recognizing actions
-\item scene changes/motion over time
-\end{itemize}
-\item Large model
-\begin{itemize}
-\item better learns spatial semantics
-\item to distinguish different action instances
-\item localize/identify objects in a specific scene
-\end{itemize}
-\end{itemize}
-\framebreak
-\begin{itemize}
-\item Cross-Model Pseudo-Labeling
-\item Primary backbone (large model)
-\item Supplemented by lightweight auxiliary network
-\begin{itemize}
-\item Different structure
-\item Fewer channels (smaller)
-\end{itemize}
-\item Different representation of data complements primary backbone
+\item Given samples $z_i = (x_i, y_i)$
+\item Build smoothed density:
+\[
+d_r(x,y) = \sum_{p=1}^n \varphi_r(x-x_p, y-y_p)
+\]
+\item Typically Gaussian kernel
+\item Add global constant $d_0$ for stability:
+\[
+d(i,j) = d_r(i,j) + d_0
+\]
+\item Ensures no empty regions $\rightarrow$ avoids singular mappings
\end{itemize}
\end{frame}
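To make the density estimation in the new frame above concrete, here is a minimal numpy/scipy sketch (not the authors' code): it rasterizes the samples onto a 2^k x 2^k grid, smooths with a Gaussian kernel standing in for $\varphi_r$, and adds the global offset $d_0$. Grid size, kernel width and the value of $d_0$ are illustrative assumptions.

```python
import numpy as np
from scipy.ndimage import gaussian_filter

def density_field(x, y, k=9, sigma=4.0, d0=1e-3):
    """Rasterize samples onto a 2^k x 2^k grid, smooth with a Gaussian kernel
    (standing in for phi_r) and add the global offset d0."""
    m = 2 ** k
    # map sample coordinates to integer grid indices in [0, m-1]
    xi = np.clip(((x - x.min()) / (np.ptp(x) + 1e-12) * (m - 1)).astype(int), 0, m - 1)
    yi = np.clip(((y - y.min()) / (np.ptp(y) + 1e-12) * (m - 1)).astype(int), 0, m - 1)
    counts = np.zeros((m, m))
    np.add.at(counts, (yi, xi), 1.0)            # raw samples per bin
    d_r = gaussian_filter(counts, sigma=sigma)  # smoothed density d_r(i, j)
    return d_r + d0                             # d(i, j) = d_r(i, j) + d0

# toy usage with random sample positions
rng = np.random.default_rng(0)
x, y = rng.normal(size=10_000), rng.normal(size=10_000)
d = density_field(x, y)
```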
-\begin{frame}{Structure Visualization}
-\includegraphics[scale=.17]{rsc/structure}
-\end{frame}
-\begin{frame}{Performance Perspectives}
-\begin{itemize}
-\item 1\% labeled data + 400 Labels
-\item Kinetics-400 dataset
-\end{itemize}
-\includegraphics[scale=.205]{rsc/performance_comparison}
-\end{frame}
-\section{Give me the math!}
-\begin{frame}{Definitions}
-\begin{itemize}
-\item Labeled data set of size $N_l$\\
-$\mathcal{V} = \{(v_1,y_1), \dots, (v_{N_l}, y_{N_l})\}$
-\item Unlabeled data set of size $N_u$\\
-$\mathcal{U} = \{u_1, \dots, u_{N_u}\}$
-\item in general $\lvert\mathcal{U}\rvert \gg \lvert\mathcal{V}\rvert$
-\end{itemize}
-\end{frame}
-\begin{frame}[allowframebreaks]{How the existing method \textit{FixMatch} works}
-\begin{itemize}
-\item $B_u \coloneqq \text{Batch size}$
-\item $\tau \coloneqq \text{Confidence Threshold (Hyperparameter)}$
-\item $F(\mathcal{T}_{\text{strong}}(u_i)) \coloneqq \text{Class distribution}$
-\item $p_i \coloneqq F(\mathcal{T}_{\text{weak}}(u_i))$
-\item $\hat{y}_i \coloneqq \arg \max(p_i) \coloneqq \text{Pseudo Label}$
-\item $\mathcal{H} \coloneqq \text{Cross-entropy loss}$
-\item $\mathcal{L}_u \coloneqq \text{Loss on the unlabeled data}$
-\item $F \coloneqq \text{Model}$
-\item $\mathbbm{1} \coloneqq \text{Indicator Function}$
-\end{itemize}
-\begin{align*}
-\mathcal{L}_u = \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i) \geq \tau) \mathcal{H}(\hat{y}_i,F(\mathcal{T}_{\text{strong}}(u_i)))
-\end{align*}
-\framebreak
-\begin{itemize}
-\item $\mathbbm{1}(\max(p_i) \geq \tau)$
-\begin{itemize}
-\item 'confidence-based masking'
-\item retain a label only if the largest probability is above the threshold
-\item keep only 'high-confidence' labels
-\end{itemize}
-\item $\mathcal{H}(\hat{y}_i,F(\mathcal{T}_{\text{strong}}(u_i)))$
-\begin{itemize}
-\item 'consistency regularization'
-\item cross-entropy between the pseudo-label from the weakly augmented view and the prediction on the strongly augmented view
-\end{itemize}
-\end{itemize}
-\end{frame}
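To illustrate the removed FixMatch loss $\mathcal{L}_u$ above with numbers, here is a small numpy sketch of the confidence-masked cross-entropy. `probs_weak` and `probs_strong` stand in for the model outputs $F(\mathcal{T}_{\text{weak}}(u_i))$ and $F(\mathcal{T}_{\text{strong}}(u_i))$; they are assumed arrays here, not outputs of a real network.

```python
import numpy as np

def fixmatch_unlabeled_loss(probs_weak, probs_strong, tau=0.95):
    """L_u from the slide: keep only confident weak predictions (max p_i >= tau),
    use their argmax as pseudo-label and take the cross-entropy against the
    prediction on the strongly augmented view. Shapes: (B_u, num_classes)."""
    pseudo = probs_weak.argmax(axis=1)            # \hat{y}_i
    mask = probs_weak.max(axis=1) >= tau          # indicator 1(max p_i >= tau)
    ce = -np.log(probs_strong[np.arange(len(pseudo)), pseudo] + 1e-12)
    return (mask * ce).sum() / len(pseudo)        # average over the batch size B_u

# toy usage with random "class distributions"
rng = np.random.default_rng(0)
pw = rng.dirichlet(np.ones(10), size=8)
ps = rng.dirichlet(np.ones(10), size=8)
print(fixmatch_unlabeled_loss(pw, ps))
```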
-\begin{frame}[allowframebreaks]{CMPL (Cross-Model Pseudo-Labeling)}
-\begin{itemize}
-\item $F(\cdot) \coloneqq \text{Primary backbone}$
-\item $A(\cdot) \coloneqq \text{Auxiliary network}$
-\item Learning on labeled data
-\begin{align*}
-\mathcal{L}_s^F &= \frac{1}{B_l} \sum_{i=1}^{B_l} \mathcal{H}(y_i,F(\mathcal{T}^F_{\text{standard}}(v_i)))\\
-\mathcal{L}_s^A &= \frac{1}{B_l} \sum_{i=1}^{B_l} \mathcal{H}(y_i,A(\mathcal{T}^F_{\text{standard}}(v_i)))
-\end{align*}
-\item $\mathcal{T}^F_{\text{standard}}(v_i) \coloneqq \text{standard augmentations for action recognition}$
-\end{itemize}
-\framebreak
-\begin{itemize}
-\item Learning on unlabeled data
-\begin{align*}
-\mathcal{L}_u^F &= \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i^A) \geq \tau) \mathcal{H}(\hat{y}_i^A,F(\mathcal{T}_{\text{strong}}(u_i)))\\
-\mathcal{L}_u^A &= \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i^F) \geq \tau) \mathcal{H}(\hat{y}_i^F,A(\mathcal{T}_{\text{strong}}(u_i)))
-\end{align*}
-\item Complete training objective
-\begin{align*}
-\mathcal{L} = (\mathcal{L}_s^F + \mathcal{L}_s^A) + \lambda(\mathcal{L}_u^F + \mathcal{L}_u^A)
-\end{align*}
-\item $\lambda \coloneqq \text{Balancing coefficient for unsupervised loss}$
-\end{itemize}
-\end{frame}
-\section{Implementation}
-\begin{frame}{Networks}
-\begin{itemize}
-\item Auxiliary Network
-\begin{itemize}
-\item sub-network of primary model
-\item 3D-ResNet18
-\item \textbf{3D-ResNet50x1/4}
-\end{itemize}
-\item Backbone network
-\begin{itemize}
-\item larger version of aux-net
-\item \textbf{3D-ResNet50}
+\begin{frame}{Integral Images (InIms)}
+\begin{itemize}
+\item Integral images compute cumulative sums over regions
+\item Four standard tables:
+\[
+\alpha,\beta,\gamma,\delta
+\]
+\item Four tilted ($45^\circ$) tables:
+\[
+\alpha_t, \beta_t, \gamma_t, \delta_t
+\]
+\item Each encodes global density distribution
+\item Key advantage:
+\begin{itemize}
+\item Displacements depend on \textbf{global density}, not local neighborhood
+\item Avoids collisions
\end{itemize}
\end{itemize}
\end{frame}
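The four standard tables are ordinary summed-area tables that differ only in the corner from which they accumulate; below is a minimal numpy sketch of one such table and of a rectangle query against it (the tilted tables follow the same idea in a 45-degree rotated frame and are omitted). This is an illustration, not the paper's GPU implementation.

```python
import numpy as np

def integral_image(d):
    """Standard summed-area table: I[i, j] = sum of d over all rows <= i, cols <= j."""
    return d.cumsum(axis=0).cumsum(axis=1)

def rect_sum(I, i0, j0, i1, j1):
    """Sum of the underlying grid over [i0..i1] x [j0..j1] via four lookups."""
    total = I[i1, j1]
    if i0 > 0:
        total -= I[i0 - 1, j1]
    if j0 > 0:
        total -= I[i1, j0 - 1]
    if i0 > 0 and j0 > 0:
        total += I[i0 - 1, j0 - 1]
    return total

d = np.arange(16, dtype=float).reshape(4, 4)
I = integral_image(d)
assert np.isclose(rect_sum(I, 1, 1, 2, 3), d[1:3, 1:4].sum())
```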
-\begin{frame}{Dataset}
+%----------------------------------------------------------------------------------------
+% SECTION 4: METHOD
+%----------------------------------------------------------------------------------------
+\section{Density-Equalizing Mapping}
+\begin{frame}{Original Mapping (Molchanov \& Linsen)}
\begin{itemize}
-\item Kinetics-400
+\item Prior work defined mapping:
+\[
+t(x,y; d) = \frac{
+\alpha q_1 + \beta q_2 + \gamma q_3 + \delta q_4
++ \alpha_t (x,1) + \beta_t (1,y) + \gamma_t (x,0) + \delta_t (0,y)
+}{2C}
+\]
+\item But:
\begin{itemize}
-\item 400 categories
-\item 240k/20k training/validation samples
+\item Not identity for uniform density
+\item Iteration unstable
+\item Does not converge to equalized distribution
\end{itemize}
-\item UCF-101
-\begin{itemize}
-\item 101 classes
-\item 9.5k/4k training/validation samples
-\end{itemize}
-\item $\approx$10sec per video
-\item 1\% or 10\% labeled subsets, sampled class-balanced from the distribution
\end{itemize}
\end{frame}
+\begin{frame}{Corrected Mapping (This Paper)}
+\begin{itemize}
+\item Compute deformation for true density $d$
+\item Compute deformation for constant density $d_0$
+\item Subtract:
+\[
+t(x,y) = (x,y) + t(x,y; d) - t(x,y; d_0)
+\]
+\item This ensures:
+\begin{itemize}
+\item Identity for uniform density
+\item Smooth monotonic deformation
+\item Progressive convergence to equalization
+\item No overlap of regions
+\end{itemize}
+\end{itemize}
+\end{frame}
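A small sketch of the correction step, assuming a placeholder `deformation(density)` that evaluates the integral-image based displacement field $t(\cdot\,; d)$ on the grid. The placeholder hides the actual formula from the previous frame, so the sketch only shows how subtracting the constant-density deformation yields the identity for uniform data.

```python
import numpy as np

def corrected_deformation(deformation, d, d0):
    """t(x, y) = (x, y) + t(x, y; d) - t(x, y; d0): subtracting the deformation
    computed for the constant density d0 makes the mapping the identity
    whenever the data density is already uniform."""
    raw = deformation(d)                           # t(.; d), assumed shape (m, m, 2)
    baseline = deformation(np.full_like(d, d0))    # t(.; d0) on a constant field
    m = d.shape[0]
    ys, xs = np.meshgrid(np.arange(m), np.arange(m), indexing="ij")
    identity = np.stack([xs, ys], axis=-1).astype(float)
    return identity + raw - baseline

# sanity check with a stand-in deformation model (NOT the formula from the slides)
dummy = lambda dens: np.stack([dens, dens], axis=-1)
d_uniform = np.full((4, 4), 0.25)
t = corrected_deformation(dummy, d_uniform, d0=0.25)
ys, xs = np.meshgrid(np.arange(4), np.arange(4), indexing="ij")
assert np.allclose(t, np.stack([xs, ys], axis=-1))
```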
-\begin{frame}{Performance Results}
-\includegraphics[scale=.65]{rsc/results}
+\begin{frame}{Iterative Algorithm Overview}
+\begin{enumerate}
+\item Rasterize and smooth density
+\item Compute integral images
+\item Compute corrected deformation $t(x,y)$
+\item Apply bi-linear interpolation to sample positions
+\item Iterate until:
+\begin{itemize}
+\item Time budget reached
+\item Uniformity threshold reached
+\end{itemize}
+\end{enumerate}
+\end{frame}
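Assembled into a loop, the iteration could look like the following Python sketch. `density_field` and `corrected_deformation` are the earlier sketches, `deformation` remains a placeholder for the integral-image formula, and the bilinear lookup of step 4 uses `scipy.ndimage.map_coordinates` with `order=1`. Bin count and stopping threshold are assumptions, not the paper's settings.

```python
import numpy as np
from scipy.ndimage import map_coordinates

def declutter(x, y, deformation, iters=10, k=9, d0=1e-3, target_std=0.5):
    """Iteratively move samples toward a uniform distribution."""
    m = 2 ** k
    for _ in range(iters):
        d = density_field(x, y, k=k, d0=d0)            # 1. rasterize and smooth
        t = corrected_deformation(deformation, d, d0)  # 2.+3. corrected t(x, y)
        # 4. bilinear interpolation of the deformed grid at each sample position
        gx = (x - x.min()) / (np.ptp(x) + 1e-12) * (m - 1)
        gy = (y - y.min()) / (np.ptp(y) + 1e-12) * (m - 1)
        x = map_coordinates(t[..., 0], [gy, gx], order=1)
        y = map_coordinates(t[..., 1], [gy, gx], order=1)
        # 5. stop once the samples-per-bin distribution is uniform enough
        counts, _, _ = np.histogram2d(x, y, bins=64)
        if counts.std() <= target_std:
            break
    return x, y
```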
+%----------------------------------------------------------------------------------------
+% SECTION 5: IMPLEMENTATION
+%----------------------------------------------------------------------------------------
+\section{GPU Implementation}
+\begin{frame}{Efficient GPU Computation}
+\begin{itemize}
+\item All major steps implemented on GPU:
+\begin{itemize}
+\item Density accumulation
+\item Gaussian smoothing
+\item Integral image computation
+\end{itemize}
+\item Fast multi-pass reduction for InIms
+\item Complexity:
+\[
+O(n + m)
+\]
+where $m = 2^k \times 2^k$ is the texture resolution
+\item Achieves interactive rates for millions of samples
+\end{itemize}
+\end{frame}
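The slides only name a fast multi-pass reduction for the integral images. One common GPU-style scheme for the required prefix sums is a log-step (Hillis-Steele) scan; the numpy sketch below mimics it on the CPU and is an assumption about what such passes can look like, not the paper's shader code.

```python
import numpy as np

def log_step_row_scan(d):
    """Inclusive prefix sum along each row in ceil(log2(width)) passes.
    On a GPU, each pass is one full-texture render/compute pass."""
    out = d.copy()
    step = 1
    while step < out.shape[1]:
        shifted = np.zeros_like(out)
        shifted[:, step:] = out[:, :-step]   # read the value `step` texels to the left
        out = out + shifted                  # add it for all texels "in parallel"
        step *= 2
    return out

d = np.arange(8, dtype=float).reshape(2, 4)
assert np.allclose(log_step_row_scan(d), d.cumsum(axis=1))
# repeating the same scan along the columns yields the full integral image
```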
+%----------------------------------------------------------------------------------------
+% SECTION 6: VISUAL ENCODING
+%----------------------------------------------------------------------------------------
+\section{Visual Encoding of Deformation}
+\begin{frame}{Problem After Deformation}
+\begin{itemize}
+\item After equalization:
+\begin{itemize}
+\item Local densities lost
+\item Cluster shapes distorted
+\item Distances no longer meaningful
+\end{itemize}
+\item Need additional encodings to preserve structure
+\end{itemize}
+\end{frame}
+\begin{frame}{Three Proposed Encodings}
+\begin{itemize}
+\item \textbf{Deformed grid lines}
+\begin{itemize}
+\item Show local expansion / contraction
+\end{itemize}
+\item \textbf{Background density texture}
+\begin{itemize}
+\item Shows cluster cores after deformation
+\end{itemize}
+\item \textbf{Contour lines}
+\begin{itemize}
+\item Reveal subcluster structure
+\end{itemize}
+\end{itemize}
+\end{frame}
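As an illustration of the first encoding, a short matplotlib sketch that pushes a regular grid through the deformation and draws the resulting grid lines; `t` is assumed to be the (m, m, 2) array produced by the corrected-mapping sketch above.

```python
import matplotlib.pyplot as plt

def draw_deformed_grid(t, every=32, ax=None):
    """Draw deformed horizontal and vertical grid lines; the local size of the
    grid cells encodes how much the deformation expanded or contracted space."""
    if ax is None:
        ax = plt.gca()
    for i in range(0, t.shape[0], every):   # horizontal grid lines
        ax.plot(t[i, :, 0], t[i, :, 1], color="lightgray", linewidth=0.5)
    for j in range(0, t.shape[1], every):   # vertical grid lines
        ax.plot(t[:, j, 0], t[:, j, 1], color="lightgray", linewidth=0.5)
    return ax
```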
+%----------------------------------------------------------------------------------------
+% SECTION 7: RESULTS
+%----------------------------------------------------------------------------------------
+\section{Results}
+\begin{frame}{Performance}
+\begin{itemize}
+\item Runs at interactive frame rates:
+\begin{itemize}
+\item e.g. 4M samples in $\approx 28$ ms per iteration
+\end{itemize}
+\item Standard deviation of samples/bin decreases monotonically
+\item Overplotting fraction also decreases monotonically
+\end{itemize}
+\centering
+\includegraphics[scale=0.4]{rsc/results}
+\end{frame}
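Both reported curves are cheap to recompute per iteration. The numpy sketch below uses assumed definitions: 64x64 bins, and the overplotting fraction approximated as the share of samples that land in a bin together with at least one other sample; the paper's exact definitions may differ.

```python
import numpy as np

def uniformity_metrics(x, y, bins=64):
    """Std of samples per bin (uniformity) and a proxy for the overplotting fraction."""
    counts, _, _ = np.histogram2d(x, y, bins=bins)
    std_per_bin = counts.std()
    overplot_fraction = counts[counts > 1].sum() / max(counts.sum(), 1)
    return std_per_bin, overplot_fraction
```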
+\begin{frame}{User Study}
+\begin{itemize}
+\item 25 participants, 3 tasks:
+\begin{enumerate}
+\item Estimate cluster size
+\item Sort clusters by size
+\item Select clusters (lasso)
+\end{enumerate}
+\item Findings:
+\begin{itemize}
+\item Size estimation (T1): regularized significantly better
+\item Sorting (T2): regularized significantly better
+\item Cluster selection (T3):
+\begin{itemize}
+\item Grid encoding: worst
+\item Background texture: better
+\item Original scatterplot: best
+\end{itemize}
+\end{itemize}
+\end{itemize}
\end{frame}