Seminar_in_AI_Master/presentation/main.tex

\documentclass[usenames,dvipsnames]{beamer}
%----------------------------------------------------------------------------------------
%	Struktur und Pointer Referat
%   20.04.2020
%----------------------------------------------------------------------------------------
\usetheme[nofirafonts]{focus}


\usepackage[utf8]{inputenc}

\usepackage{booktabs}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsfonts}
\usepackage{bbm}
\usepackage{hyperref}
\usepackage{graphicx}
\usepackage{xcolor}
\usepackage{mathtools}

\RequirePackage[T1]{fontenc}

\PassOptionsToPackage{sfdefault}{FiraSans}
\RequirePackage{FiraSans}

\RequirePackage{FiraMono}

% Farbdefinitionen
\definecolor{backgroundcoloreq}{RGB}{180,140,0}
\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
\definecolor{codepurple}{rgb}{0.58,0,0.82}
\definecolor{codeorange}{RGB}{190,100,0}

% Redefine \mathcal (the calligraphic math alphabet) to use the OMS-encoded cmbrs family
\DeclareMathAlphabet{\mathcal}{OMS}{cmbrs}{m}{n}


%----------------------------------------------------------------------------------------
%	 TITLE SLIDE
%----------------------------------------------------------------------------------------

\title{De-Cluttering Scatterplots}
\subtitle{with Integral Images}
\author{Lukas Heiligenbrunner}
\date{\today}

%------------------------------------------------

\begin{document}

%------------------------------------------------

    \begin{frame}
        \maketitle
    \end{frame}


%----------------------------------------------------------------------------------------
%	 SECTION 1: INTRODUCTION
%----------------------------------------------------------------------------------------

    \section{Introduction}

    \begin{frame}{Goal of the Paper}
        \begin{itemize}
            \item Scatterplots are fundamental for exploring multidimensional data
            \item But: with large datasets they suffer from \textbf{overplotting}
            \item Dense regions obscure structure, samples become inaccessible
            \item Goal:
            \begin{itemize}
                \item Reduce clutter
                \item Preserve neighborhood relations
                \item Achieve uniform sample distribution
                \item Maintain interpretability
            \end{itemize}
        \end{itemize}
    \end{frame}

%----------------------------------------------------------------------------------------
%	SECTION 2: PROBLEM
%----------------------------------------------------------------------------------------

    \section{Problem: Overplotting}

    \begin{frame}{Why Scatterplots Clutter}
        \begin{itemize}
            \item Modern datasets: millions of samples
            \item Pixel resolution fixed $\rightarrow$ many samples map to the same pixel
            \item Consequences:
            \begin{itemize}
                \item Occlusion of clusters + outliers
                \item Loss of density information
                \item Hard to select individual items
                \item Misleading visual perception
            \end{itemize}
            \item A method is needed to \textbf{declutter} without losing structure
        \end{itemize}
    \end{frame}

    \begin{frame}{Limitations of Traditional Approaches}
        \begin{itemize}
            \item Transparency-based methods
            \begin{itemize}
                \item Improve density perception
                \item But still lose individual sample visibility
            \end{itemize}
            \item Down-sampling
            \begin{itemize}
                \item Removes data $\rightarrow$ not acceptable for analysis
            \end{itemize}
            \item Local spatial distortions
            \begin{itemize}
                \item Risk of collisions
                \item Often non-monotonic mappings
            \end{itemize}
            \item Need a \textbf{global}, \textbf{smooth}, \textbf{monotonic}, \textbf{collision-free} method
        \end{itemize}
    \end{frame}

    %----------------------------------------------------------------------------------------
%	SECTION 3: BACKGROUND
%----------------------------------------------------------------------------------------

    \section{Background: Density Fields \& Integral Images}

    \begin{frame}{Density Estimation}
        \begin{itemize}
            \item Given samples $z_p = (x_p, y_p)$, $p = 1, \dots, n$
            \item Build smoothed density:
            \[
                d_r(x,y) = \sum_{p=1}^n \varphi_r(x-x_p, y-y_p)
            \]
            \item Typically Gaussian kernel
            \item Add global constant $d_0$ for stability:
            \[
                d(i,j) = d_r(i,j) + d_0
            \]
            \item Ensures no empty regions $\rightarrow$ avoids singular mappings
        \end{itemize}
    \end{frame}

    \begin{frame}{Integral Images (InIms)}
        \begin{itemize}
            \item Integral images compute cumulative sums over regions
            \item Four standard tables:
            \[
                \alpha,\beta,\gamma,\delta
            \]
            \item Four tilted ($45^\circ$) tables:
            \[
                \alpha_t, \beta_t, \gamma_t, \delta_t
            \]
            \item Each encodes global density distribution
            \item Key advantage:
            \begin{itemize}
                \item Displacements depend on \textbf{global density}, not local neighborhood
                \item Avoids collisions
            \end{itemize}
        \end{itemize}
    \end{frame}

    %----------------------------------------------------------------------------------------
%	SECTION 4: METHOD
%----------------------------------------------------------------------------------------

    \section{Density-Equalizing Mapping}

    \begin{frame}{Original Mapping (Molchanov \& Linsen)}
        \begin{itemize}
            \item Prior work defined mapping:
            \[
                t(x,y; d) = \frac{
                    \alpha q_1 + \beta q_2 + \gamma q_3 + \delta q_4
                    + \alpha_t (x,1) + \beta_t (1,y) + \gamma_t (x,0) + \delta_t (0,y)
                }{2C}
            \]
            \item But:
            \begin{itemize}
                \item Not identity for uniform density
                \item Iteration unstable
                \item Does not converge to equalized distribution
            \end{itemize}
        \end{itemize}
    \end{frame}

    \begin{frame}{Corrected Mapping (This Paper)}
        \begin{itemize}
            \item Compute deformation for true density $d$
            \item Compute deformation for constant density $d_0$
            \item Subtract:
            \[
                t(x,y) = (x,y) + t(x,y; d) - t(x,y; d_0)
            \]
            \item This ensures:
            \begin{itemize}
                \item Identity for uniform density
                \item Smooth monotonic deformation
                \item Progressive convergence to equalization
                \item No overlap of regions
            \end{itemize}
        \end{itemize}
    \end{frame}

    \begin{frame}{Iterative Algorithm Overview}
        \begin{enumerate}
            \item Rasterize and smooth density
            \item Compute integral images
            \item Compute corrected deformation $t(x,y)$
            \item Apply bi-linear interpolation to sample positions
            \item Iterate until:
            \begin{itemize}
                \item Time budget reached
                \item Uniformity threshold reached
            \end{itemize}
        \end{enumerate}
    \end{frame}
%----------------------------------------------------------------------------------------
%	SECTION 5: IMPLEMENTATION
%----------------------------------------------------------------------------------------

    \section{GPU Implementation}

    \begin{frame}{Efficient GPU Computation}
        \begin{itemize}
            \item All major steps implemented on GPU:
            \begin{itemize}
                \item Density accumulation
                \item Gaussian smoothing
                \item Integral image computation
            \end{itemize}
            \item Fast multi-pass reduction for InIms
            \item Complexity:
            \[
                O(n + m)
            \]
            where $m = 2^k \times 2^k$ is the texture resolution
            \item Achieves interactive rates for millions of samples
        \end{itemize}
    \end{frame}

%----------------------------------------------------------------------------------------
%	SECTION 6: VISUAL ENCODING
%----------------------------------------------------------------------------------------

    \section{Visual Encoding of Deformation}

    \begin{frame}{Problem After Deformation}
        \begin{itemize}
            \item After equalization:
            \begin{itemize}
                \item Local densities lost
                \item Cluster shapes distorted
                \item Distances no longer meaningful
            \end{itemize}
            \item Need additional encodings to preserve structure
        \end{itemize}
    \end{frame}

    \begin{frame}{Three Proposed Encodings}
        \begin{itemize}
            \item \textbf{Deformed grid lines}
            \begin{itemize}
                \item Show local expansion / contraction
            \end{itemize}
            \item \textbf{Background density texture}
            \begin{itemize}
                \item Shows cluster cores after deformation
            \end{itemize}
            \item \textbf{Contour lines}
            \begin{itemize}
                \item Reveal subcluster structure
            \end{itemize}
        \end{itemize}
    \end{frame}

%----------------------------------------------------------------------------------------
%	SECTION 7: RESULTS
%----------------------------------------------------------------------------------------

    \section{Results}

    \begin{frame}{Performance}
        \begin{itemize}
            \item Runs at interactive frame rates:
            \begin{itemize}
                \item e.g.\ 4M samples in $\approx 28$~ms per iteration
            \end{itemize}
            \item Standard deviation of samples/bin decreases monotonically
            \item Overplotting fraction also decreases monotonically
        \end{itemize}
        \centering
        \includegraphics[scale=0.4]{rsc/results}
    \end{frame}

    \begin{frame}{User Study}
        \begin{itemize}
            \item 25 participants, 3 tasks:
            \begin{enumerate}
                \item Estimate cluster size
                \item Sort clusters by size
                \item Select clusters (lasso)
            \end{enumerate}
            \item Findings:
            \begin{itemize}
                \item Size estimation (T1): regularized significantly better
                \item Sorting (T2): regularized significantly better
                \item Cluster selection (T3):
                \begin{itemize}
                    \item Grid encoding: worst
                    \item Background texture: better
                    \item Original scatterplot: best
                \end{itemize}
            \end{itemize}
        \end{itemize}
    \end{frame}


    % ---  THE END

    \begin{frame}[focus]
        Thanks for your attention!
    \end{frame}

%----------------------------------------------------------------------------------------
%	 CLOSING/SUPPLEMENTARY SLIDES
%----------------------------------------------------------------------------------------

    \appendix

    \begin{frame}[allowframebreaks]{Sources}
        \nocite{*} % Display all references regardless of if they were cited
        \bibliography{sources}
        \bibliographystyle{plain}
    \end{frame}

\end{document}