% Files
% Seminar_in_AI_Master/presentation/main.tex
% 2025-11-23 00:40:19 +01:00
%
% 403 lines
% 13 KiB
% TeX

\documentclass[usenames,dvipsnames]{beamer}
%----------------------------------------------------------------------------------------
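% Presentation: De-Cluttering Scatterplots with Integral Images
% NOTE(review): the previous header read "Struktur und Pointer Referat, 20.04.2020" -- apparently left over from an earlier deck; confirm and drop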
%----------------------------------------------------------------------------------------
\usetheme[nofirafonts]{focus}
\usepackage[utf8]{inputenc}
\usepackage{booktabs}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsfonts}
\usepackage{bbm}
\usepackage{hyperref}
\usepackage{graphicx}
\usepackage{xcolor}
\usepackage{mathtools}
\RequirePackage[T1]{fontenc}
\PassOptionsToPackage{sfdefault}{FiraSans}
\RequirePackage{FiraSans}
\RequirePackage{FiraMono}
% Color definitions
\definecolor{backgroundcoloreq}{RGB}{180,140,0}
\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
\definecolor{codepurple}{rgb}{0.58,0,0.82}
\definecolor{codeorange}{RGB}{190,100,0}
% Explicitly declare the calligraphic math alphabet (\mathcal): OMS encoding, cmbrs family
\DeclareMathAlphabet{\mathcal}{OMS}{cmbrs}{m}{n}
%----------------------------------------------------------------------------------------
% TITLE SLIDE
%----------------------------------------------------------------------------------------
\title{De-Cluttering Scatterplots}
\subtitle{with Integral Images}
\author{Lukas Heiligenbrunner}
\date{\today}
%------------------------------------------------
\begin{document}
%------------------------------------------------
\begin{frame}
\maketitle
\end{frame}
%----------------------------------------------------------------------------------------
% SECTION 1: INTRODUCTION
%----------------------------------------------------------------------------------------
\section{Introduction}
\begin{frame}{Goal of the Paper}
\begin{itemize}
\item Scatterplots are fundamental for exploring multidimensional data
\item But: with large datasets they suffer from \textbf{overplotting}
\item Dense regions obscure structure, samples become inaccessible
\item Goal:
\begin{itemize}
\item Reduce clutter
\item Preserve neighborhood relations
\item Achieve uniform sample distribution
\item Maintain interpretability
\end{itemize}
\end{itemize}
\end{frame}
%----------------------------------------------------------------------------------------
% SECTION 2: PROBLEM
%----------------------------------------------------------------------------------------
\section{Problem: Overplotting}
\begin{frame}{Why Scatterplots Clutter}
\begin{itemize}
\item Modern datasets: millions of samples
\item Pixel resolution fixed $\rightarrow$ many samples map to the same pixel
\item Consequences:
\begin{itemize}
\item Occlusion of clusters + outliers
\item Loss of density information
\item Hard to select individual items
\item Misleading visual perception
\end{itemize}
\item A method is needed to \textbf{declutter} without losing structure
\end{itemize}
\end{frame}
\begin{frame}
\centering
\includegraphics[scale=0.8]{rsc/overplotting}
\footnotesize Source: \url{https://statisticsglobe.com/avoid-overplotting-r}
\end{frame}
\begin{frame}{Limitations of Traditional Approaches}
\begin{itemize}
\item Transparency-based methods
\begin{itemize}
\item Improve density perception
\item But still lose individual sample visibility
\end{itemize}
\item Down-sampling
\begin{itemize}
\item Removes data $\rightarrow$ not acceptable for analysis
\end{itemize}
\item Local spatial distortions
\begin{itemize}
\item Risk of collisions
\item Often non-monotonic mappings
\end{itemize}
\item Need a \textbf{global}, \textbf{smooth}, \textbf{monotonic}, \textbf{collision-free} method
\end{itemize}
\end{frame}
%----------------------------------------------------------------------------------------
% SECTION 3: BACKGROUND
%----------------------------------------------------------------------------------------
\section{Background: Density Fields \& Integral Images}
\begin{frame}{Density Estimation}
\begin{itemize}
\item Given samples $z_i = (x_i, y_i)$
\item Build smoothed density:
\[
d_r(x,y) = \sum_{p=1}^n \varphi_r(x-x_p, y-y_p)
\]
\item Typically Gaussian kernel
\item Add global constant $d_0$ for stability:
\[
d(i,j) = d_r(i,j) + d_0
\]
\item Ensures no empty regions $\rightarrow$ avoids singular mappings
\end{itemize}
\end{frame}
\begin{frame}{Integral Images (InIms)}
\begin{itemize}
\item Integral images compute cumulative sums over regions
\item Four standard tables:
\[
\alpha,\beta,\gamma,\delta
\]
\item Four tilted (45°) tables:
\[
\alpha_t, \beta_t, \gamma_t, \delta_t
\]
\item Each encodes global density distribution
\item Key advantage:
\begin{itemize}
\item Displacements depend on \textbf{global density}, not local neighborhood
\item Avoids collisions
\end{itemize}
\end{itemize}
\end{frame}
%----------------------------------------------------------------------------------------
% SECTION 4: METHOD
%----------------------------------------------------------------------------------------
\section{Density-Equalizing Mapping}
\begin{frame}{Original Mapping (Molchanov \& Linsen)}
\begin{itemize}
\item Prior work defined mapping:
\[
t(x,y; d) = \frac{
\alpha q_1 + \beta q_2 + \gamma q_3 + \delta q_4
+ \alpha_t (x,1) + \beta_t (1,y) + \gamma_t (x,0) + \delta_t (0,y)
}{2C}
\]
\item But:
\begin{itemize}
\item Not identity for uniform density
\item Iteration unstable
\item Does not converge to equalized distribution
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{Corrected Mapping (This Paper)}
\begin{itemize}
\item Compute deformation for true density $d$
\item Compute deformation for constant density $d_0$
\item Subtract:
\[
t(x,y) = (x,y) + t(x,y; d) - t(x,y; d_0)
\]
\item This ensures:
\begin{itemize}
\item Identity for uniform density
\item Smooth monotonic deformation
\item Progressive convergence to equalization
\item No overlap of regions
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{Iterative Algorithm Overview}
\begin{enumerate}
\item Rasterize and smooth density
\item Compute integral images
\item Compute corrected deformation $t(x,y)$
\item Apply bi-linear interpolation to sample positions
\item Iterate until:
\begin{itemize}
\item Time budget reached
\item Uniformity threshold reached
\end{itemize}
\end{enumerate}
\end{frame}
\begin{frame}
\centering
\begin{figure}
\centering
\begin{minipage}{0.4\textwidth}
\centering
\includegraphics[width=\textwidth]{rsc/2408.06513v1_page_7_1}
\vspace{4pt}
\footnotesize MNIST Dataset (UMAP)
\end{minipage}
\begin{minipage}{0.15\textwidth}
\centering
$\Longrightarrow$
\end{minipage}
\begin{minipage}{0.4\textwidth}
\centering
\includegraphics[width=\textwidth]{rsc/2408.06513v1_page_7_2}
\vspace{4pt}
\footnotesize Visual encoding of the density-equalizing transform
\end{minipage}
\label{fig:figure}
\end{figure}
\end{frame}
%----------------------------------------------------------------------------------------
% SECTION 6: VISUAL ENCODING
%----------------------------------------------------------------------------------------
\section{Visual Encoding of Deformation}
\begin{frame}{Problem After Deformation}
\begin{itemize}
\item After equalization:
\begin{itemize}
\item Local densities lost
\item Cluster shapes distorted
\item Distances no longer meaningful
\end{itemize}
\item Need additional encodings to preserve structure
\end{itemize}
\end{frame}
\begin{frame}{Three Proposed Encodings}
\begin{itemize}
\item \textbf{Deformed grid lines}
\begin{itemize}
\item Show local expansion / contraction
\end{itemize}
\item \textbf{Background density texture}
\begin{itemize}
\item Shows cluster cores after deformation
\end{itemize}
\item \textbf{Contour lines}
\begin{itemize}
\item Reveal subcluster structure
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{Three Proposed Encodings II}
\centering
\begin{figure}
\centering
\begin{minipage}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{rsc/2408.06513v1_page_7_2}
\vspace{4pt}
\footnotesize Deformed grid lines
\end{minipage}
\begin{minipage}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{rsc/2408.06513v1_page_7_3}
\vspace{4pt}
\footnotesize Background density texture
\end{minipage}
\begin{minipage}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{rsc/2408.06513v1_page_7_4}
\vspace{4pt}
\footnotesize Contour lines
\end{minipage}
\label{fig:figure2}
\end{figure}
\end{frame}
%----------------------------------------------------------------------------------------
% SECTION 5: IMPLEMENTATION
%----------------------------------------------------------------------------------------
\section{GPU Implementation}
\begin{frame}{Efficient GPU Computation}
\begin{itemize}
\item All major steps implemented on GPU:
\begin{itemize}
\item Density accumulation
\item Gaussian smoothing
\item Integral image computation
\end{itemize}
\item Fast multi-pass reduction for InIms
\item Complexity:
\[
O(n + m)
\]
where $m = 2^k \times 2^k$ is the texture resolution
\item Achieves interactive rates for millions of samples
\end{itemize}
\end{frame}
%----------------------------------------------------------------------------------------
% SECTION 7: RESULTS
%----------------------------------------------------------------------------------------
\section{Results}
\begin{frame}{Performance}
\begin{itemize}
\item Runs at interactive frame rates:
\begin{itemize}
\item e.g.\ 4M samples in $\approx 28$\,ms per iteration
\end{itemize}
\item Standard deviation of samples/bin decreases monotonically
\item Overplotting fraction also decreases monotonically
\end{itemize}
\centering
\includegraphics[scale=0.4]{rsc/results}
\end{frame}
% --- THE END
\begin{frame}[focus]
Thanks for your attention!
\end{frame}
%----------------------------------------------------------------------------------------
% CLOSING/SUPPLEMENTARY SLIDES
%----------------------------------------------------------------------------------------
\appendix
\begin{frame}{Sources}
\nocite{*} % Display all references regardless of if they were cited
\bibliography{sources}
\bibliographystyle{plain}
\end{frame}
\section{Backup}\label{sec:backup}
\begin{frame}{User Study}
\begin{itemize}
\item 25 participants, 3 tasks:
\begin{enumerate}
\item Estimate cluster size
\item Sort clusters by size
\item Select clusters (lasso)
\end{enumerate}
\item Findings:
\begin{itemize}
\item Size estimation (T1): regularized significantly better
\item Sorting (T2): regularized significantly better
\item Cluster selection (T3):
\begin{itemize}
\item Grid encoding: worst
\item Background texture: better
\item Original scatterplot: best
\end{itemize}
\end{itemize}
\end{itemize}
\end{frame}
\end{document}