\documentclass[usenames,dvipsnames]{beamer}
%----------------------------------------------------------------------------------------
%   Structure and Pointer presentation
%   20.04.2020
%   NOTE(review): this header does not match the talk title set below
%   ("De-Cluttering Scatterplots") -- presumably left over from an earlier
%   deck; confirm and update.
%----------------------------------------------------------------------------------------

\usetheme[nofirafonts]{focus}

\usepackage[utf8]{inputenc}
\usepackage{booktabs}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsfonts}
\usepackage{bbm}
\usepackage{hyperref}
\usepackage{graphicx}
\usepackage{xcolor}
\usepackage{mathtools}

% The theme is loaded with `nofirafonts`, so the Fira fonts are loaded here
% explicitly, with Fira Sans requested as the sans-serif default.
\RequirePackage[T1]{fontenc}
\PassOptionsToPackage{sfdefault}{FiraSans}
\RequirePackage{FiraSans}
\RequirePackage{FiraMono}

% Color definitions
\definecolor{backgroundcoloreq}{RGB}{180,140,0}
\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
\definecolor{codepurple}{rgb}{0.58,0,0.82}
\definecolor{codeorange}{RGB}{190,100,0}

% Redefine \mathcal to use the OMS-encoded cmbrs family (medium weight,
% upright shape) as the calligraphic alphabet.
\DeclareMathAlphabet{\mathcal}{OMS}{cmbrs}{m}{n}

%----------------------------------------------------------------------------------------
%   TITLE SLIDE
%----------------------------------------------------------------------------------------

\title{De-Cluttering Scatterplots}
\subtitle{with Integral Images}
\author{Lukas Heiligenbrunner}
\date{\today}

%------------------------------------------------

\begin{document}

%------------------------------------------------

\begin{frame}
    \maketitle
\end{frame}

%----------------------------------------------------------------------------------------
%   SECTION 1: INTRODUCTION
%----------------------------------------------------------------------------------------

\section{Introduction}

\begin{frame}{Goal of the Paper}
    \begin{itemize}
        \item Scatterplots are fundamental for exploring multidimensional data
        \item But: with large datasets they suffer from \textbf{overplotting}
        \item Dense regions obscure structure, samples become inaccessible
        \item Goal:
        \begin{itemize}
            \item Reduce clutter
            \item Preserve neighborhood relations
            \item Achieve uniform sample distribution
            \item Maintain interpretability
        \end{itemize}
    \end{itemize}
\end{frame}

%----------------------------------------------------------------------------------------
%   SECTION 2: PROBLEM
%----------------------------------------------------------------------------------------

\section{Problem: Overplotting}

\begin{frame}{Why Scatterplots Clutter}
    \begin{itemize}
        \item Modern datasets: millions of samples
        % Math arrow instead of a raw U+2192 so the slide does not depend on
        % the Unicode character being declared for the input encoding.
        \item Pixel resolution fixed $\rightarrow$ many samples map to the same pixel
        \item Consequences:
        \begin{itemize}
            \item Occlusion of clusters + outliers
            \item Loss of density information
            \item Hard to select individual items
            \item Misleading visual perception
        \end{itemize}
        \item A method is needed to \textbf{declutter} without losing structure
    \end{itemize}
\end{frame}

\begin{frame}
    \centering
    \includegraphics[scale=0.8]{rsc/overplotting}
    % \url (hyperref) typesets the address as a breakable, clickable link.
    \footnotesize Source: \url{https://statisticsglobe.com/avoid-overplotting-r}
\end{frame}

\begin{frame}{Limitations of Traditional Approaches}
    \begin{itemize}
        \item Transparency-based methods
        \begin{itemize}
            \item Improve density perception
            \item But still lose individual sample visibility
        \end{itemize}
        \item Down-sampling
        \begin{itemize}
            \item Removes data $\rightarrow$ not acceptable for analysis
        \end{itemize}
        \item Local spatial distortions
        \begin{itemize}
            \item Risk of collisions
            \item Often non-monotonic mappings
        \end{itemize}
        \item Need a \textbf{global}, \textbf{smooth}, \textbf{monotonic}, \textbf{collision-free} method
    \end{itemize}
\end{frame}

%----------------------------------------------------------------------------------------
%   SECTION 3: BACKGROUND
%----------------------------------------------------------------------------------------

\section{Background: Density Fields \& Integral Images}

\begin{frame}{Density Estimation}
    \begin{itemize}
        \item Given samples $z_i = (x_i, y_i)$
        \item Build smoothed density:
        \[
            d_r(x,y) = \sum_{p=1}^n \varphi_r(x-x_p, y-y_p)
        \]
        \item Typically Gaussian kernel
        \item Add global constant $d_0$ for stability:
        \[
            d(i,j) = d_r(i,j) + d_0
        \]
        \item Ensures no empty regions $\rightarrow$ avoids singular mappings
    \end{itemize}
\end{frame}

\begin{frame}{Integral Images (InIms)}
    \begin{itemize}
        \item Integral images compute cumulative sums over regions
        \item Four standard tables:
        \[
            \alpha,\beta,\gamma,\delta
        \]
        % Degree sign written in math mode instead of a raw U+00B0.
        \item Four tilted (45$^\circ$) tables:
        \[
            \alpha_t, \beta_t, \gamma_t, \delta_t
        \]
        \item Each encodes global density distribution
        \item Key advantage:
        \begin{itemize}
            \item Displacements depend on \textbf{global density}, not local neighborhood
            \item Avoids collisions
        \end{itemize}
    \end{itemize}
\end{frame}

%----------------------------------------------------------------------------------------
%   SECTION 4: METHOD
%----------------------------------------------------------------------------------------

\section{Density-Equalizing Mapping}

\begin{frame}{Original Mapping (Molchanov \& Linsen)}
    \begin{itemize}
        \item Prior work defined mapping:
        \[
            t(x,y; d) = \frac{
                \alpha q_1 + \beta q_2 + \gamma q_3 + \delta q_4
                + \alpha_t (x,1) + \beta_t (1,y) + \gamma_t (x,0) + \delta_t (0,y)
            }{2C}
        \]
        \item But:
        \begin{itemize}
            \item Not identity for uniform density
            \item Iteration unstable
            \item Does not converge to equalized distribution
        \end{itemize}
    \end{itemize}
\end{frame}

\begin{frame}{Corrected Mapping (This Paper)}
    \begin{itemize}
        \item Compute deformation for true density $d$
        \item Compute deformation for constant density $d_0$
        \item Subtract:
        \[
            t(x,y) = (x,y) + t(x,y; d) - t(x,y; d_0)
        \]
        \item This ensures:
        \begin{itemize}
            \item Identity for uniform density
            \item Smooth monotonic deformation
            \item Progressive convergence to equalization
            \item No overlap of regions
        \end{itemize}
    \end{itemize}
\end{frame}

\begin{frame}{Iterative Algorithm Overview}
    \begin{enumerate}
        \item Rasterize and smooth density
        \item Compute integral images
        \item Compute corrected deformation $t(x,y)$
        \item Apply bi-linear interpolation to sample positions
        \item Iterate until:
        \begin{itemize}
            \item Time budget reached
            \item Uniformity threshold reached
        \end{itemize}
    \end{enumerate}
\end{frame}

% Before/after comparison: two images side by side with an arrow between them.
\begin{frame}
    \centering
    \begin{figure}
        \centering
        \begin{minipage}{0.4\textwidth}
            \centering
            \includegraphics[width=\textwidth]{rsc/2408.06513v1_page_7_1}
            \vspace{4pt}
            \footnotesize MNIST Dataset (UMAP)
        \end{minipage}
        \begin{minipage}{0.15\textwidth}
            \centering
            $\Longrightarrow$
        \end{minipage}
        \begin{minipage}{0.4\textwidth}
            \centering
            \includegraphics[width=\textwidth]{rsc/2408.06513v1_page_7_2}
            \vspace{4pt}
            \footnotesize Visual encoding of the density-equalizing transform
        \end{minipage}
        \label{fig:figure}
    \end{figure}
\end{frame}

%----------------------------------------------------------------------------------------
%   SECTION 5: VISUAL ENCODING
%----------------------------------------------------------------------------------------

\section{Visual Encoding of Deformation}

\begin{frame}{Problem After Deformation}
    \begin{itemize}
        \item After equalization:
        \begin{itemize}
            \item Local densities lost
            \item Cluster shapes distorted
            \item Distances no longer meaningful
        \end{itemize}
        \item Need additional encodings to preserve structure
    \end{itemize}
\end{frame}

\begin{frame}{Three Proposed Encodings}
    \begin{itemize}
        \item \textbf{Deformed grid lines}
        \begin{itemize}
            \item Show local expansion / contraction
        \end{itemize}
        \item \textbf{Background density texture}
        \begin{itemize}
            \item Shows cluster cores after deformation
        \end{itemize}
        \item \textbf{Contour lines}
        \begin{itemize}
            \item Reveal subcluster structure
        \end{itemize}
    \end{itemize}
\end{frame}

% One example image per encoding, laid out in three equal-width columns.
\begin{frame}{Three Proposed Encodings II}
    \centering
    \begin{figure}
        \centering
        \begin{minipage}{0.3\textwidth}
            \centering
            \includegraphics[width=\textwidth]{rsc/2408.06513v1_page_7_2}
            \vspace{4pt}
            \footnotesize Deformed grid lines
        \end{minipage}
        \begin{minipage}{0.3\textwidth}
            \centering
            \includegraphics[width=\textwidth]{rsc/2408.06513v1_page_7_3}
            \vspace{4pt}
            \footnotesize Background density texture
        \end{minipage}
        \begin{minipage}{0.3\textwidth}
            \centering
            \includegraphics[width=\textwidth]{rsc/2408.06513v1_page_7_4}
            \vspace{4pt}
            \footnotesize Contour lines
        \end{minipage}
        \label{fig:figure2}
    \end{figure}
\end{frame}

%----------------------------------------------------------------------------------------
%   SECTION 6: IMPLEMENTATION
%----------------------------------------------------------------------------------------

\section{GPU Implementation}

\begin{frame}{Efficient GPU Computation}
    \begin{itemize}
        \item All major steps implemented on GPU:
        \begin{itemize}
            \item Density accumulation
            \item Gaussian smoothing
            \item Integral image computation
        \end{itemize}
        \item Fast multi-pass reduction for InIms
        \item Complexity:
        \[
            O(n + m)
        \]
        where $m = 2^k \times 2^k$ is texture resolution
        \item Achieves interactive rates for millions of samples
    \end{itemize}
\end{frame}

%----------------------------------------------------------------------------------------
%   SECTION 7: RESULTS
%----------------------------------------------------------------------------------------

\section{Results}

\begin{frame}{Performance}
    \begin{itemize}
        \item Runs at interactive frame rates:
        \begin{itemize}
            % `\ ` after the abbreviation prevents end-of-sentence spacing.
            \item e.g.\ 4M samples in $\approx 28$ ms per iteration
        \end{itemize}
        \item Standard deviation of samples/bin decreases monotonically
        \item Overplotting fraction also decreases monotonically
    \end{itemize}
    \centering
    \includegraphics[scale=0.4]{rsc/results}
\end{frame}

% --- THE END

\begin{frame}[focus]
    Thanks for your Attention!
\end{frame}

%----------------------------------------------------------------------------------------
%   CLOSING/SUPPLEMENTARY SLIDES
%----------------------------------------------------------------------------------------

\appendix

\begin{frame}{Sources}
    \nocite{*} % Display all references regardless of if they were cited
    \bibliography{sources}
    \bibliographystyle{plain}
\end{frame}

\section{Backup}\label{sec:backup}

\begin{frame}{User Study}
    \begin{itemize}
        \item 25 participants, 3 tasks:
        \begin{enumerate}
            \item Estimate cluster size
            \item Sort clusters by size
            \item Select clusters (lasso)
        \end{enumerate}
        \item Findings:
        \begin{itemize}
            \item Size estimation (T1): regularized significantly better
            \item Sorting (T2): regularized significantly better
            \item Cluster selection (T3):
            \begin{itemize}
                \item Grid encoding: worst
                \item Background texture: better
                \item Original scatterplot: best
            \end{itemize}
        \end{itemize}
    \end{itemize}
\end{frame}

\end{document}