% Source: forked from lukas/Seminar_in_AI (TeX, 339 lines, 11 KiB)
\documentclass[usenames,dvipsnames]{beamer}
%----------------------------------------------------------------------------------------
% Structures and pointers talk
% 20.04.2020
% NOTE(review): this header does not match the title set below
% ("De-Cluttering Scatterplots") and looks like a leftover from an earlier
% deck -- confirm and update.
%----------------------------------------------------------------------------------------
% The theme is loaded with `nofirafonts`; the Fira packages are loaded
% explicitly further down instead.
\usetheme[nofirafonts]{focus}

\usepackage[utf8]{inputenc}

\usepackage{booktabs}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsfonts}
\usepackage{bbm}
\usepackage{hyperref}
\usepackage{graphicx}
\usepackage{xcolor}
\usepackage{mathtools}

\usepackage[T1]{fontenc}

% Fira Sans, passed the `sfdefault` option, plus Fira Mono.
\usepackage[sfdefault]{FiraSans}

\usepackage{FiraMono}

% Color definitions
\definecolor{backgroundcoloreq}{RGB}{180,140,0}
\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
\definecolor{codepurple}{rgb}{0.58,0,0.82}
\definecolor{codeorange}{RGB}{190,100,0}

% Pin \mathcal to the OMS/cmbrs font family.
% NOTE(review): the original comment called this the "default" calligraphic
% alphabet; stock LaTeX uses OMS/cmsy for \mathcal -- confirm cmbrs is intended.
\DeclareMathAlphabet{\mathcal}{OMS}{cmbrs}{m}{n}


%----------------------------------------------------------------------------------------
% TITLE SLIDE
%----------------------------------------------------------------------------------------

% Metadata consumed by \maketitle on the first frame.
\title{De-Cluttering Scatterplots}
\subtitle{with Integral Images}
\author{Lukas Heiligenbrunner}
\date{\today}

%------------------------------------------------

\begin{document}
%------------------------------------------------

% Title slide: typeset by \maketitle.
\begin{frame}
    \maketitle
\end{frame}


%----------------------------------------------------------------------------------------
% SECTION 1: INTRODUCTION
%----------------------------------------------------------------------------------------

\section{Introduction}

% Motivation slide: overplotting in large scatterplots and the goals of the method.
\begin{frame}{Goal of the Paper}
    \begin{itemize}
        \item Scatterplots are fundamental for exploring multidimensional data
        \item But: with large datasets they suffer from \textbf{overplotting}
        \item Dense regions obscure structure, samples become inaccessible
        \item Goal:
        \begin{itemize}
            \item Reduce clutter
            \item Preserve neighborhood relations
            \item Achieve uniform sample distribution
            \item Maintain interpretability
        \end{itemize}
    \end{itemize}
\end{frame}

%----------------------------------------------------------------------------------------
% SECTION 2: PROBLEM
%----------------------------------------------------------------------------------------

\section{Problem: Overplotting}

% Explains where clutter comes from and what it costs the viewer.
\begin{frame}{Why Scatterplots Clutter}
    \begin{itemize}
        \item Modern datasets: millions of samples
        % Math-mode arrow instead of a raw Unicode arrow, so the slide does not
        % depend on the input encoding providing U+2192.
        \item Pixel resolution fixed $\rightarrow$ many samples map to the same pixel
        \item Consequences:
        \begin{itemize}
            \item Occlusion of clusters + outliers
            \item Loss of density information
            \item Hard to select individual items
            \item Misleading visual perception
        \end{itemize}
        \item A method is needed to \textbf{declutter} without losing structure
    \end{itemize}
\end{frame}

% Surveys three existing approaches and the shortcoming listed for each.
\begin{frame}{Limitations of Traditional Approaches}
    \begin{itemize}
        \item Transparency-based methods
        \begin{itemize}
            \item Improve density perception
            \item But still lose individual sample visibility
        \end{itemize}
        \item Down-sampling
        \begin{itemize}
            % Math-mode arrow instead of a raw Unicode arrow (U+2192).
            \item Removes data $\rightarrow$ not acceptable for analysis
        \end{itemize}
        \item Local spatial distortions
        \begin{itemize}
            \item Risk of collisions
            \item Often non-monotonic mappings
        \end{itemize}
        \item Need a \textbf{global}, \textbf{smooth}, \textbf{monotonic}, \textbf{collision-free} method
    \end{itemize}
\end{frame}

%----------------------------------------------------------------------------------------
% SECTION 3: BACKGROUND
%----------------------------------------------------------------------------------------

\section{Background: Density Fields \& Integral Images}

% Defines the smoothed density d_r and the offset density d used on later slides.
\begin{frame}{Density Estimation}
    \begin{itemize}
        % Samples are indexed by p to match the summation index below and to
        % keep i, j free for the indices used in d(i,j).
        \item Given samples $z_p = (x_p, y_p)$, $p = 1, \dots, n$
        \item Build smoothed density:
        \[
            d_r(x,y) = \sum_{p=1}^n \varphi_r(x-x_p, y-y_p)
        \]
        \item Typically Gaussian kernel
        \item Add global constant $d_0$ for stability:
        \[
            d(i,j) = d_r(i,j) + d_0
        \]
        % Math-mode arrow instead of a raw Unicode arrow (U+2192).
        \item Ensures no empty regions $\rightarrow$ avoids singular mappings
    \end{itemize}
\end{frame}

% Introduces the four standard and four tilted integral-image tables.
\begin{frame}{Integral Images (InIms)}
    \begin{itemize}
        \item Integral images compute cumulative sums over regions
        \item Four standard tables:
        \[
            \alpha,\beta,\gamma,\delta
        \]
        % Degree sign set in math mode rather than as a raw Unicode character.
        \item Four tilted ($45^\circ$) tables:
        \[
            \alpha_t, \beta_t, \gamma_t, \delta_t
        \]
        \item Each encodes global density distribution
        \item Key advantage:
        \begin{itemize}
            \item Displacements depend on \textbf{global density}, not local neighborhood
            \item Avoids collisions
        \end{itemize}
    \end{itemize}
\end{frame}

%----------------------------------------------------------------------------------------
% SECTION 4: METHOD
%----------------------------------------------------------------------------------------

\section{Density-Equalizing Mapping}

% States the mapping from prior work and the three problems listed for it.
\begin{frame}{Original Mapping (Molchanov \& Linsen)}
    \begin{itemize}
        \item Prior work defined mapping:
        \[
            t(x,y; d) = \frac{
                \alpha q_1 + \beta q_2 + \gamma q_3 + \delta q_4
                + \alpha_t (x,1) + \beta_t (1,y) + \gamma_t (x,0) + \delta_t (0,y)
            }{2C}
        \]
        \item But:
        \begin{itemize}
            \item Not identity for uniform density
            \item Iteration unstable
            \item Does not converge to equalized distribution
        \end{itemize}
    \end{itemize}
\end{frame}

% Corrected mapping: the deformation for the constant density d_0 is subtracted
% from the deformation for the true density d.
\begin{frame}{Corrected Mapping (This Paper)}
    \begin{itemize}
        \item Compute deformation for true density $d$
        \item Compute deformation for constant density $d_0$
        \item Subtract:
        \[
            t(x,y) = (x,y) + t(x,y; d) - t(x,y; d_0)
        \]
        \item This ensures:
        \begin{itemize}
            \item Identity for uniform density
            \item Smooth monotonic deformation
            \item Progressive convergence to equalization
            \item No overlap of regions
        \end{itemize}
    \end{itemize}
\end{frame}

% Step-by-step outline of one iteration and the two stopping criteria.
\begin{frame}{Iterative Algorithm Overview}
    \begin{enumerate}
        \item Rasterize and smooth density
        \item Compute integral images
        \item Compute corrected deformation $t(x,y)$
        \item Apply bi-linear interpolation to sample positions
        \item Iterate until:
        \begin{itemize}
            \item Time budget reached
            \item Uniformity threshold reached
        \end{itemize}
    \end{enumerate}
\end{frame}

%----------------------------------------------------------------------------------------
% SECTION 5: IMPLEMENTATION
%----------------------------------------------------------------------------------------

\section{GPU Implementation}

% Lists the GPU-side steps and the stated complexity in n and the texture size m.
\begin{frame}{Efficient GPU Computation}
    \begin{itemize}
        \item All major steps implemented on GPU:
        \begin{itemize}
            \item Density accumulation
            \item Gaussian smoothing
            \item Integral image computation
        \end{itemize}
        \item Fast multi-pass reduction for InIms
        \item Complexity:
        \[
            O(n + m)
        \]
        where $m = 2^k \times 2^k$ is the texture resolution
        \item Achieves interactive rates for millions of samples
    \end{itemize}
\end{frame}

%----------------------------------------------------------------------------------------
% SECTION 6: VISUAL ENCODING
%----------------------------------------------------------------------------------------

\section{Visual Encoding of Deformation}

% Names the information lost by equalization, motivating the encodings on the next slide.
\begin{frame}{Problem After Deformation}
    \begin{itemize}
        \item After equalization:
        \begin{itemize}
            \item Local densities lost
            \item Cluster shapes distorted
            \item Distances no longer meaningful
        \end{itemize}
        \item Need additional encodings to preserve structure
    \end{itemize}
\end{frame}

% The three visual encodings, each with what it shows.
\begin{frame}{Three Proposed Encodings}
    \begin{itemize}
        \item \textbf{Deformed grid lines}
        \begin{itemize}
            \item Show local expansion / contraction
        \end{itemize}
        \item \textbf{Background density texture}
        \begin{itemize}
            \item Shows cluster cores after deformation
        \end{itemize}
        \item \textbf{Contour lines}
        \begin{itemize}
            \item Reveal subcluster structure
        \end{itemize}
    \end{itemize}
\end{frame}

%----------------------------------------------------------------------------------------
% SECTION 7: RESULTS
%----------------------------------------------------------------------------------------

\section{Results}

% Timing and convergence claims, followed by the results figure rsc/results.
\begin{frame}{Performance}
    \begin{itemize}
        \item Runs at interactive frame rates:
        \begin{itemize}
            % `e.g.\ ` stops TeX from treating the period as a sentence end;
            % `~` keeps the number and its unit on one line.
            \item e.g.\ 4M samples in $\approx 28$~ms per iteration
        \end{itemize}
        \item Standard deviation of samples/bin decreases monotonically
        \item Overplotting fraction also decreases monotonically
    \end{itemize}
    \centering
    \includegraphics[scale=0.4]{rsc/results}
\end{frame}

% User study: the three tasks and the findings reported per task (T1--T3).
\begin{frame}{User Study}
    \begin{itemize}
        \item 25 participants, 3 tasks:
        \begin{enumerate}
            \item Estimate cluster size
            \item Sort clusters by size
            \item Select clusters (lasso)
        \end{enumerate}
        \item Findings:
        \begin{itemize}
            \item Size estimation (T1): regularized significantly better
            \item Sorting (T2): regularized significantly better
            \item Cluster selection (T3):
            \begin{itemize}
                \item Grid encoding: worst
                \item Background texture: better
                \item Original scatterplot: best
            \end{itemize}
        \end{itemize}
    \end{itemize}
\end{frame}


% --- THE END

% Closing slide, using the frame option `focus`.
\begin{frame}[focus]
    Thanks for your Attention!
\end{frame}

%----------------------------------------------------------------------------------------
% CLOSING/SUPPLEMENTARY SLIDES
%----------------------------------------------------------------------------------------

\appendix

% Bibliography slide. \nocite{*} lists every entry of sources.bib, so the list
% can exceed one slide; `allowframebreaks` lets beamer continue it on further
% slides instead of overflowing.
\begin{frame}[allowframebreaks]{Sources}
    \nocite{*} % Display all references regardless of whether they were cited
    \bibliographystyle{plain}
    \bibliography{sources}
\end{frame}

\end{document}