diff --git a/presentation/main.tex b/presentation/main.tex index 2a664e7..d2ab206 100644 --- a/presentation/main.tex +++ b/presentation/main.tex @@ -64,6 +64,7 @@ %---------------------------------------------------------------------------------------- % todo pic of action + \section{The Goal} \begin{frame}{The goal} \begin{itemize} @@ -105,7 +106,7 @@ \begin{itemize} \item YouTube \end{itemize} - \item using semi-supervised learning might be benefitial + \item Using semi-supervised learning might be beneficial \end{itemize} \end{frame} @@ -143,11 +144,10 @@ \item Pseudo-labeling \item Train model on labeled data \begin{itemize} - \item Eg. 1\% of data labeled + \item E.g.\ 1\%/10\% of data labeled \end{itemize} - \item Confidence of prediction - \item If high enough - \item Use to predict unlabeled data + \item Confidence of prediction [Threshold] + \item Use model to pseudo-label unlabeled data \end{itemize} \framebreak @@ -174,11 +174,13 @@ \begin{itemize} \item lower capacity \item better captures temporal dynamics in recognizing actions + \item scene changes/motion over time \end{itemize} \item Large model \begin{itemize} \item better learns spatial semantics \item to distinguish different action instances + \item localize/identify objects in specific scene \end{itemize} \end{itemize} @@ -190,7 +192,7 @@ \item Supplemented by lightweight auxiliary network \begin{itemize} \item Different structure - \item Fewer channels + \item Fewer channels (smaller) \end{itemize} \item Different representation of data complements primary backbone \end{itemize} @@ -201,6 +203,10 @@ \end{frame} \begin{frame}{Performance Perspectives} + \begin{itemize} + \item 1\% labeled data + 400 labels + \item Kinetics-400 dataset + \end{itemize} \includegraphics[scale=.205]{rsc/performance_comparison} \end{frame} @@ -219,24 +225,24 @@ \begin{frame}[allowframebreaks]{How existing method \textit{FixMatch} works} \begin{itemize} - \item $\mathbbm{1} \coloneqq \text{Indicator Function}$ \item 
$B_u \coloneqq \text{Batchsize}$ - \item $\mathcal{T} \coloneqq \text{Confidence Threshold}$ + \item $\tau \coloneqq \text{Confidence Threshold (Hyperparameter)}$ \item $F(\mathcal{T}_{\text{strong}}(u_i)) \coloneqq \text{Class distribution}$ \item $p_i \coloneqq F(\mathcal{T}_{\text{weak}}(u_i))$ \item $\hat{y}_i \coloneqq \arg \max(p_i) \coloneqq \text{Pseudo Label}$ \item $\mathcal{H} \coloneqq \text{Cross-entropy loss}$ \item $\mathcal{L}_u \coloneqq \text{Loss on the unlabeled data}$ - \item $\mathcal{F} \coloneqq \text{Model}$ + \item $F \coloneqq \text{Model}$ + \item $\mathbbm{1} \coloneqq \text{Indicator Function}$ \end{itemize} \begin{align*} - \mathcal{L}_u = \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i) \geq \mathcal{T}) \mathcal{H}(\hat{y}_i,F(\mathcal{T}_{\text{strong}}(u_i))) + \mathcal{L}_u = \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i) \geq \tau) \mathcal{H}(\hat{y}_i,F(\mathcal{T}_{\text{strong}}(u_i))) \end{align*} \framebreak \begin{itemize} - \item $\mathbbm{1}(\max(p_i) \geq \mathcal{T})$ + \item $\mathbbm{1}(\max(p_i) \geq \tau)$ \begin{itemize} \item 'confidence-based masking' \item retain label only if largest probability is above threshold @@ -268,8 +274,8 @@ \begin{itemize} \item Learning on unlabeled data \begin{align*} - \mathcal{L}_u^F &= \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i^A) \geq \mathcal{T}) \mathcal{H}(\hat{y}_i^A,F(\mathcal{T}_{\text{strong}}(u_i)))\\ - \mathcal{L}_u^A &= \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i^F) \geq \mathcal{T}) \mathcal{H}(\hat{y}_i^F,A(\mathcal{T}_{\text{strong}}(u_i)))\\ + \mathcal{L}_u^F &= \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i^A) \geq \tau) \mathcal{H}(\hat{y}_i^A,F(\mathcal{T}_{\text{strong}}(u_i)))\\ + \mathcal{L}_u^A &= \frac{1}{B_u} \sum_{i=1}^{B_u} \mathbbm{1}(\max(p_i^F) \geq \tau) \mathcal{H}(\hat{y}_i^F,A(\mathcal{T}_{\text{strong}}(u_i))) \end{align*} \item Complete training objective \begin{align*} @@ -279,6 +285,7 @@ \end{itemize} 
\end{frame} + \section{Implementation} \begin{frame}{Networks}