\documentclass{beamer}

\mode<presentation>
{
 \usetheme{Boadilla}
 \setbeamercovered{transparent}}

\usepackage[english]{babel}
\usepackage{times}

\usepackage{xcolor}
\usepackage{colortbl}
%\usepackage{subfigure}

%% for tables
\newcommand{\mc}{\multicolumn}
\newcommand{\lab}[1]{\multicolumn{1}{c}{#1}}
\newcommand{\ind}[1]{{\fboxsep1pt\raisebox{-.5ex}{\fbox{{\tiny #1}}}}}
\newcommand{\IND}[1]{{\fboxsep1pt\raisebox{0ex}{\fbox{{\small #1}}}}}
\newcommand\production[2]{\ensuremath{\langle\mbox{#1}, \mbox{#2}\rangle}}

%% markup
\newcommand{\buffer}[1]{{\color{blue}\textbf{#1}}}
\newcommand{\pred}[1]{\texttt{#1}}

%% colors
\newcommand{\textred}[1]{\alert{#1}}
\newcommand{\textblue}[1]{\buffer{#1}}
\definecolor{tablecolor}{cmyk}{0,0.3,0.3,0}
\newcommand{\keytab}[1]{\mc{1}{>{\columncolor{tablecolor}}d}{#1}}

% rules
\newcommand{\psr}[2]{#1 $\rightarrow \langle $ #2 $\rangle$}

\newenvironment{unpacked_itemize}{
\begin{itemize}
  \setlength{\itemsep}{10pt}
  \setlength{\parskip}{0pt}
  \setlength{\parsep}{0pt}
}{\end{itemize}}

\newcommand{\condon}{\hspace{0pt} | \hspace{1pt}}
\definecolor{darkblue}{rgb}{0,0,0.6}
\newcommand{\blueexample}[1]{\textcolor{darkblue}{\rm #1}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newcommand{\ws}{\ensuremath{\vec{w}}}
\newcommand{\pu}{\ensuremath{P_0}}
\newcommand{\bx}{\mathbf{x}}
\newcommand{\bz}{\mathbf{z}}
\newcommand{\bd}{\mathbf{d}}
\newcommand{\by}{\mathbf{y}}
\newcommand\bleu{${B{\scriptstyle LEU}}$}


\title{Clustering of phrases and contexts}
\author{Trevor Cohn}
\date{\today}

\begin{document}

\begin{frame}[t]{Motivation}
%\vspace{1.0cm}
\begin{exampleblock}{Distributional Hypothesis}
\begin{quote}
\emph{Words that occur in the same contexts tend to have similar meanings}
\end{quote}
\hfill (Zellig Harris, 1954)
\end{exampleblock}

\vspace{3ex}

We will leverage this in a translation setting:
\begin{itemize}
    \item Use the contexts to \alert{cluster} translation units into groups
    \item Units in the same group are expected to be semantically and syntactically similar
    \item Then use these cluster labels to guide translation
    \begin{itemize}
        \item lexical selection: translating ambiguous source words
        \item reordering: consistent syntactic patterns of reordering
    \end{itemize}
\end{itemize}
\end{frame}

\begin{frame}[t]{Monolingual Example}
Task: cluster words into their parts-of-speech.

\vspace{1ex}
We illustrate by starting with the word `deal' (noun or verb):

\only<1>{\includegraphics[width=\columnwidth]{deal_first.pdf} \\ Step 1: Find contexts for `deal'}
\only<2->{\includegraphics[width=\columnwidth]{deal.pdf} \\ Step 2: Find other words which occur in these contexts}
%\only<3>{\includegraphics[width=\columnwidth]{deal_more.pdf} \\ \ldots continue to expand}

\only<3>{
\vspace{1ex}
Notice that the instances of `deal' can be split into two connected sub-graphs:
\begin{itemize}
    \item noun: the left two contexts ``a \ldots with'' and ``a \ldots that''
    \item verb: the right two contexts ``to \ldots with'' and ``not \ldots with''
    \item neighbouring words of these contexts share the same PoS
\end{itemize}
}

\end{frame}
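
\begin{frame}[fragile,t]{Sketch: Extracting Contexts}
A minimal Python sketch of the context-finding step, assuming a
one-word window either side of the target; the toy corpus below is
illustrative, not the experimental data:
\begin{verbatim}
def contexts(tokens, target):
    # collect (left, right) word pairs around each
    # occurrence of the target word
    out = []
    for i, tok in enumerate(tokens):
        if tok == target and 0 < i < len(tokens) - 1:
            out.append((tokens[i - 1], tokens[i + 1]))
    return out

toks = "a deal with him was not easy to deal with".split()
print(contexts(toks, "deal"))
# -> [('a', 'with'), ('to', 'with')]
\end{verbatim}
\end{frame}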

\begin{frame}[t]{More Formally}

Construct a bipartite graph
\begin{itemize}
    \item Nodes on the top layer denote word types (bilingual phrase pairs)
    \item Nodes on the bottom layer denote context types (monolingual/bilingual words)
    \item Edges connect words and their contexts
\end{itemize}

\includegraphics[width=\columnwidth]{bipartite.pdf}

\end{frame}
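
\begin{frame}[fragile,t]{Sketch: Building the Bipartite Graph}
A minimal sketch of the graph construction; edges are weighted by raw
co-occurrence counts here, which is an assumption (other weightings
are possible):
\begin{verbatim}
from collections import Counter, defaultdict

def bipartite(pairs):
    # pairs: (word_type, context_type) occurrences
    edges = Counter(pairs)      # edge weight = count
    by_word = defaultdict(set)  # top layer -> bottom layer
    for (w, c) in edges:
        by_word[w].add(c)
    return edges, by_word

pairs = [("deal", "a_with"), ("deal", "to_with"),
         ("deal", "a_with"), ("contract", "a_with")]
edges, by_word = bipartite(pairs)
print(by_word["deal"])  # {'a_with', 'to_with'}
\end{verbatim}
\end{frame}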

\begin{frame}[t]{Clustering}

The task is to cluster the graph into sub-graphs. Nodes in a sub-graph should be
\begin{itemize}
\item strongly connected to one another
\item weakly connected to nodes outside the sub-graph
\end{itemize}
This could be formulated as either \emph{hard} or \emph{soft} clustering;
we choose \alert{soft clustering} to allow for syntactic and semantic ambiguity.

\centering
\includegraphics[width=0.7\columnwidth]{bipartite_lda.pdf}

\end{frame}

\begin{frame}[t]{Latent Dirichlet Allocation (LDA)}

LDA is a generative model which treats documents as bags of words
\begin{itemize}
    \item each word is assigned a \alert{topic} (cluster tag)
    \item words are generated from a topic-specific multinomial
    \item topics are \alert{tied} across a document using a Dirichlet prior
    \item $\alpha < 1$ biases towards \alert{sparse} distributions, i.e., topic reuse
    \item inferred $\theta_d$ describes a document and $\phi_t$ describes a topic
\end{itemize}

\vspace{-3ex}
\includegraphics[scale=0.55]{lda.pdf}

\end{frame}
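
\begin{frame}[t]{LDA: Generative Process}
For reference, the standard LDA generative process and joint
distribution (a textbook restatement, using the notation of the
previous slide):
\begin{itemize}
    \item for each topic $t$: draw $\phi_t \sim \mbox{Dirichlet}(\beta)$
    \item for each document $d$: draw $\theta_d \sim \mbox{Dirichlet}(\alpha)$;
          then for each position $i$, draw a topic
          $z_{d,i} \sim \mbox{Multinomial}(\theta_d)$ and a word
          $w_{d,i} \sim \mbox{Multinomial}(\phi_{z_{d,i}})$
\end{itemize}
\[
P(\mathbf{w}, \mathbf{z}, \theta, \phi \condon \alpha, \beta)
  = \prod_t P(\phi_t \condon \beta)
    \prod_d \Big( P(\theta_d \condon \alpha)
    \prod_i P(z_{d,i} \condon \theta_d) \,
            P(w_{d,i} \condon \phi_{z_{d,i}}) \Big)
\]
\end{frame}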

\begin{frame}[t]{LDA over Contexts}

Generative story:
\begin{itemize}
    \item for each word type $w$:
    \begin{itemize}
        \item for each of its $L$ contexts: first draw a topic $t$,
              then generate the context $\vec{c}$ given the topic
    \end{itemize}
    \item the Dirichlet prior ties the topics for each $w$
    \item we're primarily interested in the learnt $\theta$ values
\end{itemize}

\includegraphics[scale=0.4]{context_lda.pdf}

\end{frame}
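
\begin{frame}[fragile,t]{Sketch: LDA over Contexts in Practice}
One way to fit this model is with off-the-shelf LDA, treating each
word type as a ``document'' whose tokens are its context types; this
sketch uses gensim, which is an assumption, not necessarily the
implementation behind these slides:
\begin{verbatim}
from gensim import corpora, models

# one pseudo-document of context tokens per word type
docs = {"deal":     ["a_with", "a_that", "to_with", "not_with"],
        "contract": ["a_with", "a_that"]}

vocab  = corpora.Dictionary(docs.values())
corpus = [vocab.doc2bow(d) for d in docs.values()]
lda = models.LdaModel(corpus, id2word=vocab,
                      num_topics=2, alpha=0.1)  # alpha < 1: sparse

for w, bow in zip(docs, corpus):
    print(w, lda.get_document_topics(bow))  # theta_w over topics
\end{verbatim}
\end{frame}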

\end{document}