summaryrefslogtreecommitdiff
path: root/report/intro_slides/tcohn_slides.tex
diff options
context:
space:
mode:
Diffstat (limited to 'report/intro_slides/tcohn_slides.tex')
-rw-r--r--report/intro_slides/tcohn_slides.tex167
1 files changed, 167 insertions, 0 deletions
diff --git a/report/intro_slides/tcohn_slides.tex b/report/intro_slides/tcohn_slides.tex
new file mode 100644
index 00000000..950a0036
--- /dev/null
+++ b/report/intro_slides/tcohn_slides.tex
@@ -0,0 +1,167 @@
+\documentclass{beamer}
+
+\mode<presentation>
+{
+ \usetheme{Boadilla}
+ \setbeamercovered{transparent}}
+
+\usepackage[english]{babel}
+\usepackage{times}
+
+\usepackage{xcolor}
+\usepackage{colortbl}
+%\usepackage{subfigure}
+
+%% for tables
+\newcommand{\mc}{\multicolumn}
+\newcommand{\lab}[1]{\multicolumn{1}{c}{#1}}
+\newcommand{\ind}[1]{{\fboxsep1pt\raisebox{-.5ex}{\fbox{{\tiny #1}}}}}
+\newcommand{\IND}[1]{{\fboxsep1pt\raisebox{0ex}{\fbox{{\small #1}}}}}
+\newcommand\production[2]{\ensuremath{\langle\mbox{#1}, \mbox{#2}\rangle}}
+
+%% markup
+\newcommand{\buffer}[1]{{\color{blue}\textbf{#1}}}
+\newcommand{\pred}[1]{\code{#1}}
+
+%% colors
+\newcommand{\textred}[1]{\alert{#1}}
+\newcommand{\textblue}[1]{\buffer{#1}}
+\definecolor{tablecolor}{cmyk}{0,0.3,0.3,0}
+\newcommand{\keytab}[1]{\mc{1}{>{\columncolor{tablecolor}}d}{#1}}
+
+% rules
+\newcommand{\psr}[2]{#1 $\rightarrow \langle $ #2 $\rangle$}
+
+\newenvironment{unpacked_itemize}{
+\begin{itemize}
+ \setlength{\itemsep}{10pt}
+ \setlength{\parskip}{0pt}
+ \setlength{\parsep}{0pt}
+}{\end{itemize}}
+
+\newcommand{\condon}{\hspace{0pt} | \hspace{1pt}}
+\definecolor{darkblue}{rgb}{0,0,0.6}
+\newcommand{\blueexample}[1]{\textcolor{darkblue}{\rm #1}}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\newcommand{\ws}{\ensuremath{\vec{w}}}
+\newcommand{\pu}{\ensuremath{P_0}}
+\newcommand{\bx}{\mathbf{x}}
+\newcommand{\bz}{\mathbf{z}}
+\newcommand{\bd}{\mathbf{d}}
+\newcommand{\by}{\mathbf{y}}
+\newcommand\bleu{${B{\scriptstyle LEU}}$}
+
+
+\title{Clustering of phrases and contexts}
+\author{Trevor Cohn}
+\date{\today}
+
+\begin{document}
+
+\begin{frame}[t]{Motivation}
+%\vspace{1.0cm}
+\begin{exampleblock}{Distributional Hypothesis}
+\begin{quote}
+\emph{Words that occur in the same contexts tend to have similar meanings}
+\end{quote}
+\hfill (Zellig Harris, 1954)
+\end{exampleblock}
+
+\vspace{3ex}
+
+We will leverage this in a translation setting:
+\begin{itemize}
+ \item Use the contexts to \alert{cluster} translation units into groups
+ \item Units in the same group expected to be semantically and syntactically similar
+ \item Then use these cluster labels to guide translation
+ \begin{itemize}
+ \item lexical selection: translating ambiguous source word/s
+ \item reordering: consistent syntactic patterns of reordering
+ \end{itemize}
+\end{itemize}
+\end{frame}
+
+\begin{frame}[t]{Monolingual Example}
+Task: cluster words into their parts-of-speech. \\
+
+\vspace{1ex}
+Illustrate by starting with the word `deal' (noun or verb):
+
+\only<1>{\includegraphics[width=\columnwidth]{deal_first.pdf} \\ Step 1: Find contexts for `deal'}
+\only<2->{\includegraphics[width=\columnwidth]{deal.pdf} \\ Step 2: Find other words which occur in these contexts}
+%\only<3>{\includegraphics[width=\columnwidth]{deal_more.pdf} \\ \ldots continue to expand}
+
+\only<3>{
+\vspace{1ex}
+Notice that the instances of deal can be split into two connected sub-graphs:
+\begin{itemize}
+ \item noun: the left two contexts ``a \ldots with'' and ``a \ldots that''
+ \item verb: the right two contexts ``to \ldots with'' and ``not \ldots with''
+ \item neighbouring words of these contexts share the same PoS
+\end{itemize}
+}
+
+\end{frame}
+
+\begin{frame}[t]{More Formally}
+
+Construct a bipartite graph
+\begin{itemize}
+ \item Nodes on the top layer denote word types (bilingual phrase pairs)
+ \item Nodes on the bottom layer denote context types (monlingual/bilingual words)
+ \item Edges connect words and their contexts
+\end{itemize}
+
+\includegraphics[width=\columnwidth]{bipartite.pdf}
+
+\end{frame}
+
+\begin{frame}[t]{Clustering}
+
+Task is to cluster the graph into sub-graphs. Nodes in the sub-graphs should be
+\begin{itemize}
+\item strongly connected to one another
+\item weakly connected to nodes outside the sub-graph
+\item could formulate as either \emph{hard} or \emph{soft} clustering
+\end{itemize}
+Choose \alert{soft clustering} to allow for syntactic and semantic ambiguity
+
+\centering
+\includegraphics[width=0.7\columnwidth]{bipartite_lda.pdf}
+
+\end{frame}
+
+\begin{frame}[t]{Latent Dirichlet Allocation (LDA)}
+
+LDA is a generative model which treats documents as bags of words
+\begin{itemize}
+ \item each word is assign a \alert{topic} (cluster tag)
+ \item words are generated from a topic-specific multinomial
+ \item topics are \alert{tied} across a document using a Dirichlet prior
+ \item $\alpha < 1$ biases towards \alert{sparse} distributions, i.e., topic reuse
+ \item inferred $\theta_d$ describes a document and $\phi_t$ describes a topic
+\end{itemize}
+
+\vspace{-3ex}
+\includegraphics[scale=0.55]{lda.pdf}
+
+\end{frame}
+
+\begin{frame}[t]{LDA over Contexts}
+
+Generative story:
+\begin{itemize}
+ \item for each word type $w$
+ \item for each of the $L$ contexts
+ \item first we draw a topic $t$, then generate the context $\vec{c}$ given the topic
+ \item the Dirichlet prior ties the topics for each $w$
+ \item we're primarily interested in the learnt $\theta$ values
+\end{itemize}
+
+\includegraphics[scale=0.4]{context_lda.pdf}
+
+\end{frame}
+
+\end{document}