Diffstat (limited to 'report/intro_slides/final_slides.tex')
| -rw-r--r-- | report/intro_slides/final_slides.tex | 783 | 
1 file changed, 0 insertions, 783 deletions
| diff --git a/report/intro_slides/final_slides.tex b/report/intro_slides/final_slides.tex deleted file mode 100644 index a73481ac..00000000 --- a/report/intro_slides/final_slides.tex +++ /dev/null @@ -1,783 +0,0 @@ -\documentclass{beamer} - -\mode<presentation> -{ - \usetheme{Boadilla} - \setbeamercovered{transparent}} - -\usepackage[english]{babel} -\usepackage{times} - -\usepackage{xcolor} -\usepackage{colortbl} -%\usepackage{subfigure} - -\usepackage{fontspec}  -\usepackage{xunicode} -\usepackage{xltxtra} -\usepackage{booktabs} -\newenvironment{CJK}{\fontspec[Scale=0.9]{PMingLiU}}{} -\newenvironment{Geeza}{\fontspec[Scale=0.9]{Geeza Pro}}{} - -%% for tables -\newcommand{\mc}{\multicolumn} -\newcommand{\lab}[1]{\multicolumn{1}{c}{#1}} -\newcommand{\ind}[1]{{\fboxsep1pt\raisebox{-.5ex}{\fbox{{\tiny #1}}}}} -\newcommand{\IND}[1]{{\fboxsep1pt\raisebox{0ex}{\fbox{{\small #1}}}}} -\newcommand\production[2]{\ensuremath{\langle\mbox{#1}, \mbox{#2}\rangle}} - -%% markup -\newcommand{\buffer}[1]{{\color{blue}\textbf{#1}}} -\newcommand{\pred}[1]{\code{#1}} - -%% colors -\newcommand{\textred}[1]{\alert{#1}} -\newcommand{\textblue}[1]{\buffer{#1}} -\definecolor{tablecolor}{cmyk}{0,0.3,0.3,0} -\newcommand{\keytab}[1]{\mc{1}{>{\columncolor{tablecolor}}d}{#1}} - -% rules -\newcommand{\psr}[2]{#1 $\rightarrow \langle $ #2 $\rangle$} - -\newenvironment{unpacked_itemize}{ -\begin{itemize} -  \setlength{\itemsep}{10pt} -  \setlength{\parskip}{0pt} -  \setlength{\parsep}{0pt} -}{\end{itemize}} - -\newcommand{\condon}{\hspace{0pt} | \hspace{1pt}} -\definecolor{darkblue}{rgb}{0,0,0.6} -\newcommand{\blueexample}[1]{\textcolor{darkblue}{\rm #1}} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\newcommand{\ws}{\ensuremath{\vec{w}}} -\newcommand{\pu}{\ensuremath{P_0}} -\newcommand{\bx}{\mathbf{x}} -\newcommand{\bz}{\mathbf{z}} -\newcommand{\bd}{\mathbf{d}} -\newcommand{\by}{\mathbf{y}} -\newcommand\bleu{${B{\scriptstyle LEU}}$} - - -\title[Models of SCFG Induction]{Models of Synchronous Grammar Induction for SMT} - -\author[CLSP Workshop 2010]{ -  Workshop 2010 -  %Phil Blunsom$^1$ \and Trevor Cohn$^2$ \and Chris Dyer$^3$ \and Adam Lopez$^4$ -} - -\institute[Baltimore]{ -  The Center for Speech and Language Processing \\ Johns Hopkins University -%  $^1$University of Oxford\\  -%  $^2$University of Sheffield\\  -%  $^3$Carnegie Mellon University\\  -%  $^4$University of Edinburgh -} -\date[June 28]{June 28, 2010} - -%\subject{Unsupervised models of Synchronous Grammar Induction for SMT} - -%\pgfdeclareimage[height=1.0cm]{university-logo}{logo} -%\logo{\pgfuseimage{university-logo}} - -%\AtBeginSection[] -%{ -%  \begin{frame}<beamer>{Outline} -%    %\tableofcontents[currentsection,currentsubsection] -%    \tableofcontents[currentsection] -%  \end{frame} -%} - -%\beamerdefaultoverlayspecification{<+->} - -\begin{document} - -\begin{frame} -  \titlepage -\end{frame} - -%\begin{frame}{Outline} -%  \tableofcontents -% You might wish to add the option [pausesections] -%\end{frame} -  -%\begin{frame}{Outline} -%  \tableofcontents -%  % You might wish to add the option [pausesections] -%\end{frame} - - -%\begin{frame}[t]{Team members} -%\begin{center} -%{\bf Senior Members} \\ -%  Phil Blunsom (Oxford)\\ -%  Trevor Cohn (Sheffield)\\ -%  Adam Lopez (Edinburgh/COE)\\ -%  Chris Dyer (CMU)\\ -%  Jonathan Graehl (ISI)\\ -%  Chris Callison-Burch (JHU)\\ -%\vspace{0.1in} -%{\bf Graduate Students} \\ -%  Jan Botha (Oxford) \\ -%  Vladimir Eidelman (Maryland) \\ -%  Ziyuan Wang (JHU) \\ -%  ThuyLinh 
Nguyen (CMU) \\ -%\vspace{0.1in} -%{\bf Undergraduate Students} \\ -%  Olivia Buzek (Maryland) \\ -%  Desai Chen (CMU) \\ -%\end{center} -%\end{frame} - - - -\begin{frame}[t]{Statistical machine translation} -%\vspace{1.0cm} -\begin{exampleblock}{Urdu $\rightarrow$ English} -  \begin{figure} -    {\centering \includegraphics[scale=0.55]{urdu.pdf}} -  \end{figure} -\vspace{0.10cm} -\end{exampleblock} -\begin{itemize} -  \item Statistical machine translation: Learn how to translate from parallel corpora. -\end{itemize} -\end{frame} - - -\begin{frame}[t]{Statistical machine translation: } -%\vspace{1.0cm} -\begin{exampleblock}{Urdu $\rightarrow$ English} -  \begin{figure} -    {\centering \includegraphics[scale=0.55]{urdu-ref.pdf}} -  \end{figure} -\end{exampleblock} -\begin{itemize} -  \item Statistical machine translation: Learn how to translate from parallel corpora  -\end{itemize} -\end{frame} - -\begin{frame}[t]{Statistical machine translation: state-of-the-art} -%\vspace{1.0cm} -\begin{exampleblock}{Urdu $\rightarrow$ English} -  \begin{figure} -    {\centering \includegraphics[scale=0.55]{urdu-bl.pdf}} -  \end{figure} -\end{exampleblock} -\begin{itemize} -  \item Current state-of-the-art translation models struggle with language pairs which exhibit large differences in structure. -\end{itemize} -\end{frame} - -\begin{frame}[t]{Statistical machine translation: successes} -\begin{center} -  \includegraphics[scale=0.35]{GoogleTranslateLanguages.pdf} -\end{center} -\end{frame} - -\begin{frame}[t]{Statistical machine translation: limitations} -\vspace{1.0cm} -  \begin{table} -  \centering -    \begin{tabular}{|l|l|} -    \hline -      {\bf English}  & {\bf \textcolor{blue}{Who wrote} \textcolor{red}{this letter}?} \\ -      \hline -      Arabic   & \begin{Geeza}من الذي كتب هذه الرسالة؟\end{Geeza} \\ -               & \textcolor{gray}{(function-word)} \textcolor{blue}{(who)} \textcolor{green}{(wrote)} \textcolor{red}{(this)} \textcolor{orange}{(the-letter)} \\ -      \hline -      Chinese  & \begin{CJK}\textcolor{red}{这封  信}  \textcolor{blue}{是  谁  写}  的 ?\end{CJK} \\ -               & \textcolor{red}{(this) (letter)} \textcolor{blue}{(be) (who) (write) (come-from)} \textcolor{gray}{(function-word)} \\ -      \hline -    \end{tabular} -  \end{table} -\end{frame} - - -\begin{frame}[t]{Statistical machine translation: limitations} -\vspace{1.0cm} -\begin{exampleblock}{Structural divergence between languages:} -  %\vspace{0.3cm} -  \begin{table} -  \centering -    \only<1>{ -    \begin{tabular}{|l|l|} -    \hline -      {\bf English}  & {\bf Who wrote this letter?} \\ -      \hline -      Arabic   & \begin{Geeza}من الذي كتب هذه الرسالة؟\end{Geeza} \\ -               & \textcolor{gray}{(function-word)} (who) (wrote) (this) (the-letter) \\ -      \hline -      Chinese  & \begin{CJK}这封  信  是  谁  写  的 ?\end{CJK} \\ -               & (this) (letter) (be) (who) (write) (come-from) \textcolor{gray}{(function-word)} \\ -      \hline -    \end{tabular} -    } -    \only<2>{ -    \begin{tabular}{|l|l|} -    \hline -      {\bf English}  & {\bf \textcolor{blue}{Who} \textcolor{green}{wrote} \textcolor{red}{this} \textcolor{orange}{letter?}} \\ -      \hline -      Arabic   & \begin{Geeza}من الذي كتب هذه الرسالة؟\end{Geeza} \\ -               & \textcolor{gray}{(function-word)} \textcolor{blue}{(who)} \textcolor{green}{(wrote)} \textcolor{red}{(this)} \textcolor{orange}{(the-letter)} \\ -      \hline -      Chinese  & \begin{CJK}这封  信  是  谁  写  的 ?\end{CJK} \\ -               & (this) (letter) (be) (who) 
(write) (come-from) \textcolor{gray}{(function-word)} \\ -      \hline -    \end{tabular} -    } -    \only<3->{ -    \begin{tabular}{|l|l|} -    \hline -      {\bf English}  & {\bf \textcolor{blue}{Who wrote} \textcolor{red}{this letter}?} \\ -      \hline -      Arabic   & \begin{Geeza}من الذي كتب هذه الرسالة؟\end{Geeza} \\ -               & \textcolor{gray}{(function-word)} (who) (wrote) (this) (the-letter) \\ -      \hline -      Chinese  & \begin{CJK}\textcolor{red}{这封  信}  \textcolor{blue}{是  谁  写}  的 ?\end{CJK} \\ -               & \textcolor{red}{(this) (letter)} \textcolor{blue}{(be) (who) (write) (come-from)} \textcolor{gray}{(function-word)} \\ -      \hline -    \end{tabular} -  } -  \end{table} -\end{exampleblock} -\only<4>{ -  \begin{itemize} -  \item Phrasal translation equivalences -  \item Constituent reordering -  \item Morphology -  \end{itemize} -} -\end{frame} - -\begin{frame} -\frametitle{Using syntax in Machine Translation:} -  \footnotesize -  \begin{block}{Synchronous Context Free Grammar (SCFG)} -    \begin{figure} -    \begin{align*} -        \alert<2>{S} & \alert<2>{\rightarrow  \langle X\ind{1},\ X\ind{1} \rangle}  -            &\quad \alert<3,5>{X} & \alert<3,5>{\rightarrow  \langle X\ind{1}\ X\ind{2},\ X\ind{1}\ X\ind{2} \rangle} \\ -        \alert<7>{X} & \alert<7>{\rightarrow  \langle X\ind{1}\ X\ind{2},\ X\ind{2}\ X\ind{1} \rangle} & &\\ -        \alert<4>{X} & \alert<4>{\rightarrow  \langle Sie,\ She \rangle}  -           &\quad \alert<6>{X} & \alert<6>{\rightarrow  \langle will,\ wants\ to \rangle} \\ -         \alert<8>{X} & \alert<8>{\rightarrow  \langle eine\ Tasse\ Kaffee,\ a\ cup\ of\ coffee \rangle} -           &\quad \alert<9>{X} & \alert<9>{\rightarrow  \langle trinken,\ drink\rangle} \\ -    \end{align*} -    \end{figure} -  \end{block} -  \begin{exampleblock}{Example Derivation} -      %\begin{figure} -      %\begin{align*} -      \center -        \vspace{0.2cm} -        \onslide<2->{\alert<2>{$S \Rightarrow \langle X\ind{1},\ X\ind{1}\ \rangle$}} \quad \onslide<3->{\alert<3>{$\Rightarrow \langle X\ind{2}\ X\ind{3},\ X\ind{2}\ X\ind{3} \rangle$ \\}} -        \vspace{0.2cm} -        \onslide<4->{\alert<4>{$\Rightarrow \langle Sie\ X\ind{3},\ She\ X\ind{3} \rangle$}} \quad \onslide<5->{\alert<5>{$\Rightarrow \langle Sie\ X\ind{4}\ X\ind{5},\ She\ X\ind{4}\ X\ind{5} \rangle$ \\}} -        \vspace{0.2cm} -        \onslide<6->{\alert<6>{$\Rightarrow \langle Sie\ will\ X\ind{5},\ She\ wants\ to\  X\ind{5} \rangle$ }} \quad \onslide<7->{\alert<7>{$\Rightarrow \langle Sie\ will\ X\ind{6} X\ind{7},\ She\ wants\ to\  X\ind{7} X\ind{6} \rangle$ \\}} -        \vspace{0.2cm} -        \onslide<8->{\alert<8>{$\Rightarrow \langle Sie\ will\ eine\ Tasse\ Kaffee\ X\ind{7},\ She\ wants\ to\ X\ind{7}\ a\ cup\ of\ coffee\rangle$} \\} -        \vspace{0.2cm} -        \onslide<9->{\alert<9>{$\Rightarrow \langle Sie\ will\ eine\ Tasse\ Kaffee\ trinken,\ She\ wants\ to\ drink\ a\ cup\ of\ coffee\rangle$}} -        \vspace{0.2cm} -      %\end{align*} -      %\end{figure} -  \end{exampleblock} -\end{frame} - - -\begin{frame}[t]{Models of translation} -\begin{block}{Unlabelled SCFG: Hiero} -  \begin{center} -    \includegraphics[scale=0.55]{JeNeVeuxPasTravailler-Hiero.pdf} -    \hspace{0.3in} -    \includegraphics[scale=0.55]{JeVeuxTravailler-Hiero.pdf} -  \end{center} -\end{block} -\begin{unpacked_itemize} -\only<1>{\item S -> $\langle$ X\ind{1}, X\ind{1} $\rangle$, \\ S -> $\langle$ S\ind{1} X\ind{2}, S\ind{1} X\ind{2} $\rangle$} -\only<1>{\item X -> $\langle$ Je, I 
$\rangle$, \hspace{0.5in} X -> $\langle$ ne X\ind{1} pas, do not X\ind{1} $\rangle$, \\ X -> $\langle$ veux, want to$\rangle$, X -> $\langle$ travailler, work $\rangle$} -\only<2->{ -\item Only requires the parallel corpus. -\item But weak model of sentence structure. -} -\end{unpacked_itemize} -\end{frame} - -\begin{frame}[t]{Models of translation} -\begin{exampleblock}{Supervised SCFG: Syntactic Tree-to-String} -\begin{center} -  \includegraphics[scale=0.55]{JeNeVeuxPasTravailler-tsg.pdf} -  \hspace{0.3in} -  \includegraphics[scale=0.55]{JeVeuxTravailler-tsg.pdf} -\end{center} -\end{exampleblock} -\begin{itemize} -\only<1>{\item S -> $\langle$ NP\ind{1} VP\ind{2}, NP\ind{1} VP\ind{2} $\rangle$, \\ NP -> $\langle$ PRP\ind{1}, PRP\ind{1} $\rangle$} -\only<1>{\item PRP -> $\langle$ Je, I $\rangle$, VB -> $\langle$ travailler, work $\rangle$ \\ VP -> $\langle$ ne veux pas VB\ind{1}, do not want to VB\ind{1} $\rangle$} -\only<2->{ -  \item Strong model of sentence structure. -  \item Reliant on a treebank to train the parser. -} -\end{itemize} -\end{frame} - - -\begin{frame}[t]{Impact} -\vspace{0.5in} -\begin{table} -  \begin{tabular}{l|rr} -    \hline -    Language & Words &  Domain \\ \hline -    English & 4.5M& Financial news \\ -    Chinese & 0.5M & Broadcasting news \\  -    Arabic &  300K (1M planned)  &  News  \\ -    Korean & 54K  & Military \\ \hline -  \end{tabular} -\caption{Major treebanks: data size and domain \label{table_treebanks_size}} -\end{table} -\end{frame} - - -\begin{frame}[t]{Impact} -Parallel corpora far exceed treebanks (millions of words): -  \begin{figure} -    {\centering \includegraphics[scale=0.7]{resource_matrix.pdf}} -  \end{figure} -\end{frame} - - -\begin{frame}[t]{Models of translation} -\begin{exampleblock}{Phrase extraction:} -\begin{center} -\only<1>{\includegraphics[scale=0.8]{PhraseExtraction6.pdf}\\[1cm]} -\only<2>{\includegraphics[scale=0.8]{PhraseExtraction5.pdf}\\[1cm]} -\only<3>{\includegraphics[scale=0.8]{PhraseExtraction4.pdf}\\[1cm]} -\only<4>{\includegraphics[scale=0.8]{PhraseExtraction3.pdf}\\[1cm]} -\only<5>{\includegraphics[scale=0.8]{PhraseExtraction2.pdf}\\[1cm]} -\only<6>{\includegraphics[scale=0.8]{PhraseExtraction1.pdf}\\[1cm]} -\only<7>{\includegraphics[scale=0.8]{PhraseExtraction.pdf}\\[1cm]} -\end{center} -\end{exampleblock} -\only<2->{ -\begin{unpacked_itemize} -\only<2>{\item Use a word-based translation model to annotate the parallel corpus with word-alignments} -\only<3->{\item $\langle$ Je, I $\rangle$, $\langle$ veux, want to $\rangle$,  $\langle$ travailler, work $\rangle$}\only<4->{, $\langle$ ne veux pas, do not want to $\rangle$}\only<5->{, $\langle$ ne veux pas travailler, do not want to work $\rangle$}\only<6->{, $\langle$ Je ne veux pas, I do not want to $\rangle$}\only<7->{, $\langle$ Je ne veux pas travailler, I do not want to work $\rangle$} -\end{unpacked_itemize} -} -\end{frame} - - -\begin{frame}[t]{Models of translation} -\begin{exampleblock}{SCFG Rule extraction:} -\begin{center} -\only<1>{\includegraphics[scale=0.8]{HieroExtraction1.pdf}\\[1cm]} -\only<2>{\includegraphics[scale=0.8]{HieroExtraction2.pdf}\\[1cm]} -\only<3>{\includegraphics[scale=0.8]{HieroExtraction3.pdf}\\[1cm]} -\only<4>{\includegraphics[scale=0.8]{HieroExtraction4.pdf}\\[1cm]} -\end{center} -\end{exampleblock} -\begin{unpacked_itemize} -\only<1>{ \item X -> $\langle$ ne veux pas, do not want to $\rangle$ } -\only<2>{ \item X -> $\langle$ ne veux pas, do not want to $\rangle$, \item X -> $\langle$ ne X\ind{1} pas, do not X\ind{1} $\rangle$ } 
-\only<3>{ \item VP$/$NN -> $\langle$ ne veux pas, do not want to $\rangle$, \item VP$/$NN -> $\langle$ ne V\ind{1} pas, do not V\ind{1} $\rangle$ } -\only<4>{ \item X10 -> $\langle$ ne veux pas, do not want to $\rangle$, \item X10 -> $\langle$ ne X14\ind{1} pas, do not X14\ind{1} $\rangle$ } -\end{unpacked_itemize} -\end{frame} - -\begin{frame}[t]{Models of translation} -\vspace{0.25in} -\begin{block}{This workshop} -  \begin{center} -    \includegraphics[scale=0.55]{JeNeVeuxPasTravailler-Hiero-labelled.pdf} -    \hspace{0.3in} -    \includegraphics[scale=0.55]{JeVeuxTravailler-Hiero-labelled.pdf} -  \end{center} -\end{block} -\begin{unpacked_itemize} -\only<1>{\item S -> $\langle$ X4\ind{1} X2\ind{2}, X4\ind{1} X2\ind{2} $\rangle$, X4 -> $\langle$ X1\ind{1} X10\ind{2}, X1\ind{1} X10\ind{2} $\rangle$} -\only<1>{\item X1 -> $\langle$ Je, I $\rangle$, X10 -> $\langle$ ne X14\ind{1} pas, do not X14\ind{1} $\rangle$, \\ X14 -> $\langle$ veux, want to$\rangle$, X10 -> $\langle$ travailler, work $\rangle$} -\only<2->{ -  \item Only requires the parallel corpus. -  \item But also gives a strong model of sentence structure. -} -\end{unpacked_itemize} -\end{frame} - - -\begin{frame}[t]{Workshop overview} -Input: -  \begin{itemize} -%  \item Joshua decoder -  \item Existing procedures for unlabelled synchronous grammar extraction -  \end{itemize} -\vspace{0.3in} -Output: -  \begin{itemize} -    \item New unsupervised models for large scale synchronous grammar extraction, -%    \item An implementation of this model, -    \item A comparison and analysis of the existing and proposed models, -    \item Extended decoders (cdec/Joshua) capable of working efficiently with these models. -  \end{itemize} -\end{frame} - - -%\begin{frame}[t]{Models of translation} -%\begin{block}{Hierarchical} -%  \begin{center} -%    \includegraphics[scale=0.55]{JeNeVeuxPasTravailler-Hiero.pdf} -%    \hspace{0.3in} -%    \includegraphics[scale=0.55]{JeVeuxTravailler-Hiero.pdf} -%  \end{center} -%\end{block} -%\end{frame} - - -%\begin{frame}[t]{Impact} -%  \begin{center} -%    \includegraphics[scale=0.3]{ccb_tree.pdf} -%  \end{center} -%\end{frame} - - - -%\begin{frame}[t]{Models of translation} -%\vspace{0.25in} -%\begin{block}{Hierarchical} -%  \begin{center} -%    \includegraphics[scale=0.55]{JeNeVeuxPasTravailler-Hiero-labelled.pdf} -%    \hspace{0.3in} -%    \includegraphics[scale=0.55]{JeVeuxTravailler-Hiero-labelled.pdf} -%  \end{center} -%\end{block} -%%\begin{itemize} -%%\item \alert{AIM: Implement a large scale open-source synchronous constituent learning system.}  -%%\item \alert{AIM: Investigate and understand the relationship between the choice of synchronous grammar and SMT performance,}  -%%\item \alert{AIM: and fix our decoders accordingly.}  -%%\end{itemize} -%\end{frame} -% -%\begin{frame}[t]{Evaluation goals} -%We will predominately evaluate using BLEU, but also use automatic structured metrics and perform small scale human evaluation: -%\vspace{0.25in} -%\begin{unpacked_itemize} -%\item Evaluate phrasal, syntactic, unsupervised syntactic, -%\item Aim 1: Do no harm (not true of existing syntactic approach) -%\item Aim 2: Exceed the performance of current non-syntactic systems. -%\item Aim 3: Meet or exceed performance of existing syntactic systems. 
-%\end{unpacked_itemize} -%\end{frame} - -%\begin{frame}[t]{Impact} -%Success will have a significant impact on two areas of CL: -%\vspace{0.25in} -%\begin{unpacked_itemize} -%\item Machine translation -%\begin{unpacked_itemize} -%  \item Make the benefits of richly structured translation models available to a much wider range of researchers and for a wider range of languages. -%% \item Change the research outlook of the field. -%\end{unpacked_itemize} -%\item Grammar induction: -%\begin{unpacked_itemize} -%  \item Provide an empirical validation of state-of-the-art grammar induction techniques. -%\end{unpacked_itemize} -%\end{unpacked_itemize} -%\end{frame} - - -\begin{frame}[t]{Workshop Streams} -\begin{enumerate} -\item Implement scalable labelled SCFG grammar induction algorithms: -\begin{itemize} -\item by clustering translation phrases which occur in the same context we can learn which phrases are substitutable, -\item we have implemented both parametric and non-parametric Bayesian clustering algorithms. -\end{itemize} -\item Improve SCFG decoders to efficiently handle the grammars produced: -\begin{itemize} -\item translation complexity scales quadratically as we add more categories, -\item in order to decode efficiently with the grammars we've induced we have created faster search algorithms tuned for syntactic grammars. -\end{itemize} -\item Investigate discriminative training regimes to leverage features extracted from these grammars: -\begin{itemize} -\item to make the most of our induced grammars we need discriminative training algorithms that learn from more than a handful of features, -\item we've implemented two large scale discriminative algorithms for training our models. -\end{itemize} -\end{enumerate} -\end{frame} - -\begin{frame}[t]{Extrinsic evaluation: BLEU} -\begin{exampleblock}{ -   \only<1>{Ngram overlap metrics:} -   \only<2>{Ngram overlap metrics: 1-gram precision $p_1 = \frac{11}{14}$} -   \only<3>{Ngram overlap metrics: 2-gram precision $p_2 = \frac{5}{13}$} -   \only<4>{Ngram overlap metrics: 3-gram precision $p_3 = \frac{2}{12}$} -   \only<5>{Ngram overlap metrics: 4-gram precision $p_4 = \frac{1}{11}$} -} -\vspace{0.2cm} -{\em Source}: \begin{CJK}欧盟 办事处 与 澳洲 大使馆 在 同 一 建筑 内 \end{CJK} \\ -\vspace{0.3cm} -\only<1>{{\em Candidate}: the chinese embassy in australia and the eu representative office in the same building} -\only<2>{{\em Candidate}: \alert{the} chinese \alert{embassy} \alert{in} australia \alert{and} \alert{the} \alert{eu} representative \alert{office} \alert{in} \alert{the} \alert{same} \alert{building}} -\only<3>{{\em Candidate}: the chinese embassy in australia \alert{and} \alert{the} \alert{eu} representative office \alert{in} \alert{the} \alert{same} \alert{building}} -\only<4>{{\em Candidate}: the chinese embassy in australia and the eu representative office \alert{in} \alert{the} \alert{same} \alert{building}} -\only<5>{{\em Candidate}: the chinese embassy in australia and the eu representative office \alert{in} \alert{the} \alert{same} \alert{building}} -\vspace{0.2cm} -\end{exampleblock} - -\begin{block}{Reference Translations:} -  \begin{enumerate} -  \only<1>{\item the eu office and the australian embassy are housed in the same building} -  \only<2>{\item \alert{the} \alert{eu} \alert{office} \alert{and} \alert{the} australian \alert{embassy} are housed \alert{in} \alert{the} \alert{same} \alert{building}} -  \only<3>{\item \alert{the} \alert{eu} office \alert{and} \alert{the} australian embassy are housed \alert{in} \alert{the}
\alert{same} \alert{building}} -  \only<4>{\item the eu office and the australian embassy are housed \alert{in} \alert{the} \alert{same} \alert{building}} -  \only<5>{\item the eu office and the australian embassy are housed \alert{in} \alert{the} \alert{same} \alert{building}} - -  \only<1>{\item the european union office is in the same building as the australian embassy} -  \only<2>{\item \alert{the} european union \alert{office} is \alert{in} \alert{the} \alert{same} \alert{building} as \alert{the} australian \alert{embassy}} -  \only<3>{\item the european union office is \alert{in} \alert{the} \alert{same} \alert{building} as the australian embassy} -  \only<4>{\item the european union office is \alert{in} \alert{the} \alert{same} \alert{building} as the australian embassy} -  \only<5>{\item the european union office is \alert{in} \alert{the} \alert{same} \alert{building} as the australian embassy} - -  \only<1>{\item the european union 's office and the australian embassy are both located in the same building} -  \only<2>{\item \alert{the} european union 's \alert{office} \alert{and} \alert{the} australian \alert{embassy} are both located \alert{in} \alert{the} \alert{same} \alert{building}} -  \only<3>{\item the european union 's office \alert{and} \alert{the} australian embassy are both located \alert{in} \alert{the} \alert{same} \alert{building}} -  \only<4>{\item the european union 's office and the australian embassy are both located \alert{in} \alert{the} \alert{same} \alert{building}} -  \only<5>{\item the european union 's office and the australian embassy are both located \alert{in} \alert{the} \alert{same} \alert{building}} -   -  \only<1>{\item the eu 's mission is in the same building with the australian embassy} -  \only<2>{\item \alert{the} \alert{eu} 's mission is \alert{in} \alert{the} \alert{same} \alert{building} with \alert{the} australian \alert{embassy}} -  \only<3>{\item \alert{the} \alert{eu} 's mission is \alert{in} \alert{the} \alert{same} \alert{building} with the australian embassy} -  \only<4>{\item the eu 's mission is \alert{in} \alert{the} \alert{same} \alert{building} with the australian embassy} -  \only<5>{\item the eu 's mission is \alert{in} \alert{the} \alert{same} \alert{building} with the australian embassy} -  \end{enumerate} -\end{block} -\end{frame} - -\begin{frame}[t]{Extrinsic evaluation: BLEU} -\begin{exampleblock}{BLEU} -\Large -\begin{align} -\nonumber BLEU = BP \times \exp{\left( \sum_{n=1}^{N} w_n \log{p_n} \right) }\\ -\nonumber BP = \left\{  -  \begin{array}{ll}  -    1 & \mbox{if $C' > R'$}  \\ -    \exp{(1-\frac{R'}{C'})} & \mbox{if $C' \leq R'$} -  \end{array} \right. -\end{align} -\end{exampleblock} -\begin{itemize} -\item {\em BP} is the {\em Brevity Penalty}, $w_n$ are the ngram weights (usually $\frac{1}{N}$), $p_n$ is the precision of ngram predictions, $R'$ is the effective reference length (the sum of the best-matching reference lengths) and $C'$ is the total length of the candidate translations. -\item statistics are calculated over the whole {\em document}, i.e. all the sentences.
-\end{itemize} -\end{frame} - - - -\begin{frame}[t]{Language pairs} -\begin{unpacked_itemize} -\item BTEC Chinese-English: -  \begin{itemize} -  \item 44k sentence pairs, short sentences -  \item Widely reported `prototyping' corpus -  \item Hiero baseline score: 57.0 (16 references) -  \end{itemize} -\item NIST Urdu-English: -  \begin{itemize} -  \item 50k sentence pairs -  \item Hiero baseline score: 21.1 (4 references) -  \item Major challenges: long-range reordering, SOV word order -  \end{itemize} -\item Europarl Dutch-French: -  \begin{itemize} -  \item 100k sentence pairs, standard Europarl test sets -  \item Hiero baseline score: Europarl 2008 - 15.75 (1 reference) -  \item Major challenges: V2 / V-final word order, morphology -  \end{itemize} -\end{unpacked_itemize} -\end{frame} - - -\begin{frame}[t]{Outline} -\begin{columns} -  \begin{column}{0.2\textwidth} -    \begin{exampleblock}{} -      \begin{figure} -        \tiny -        {\centering \includegraphics[scale=0.07]{trevor.jpg}} \\ -        Trevor Cohn \\ -         -        {\centering \includegraphics[scale=0.06]{dyer.jpg}} \\ -        Chris Dyer\\ -         -        {\centering \includegraphics[scale=0.11]{jan.jpg}} \\ -        Jan Botha \\ -         -        {\centering \includegraphics[scale=0.06]{olivia.jpg}} \\ -        Olivia Buzek\\ -         -        {\centering \includegraphics[scale=0.10]{desai.jpg}}\\ -        Desai Chen\\ - -      \end{figure} -    \end{exampleblock} -  %\vspace{0.25in} -  \end{column} -  \begin{column}{0.7\textwidth} -    \begin{unpacked_itemize} -      \item 1:55pm Grammar induction and evaluation. Trevor -      \item 2:10pm Non-parametric models of category induction. Chris -      \item 2:25pm Inducing categories for morphology. Jan -      \item 2:35pm Smoothing, backoff and hierarchical grammars. Olivia -      \item 2:45pm Parametric models: posterior regularisation. Desai -      \item 3:00pm Break. -    \end{unpacked_itemize} -  \end{column} -\end{columns} -\end{frame} - - - -\begin{frame}[t]{Outline} -\begin{columns} -  \begin{column}{0.2\textwidth} -    \begin{exampleblock}{} -      \begin{figure} -        \tiny -        {\centering \includegraphics[scale=0.05]{vlad.jpg}} \\ -        Vlad Eidelman\\ -         -        {\centering \includegraphics[scale=0.15]{ziyuan.pdf}} \\ -        Ziyuan Wang\\ -         -        {\centering \includegraphics[scale=0.06]{adam.jpg}} \\ -        Adam Lopez\\ -         -        {\centering \includegraphics[scale=0.10]{jon.pdf}} \\ -        Jon Graehl\\ -         -        {\centering \includegraphics[scale=0.15]{linh.pdf}} \\ -        ThuyLinh Nguyen\\ - -      \end{figure} -    \end{exampleblock} -  \end{column} -  \begin{column}{0.7\textwidth} -    \begin{itemize} -    \setlength{\itemsep}{25pt} -    \setlength{\parskip}{0pt} -    \setlength{\parsep}{0pt} -    \item 3:15pm Training models with rich feature spaces. Vlad -    \item 3:30pm Decoding with complex grammars. Adam -    \item 4:00pm Closing remarks. Phil -    \item 4:05pm Finish. -    \end{itemize} -  \end{column} -\end{columns} -\end{frame} - - - -\begin{frame}[t]{Remember:} -  \vspace{0.5in} -  \begin{unpacked_itemize} -    \item Idea: Learn synchronous grammar labels which encode substitutability; phrases which occur in the same context should receive the same label. -    \item Result: Better models of translation structure and morphology, and improved decoding algorithms.
-  \end{unpacked_itemize} -\end{frame} - - -\begin{frame}[t]{Outline} -\begin{columns} -  \begin{column}{0.2\textwidth} -    \begin{exampleblock}{} -      \begin{figure} -        \tiny -        {\centering \includegraphics[scale=0.07]{trevor.jpg}} \\ -        Trevor Cohn \\ -         -        {\centering \includegraphics[scale=0.06]{dyer.jpg}} \\ -        Chris Dyer\\ -         -        {\centering \includegraphics[scale=0.11]{jan.jpg}} \\ -        Jan Botha \\ -         -        {\centering \includegraphics[scale=0.06]{olivia.jpg}} \\ -        Olivia Buzek\\ -         -        {\centering \includegraphics[scale=0.10]{desai.jpg}}\\ -        Desai Chen\\ - -      \end{figure} -    \end{exampleblock} -  %\vspace{0.25in} -  \end{column} -  \begin{column}{0.7\textwidth} -    \begin{unpacked_itemize} -      \only<1>{\item \alert{1:55pm Grammar induction and evaluation. Trevor}} -      \only<2->{\item 1:55pm Grammar induction and evaluation. Trevor} -      \only<2>{\item \alert{2:10pm Non-parametric models of category induction. Chris}} -      \only<1,3->{\item 2:10pm Non-parametric models of category induction. Chris} -      \only<3>{\item \alert{2:25pm Inducing categories for morphology. Jan}} -      \only<1,2,4->{\item 2:25pm Inducing categories for morphology. Jan} -      \only<4>{\item \alert{2:35pm Smoothing, backoff and hierarchical grammars. Olivia}} -      \only<1-3,5->{\item 2:35pm Smoothing, backoff and hierarchical grammars. Olivia} -      \only<5>{\item \alert{2:45pm Parametric models: posterior regularisation. Desai}} -      \only<1-4,6->{\item 2:45pm Parametric models: posterior regularisation. Desai} -      \only<6>{\item \alert{3:00pm Break.}} -      \only<1-5>{\item 3:00pm Break.} -    \end{unpacked_itemize} -  \end{column} -\end{columns} -\end{frame} - - - -\begin{frame}[t]{Outline} -\begin{columns} -  \begin{column}{0.2\textwidth} -    \begin{exampleblock}{} -      \begin{figure} -        \tiny -        {\centering \includegraphics[scale=0.05]{vlad.jpg}} \\ -        Vlad Eidelman\\ -         -        {\centering \includegraphics[scale=0.15]{ziyuan.pdf}} \\ -        Ziyuan Wang\\ -         -        {\centering \includegraphics[scale=0.06]{adam.jpg}} \\ -        Adam Lopez\\ -         -        {\centering \includegraphics[scale=0.10]{jon.pdf}} \\ -        Jon Graehl\\ -         -        {\centering \includegraphics[scale=0.15]{linh.pdf}} \\ -        ThuyLinh Nguyen\\ - -      \end{figure} -    \end{exampleblock} -  \end{column} -  \begin{column}{0.7\textwidth} -    \begin{itemize} -    \setlength{\itemsep}{25pt} -    \setlength{\parskip}{0pt} -    \setlength{\parsep}{0pt} -    \only<1>{\item \alert{3:20pm Parametric models: posterior regularisation. Desai}} -    \only<2->{\item 3:20pm Parametric models: posterior regularisation. Desai} -    \only<2>{\item \alert{3:35pm Training models with rich feature spaces. Vlad}} -    \only<1,3->{\item 3:35pm Training models with rich feature spaces. Vlad} -    \only<3>{\item \alert{3:50pm Decoding with complex grammars. Adam}} -    \only<1,2,4->{\item 3:50pm Decoding with complex grammars. Adam} -    \only<4>{\item \alert{4:20pm Closing remarks. Phil}} -    \only<1-3,5->{\item 4:20pm Closing remarks. Phil} -    \only<5>{\item \alert{4:25pm Finish.}} -    \only<1-4>{\item 4:25pm Finish.} -    \end{itemize} -  \end{column} -\end{columns} -\end{frame} - - -\end{document}
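The deleted slides step through an SCFG derivation, from S => <X1, X1> down to the pair <Sie will eine Tasse Kaffee trinken, She wants to drink a cup of coffee>. The sketch below is an illustrative reconstruction of that example, not code from the workshop: the rule names (glue, mono, invert, ...), the tuple representation and the realize function are assumptions chosen only to reproduce the slide's toy grammar and derivation.

```python
from typing import Dict, List, Tuple

# A synchronous rule: (lhs, source_rhs, target_rhs); tokens like "X:1" are
# linked nonterminals (same index on both sides), plain strings are terminals.
Rule = Tuple[str, List[str], List[str]]

RULES: Dict[str, Rule] = {
    "glue":    ("S", ["X:1"], ["X:1"]),
    "mono":    ("X", ["X:1", "X:2"], ["X:1", "X:2"]),   # keep order
    "invert":  ("X", ["X:1", "X:2"], ["X:2", "X:1"]),   # swap order
    "sie":     ("X", ["Sie"], ["She"]),
    "will":    ("X", ["will"], ["wants", "to"]),
    "kaffee":  ("X", ["eine", "Tasse", "Kaffee"], ["a", "cup", "of", "coffee"]),
    "trinken": ("X", ["trinken"], ["drink"]),
}

def realize(tree) -> Tuple[List[str], List[str]]:
    """Expand a derivation tree (rule name, child subtrees) into a
    source/target token pair, honouring each rule's nonterminal links."""
    name, children = tree
    _, src_rhs, tgt_rhs = RULES[name]
    expanded = {f"X:{i + 1}": realize(child) for i, child in enumerate(children)}

    def fill(rhs, side):
        out: List[str] = []
        for tok in rhs:
            out.extend(expanded[tok][side] if tok in expanded else [tok])
        return out

    return fill(src_rhs, 0), fill(tgt_rhs, 1)

# The derivation shown on the slide, written as a tree of rule applications.
derivation = ("glue", [("mono", [("sie", []),
                                 ("mono", [("will", []),
                                           ("invert", [("kaffee", []),
                                                       ("trinken", [])])])])])
src, tgt = realize(derivation)
print(" ".join(src))  # Sie will eine Tasse Kaffee trinken
print(" ".join(tgt))  # She wants to drink a cup of coffee
```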
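The phrase-extraction slides (PhraseExtraction*.pdf) enumerate the phrase pairs that can be read off a word-aligned sentence pair, from <Je, I> up to <Je ne veux pas travailler, I do not want to work>. Below is a minimal sketch of the standard consistency-based extraction behind those figures; it is not the workshop's extractor, and the alignment points are an assumption chosen so that the extracted pairs match the ones listed on the slide.

```python
from itertools import product

# Sentence pair from the slides and an assumed word alignment given as
# (source index, target index) points; the alignment itself is illustrative.
src = "Je ne veux pas travailler".split()
tgt = "I do not want to work".split()
align = {(0, 0), (1, 1), (1, 2), (3, 1), (3, 2), (2, 3), (2, 4), (4, 5)}

def consistent(fs, fe, es, ee):
    """A phrase pair is consistent if it contains at least one alignment
    point and no point links a word inside the pair to a word outside it."""
    if not any(fs <= f <= fe and es <= e <= ee for f, e in align):
        return False
    return all((fs <= f <= fe) == (es <= e <= ee) for f, e in align)

phrases = set()
for fs, fe in product(range(len(src)), repeat=2):
    for es, ee in product(range(len(tgt)), repeat=2):
        if fs <= fe and es <= ee and consistent(fs, fe, es, ee):
            phrases.add((" ".join(src[fs:fe + 1]), " ".join(tgt[es:ee + 1])))

for f, e in sorted(phrases):
    print(f"<{f} ||| {e}>")
# Prints the seven pairs shown on the slide, e.g. <ne veux pas ||| do not want to>
# and <Je ne veux pas travailler ||| I do not want to work>.
```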
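For the "Extrinsic evaluation: BLEU" slides, a minimal sentence-level sketch of the score in the same spirit as the formula above, assuming standard clipped (modified) n-gram precisions and uniform weights w_n = 1/N. The function names are illustrative and there is no smoothing; the brevity penalty here compares the candidate length with the closest single reference length rather than the document-level R'/C' totals. Because of clipping, the candidate's second "in" is credited against at most one occurrence per reference, so the unigram precision comes out as 10/14 rather than the 11/14 simple overlap count on the slide; the higher-order precisions match the slide's values.

```python
import math
from collections import Counter

def ngrams(tokens, n):
    return Counter(tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1))

def bleu(candidate, references, N=4):
    cand = candidate.split()
    refs = [r.split() for r in references]
    precisions = []
    for n in range(1, N + 1):
        cand_counts = ngrams(cand, n)
        # Clip each candidate n-gram by its maximum count in any one reference.
        max_ref = Counter()
        for ref in refs:
            for gram, count in ngrams(ref, n).items():
                max_ref[gram] = max(max_ref[gram], count)
        clipped = sum(min(c, max_ref[g]) for g, c in cand_counts.items())
        precisions.append(clipped / max(sum(cand_counts.values()), 1))
    # Brevity penalty: candidate length c against the closest reference length r.
    c = len(cand)
    r = min((abs(len(ref) - c), len(ref)) for ref in refs)[1]
    bp = 1.0 if c > r else math.exp(1.0 - r / c)
    if min(precisions) == 0:
        return 0.0, precisions, bp
    score = bp * math.exp(sum(math.log(p) / N for p in precisions))
    return score, precisions, bp

candidate = ("the chinese embassy in australia and the eu representative "
             "office in the same building")
references = [
    "the eu office and the australian embassy are housed in the same building",
    "the european union office is in the same building as the australian embassy",
    "the european union 's office and the australian embassy are both located in the same building",
    "the eu 's mission is in the same building with the australian embassy",
]
score, precisions, bp = bleu(candidate, references)
print([round(p, 3) for p in precisions], round(bp, 2), round(score, 3))
# Precisions 10/14, 5/13, 2/12, 1/11 with BP = 1.0 give a score of roughly 0.25.
```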
