summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorphilblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-08-15 19:09:42 +0000
committerphilblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-08-15 19:09:42 +0000
commit60b865d0ec060f2eda4803f168a02d6b26945869 (patch)
treecb871afd20fee4ef51cb170b66dd60765e489dc0
parentb17c1e8d4295033ce1a721305d9ff89b3cf76d2a (diff)
Added some SCFG stuff. This should migrate to Chapter 2.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@558 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r--report/example_derivation2.graffle536
-rw-r--r--report/example_derivation2.pdfbin0 -> 19177 bytes
-rw-r--r--report/introduction.tex53
-rwxr-xr-xreport/report.tex1
4 files changed, 568 insertions, 22 deletions
diff --git a/report/example_derivation2.graffle b/report/example_derivation2.graffle
new file mode 100644
index 00000000..9a32db8b
--- /dev/null
+++ b/report/example_derivation2.graffle
@@ -0,0 +1,536 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>ActiveLayerIndex</key>
+ <integer>0</integer>
+ <key>AutoAdjust</key>
+ <true/>
+ <key>CanvasColor</key>
+ <dict>
+ <key>w</key>
+ <string>1</string>
+ </dict>
+ <key>CanvasOrigin</key>
+ <string>{0, 0}</string>
+ <key>CanvasScale</key>
+ <real>1</real>
+ <key>ColumnAlign</key>
+ <integer>1</integer>
+ <key>ColumnSpacing</key>
+ <real>36</real>
+ <key>CreationDate</key>
+ <string>2008-07-01 20:57:54 +0100</string>
+ <key>Creator</key>
+ <string>Phil Blunsom</string>
+ <key>DisplayScale</key>
+ <string>1 cm = 1 cm</string>
+ <key>GraphDocumentVersion</key>
+ <integer>5</integer>
+ <key>GraphicsList</key>
+ <array>
+ <dict>
+ <key>Bounds</key>
+ <string>{{234, 191}, {146, 23}}</string>
+ <key>Class</key>
+ <string>ShapedGraphic</string>
+ <key>FitText</key>
+ <string>YES</string>
+ <key>Flow</key>
+ <string>Resize</string>
+ <key>ID</key>
+ <integer>142</integer>
+ <key>Shape</key>
+ <string>Rectangle</string>
+ <key>Style</key>
+ <dict>
+ <key>fill</key>
+ <dict>
+ <key>Draws</key>
+ <string>NO</string>
+ </dict>
+ <key>shadow</key>
+ <dict>
+ <key>Draws</key>
+ <string>NO</string>
+ </dict>
+ <key>stroke</key>
+ <dict>
+ <key>Draws</key>
+ <string>NO</string>
+ </dict>
+ </dict>
+ <key>Text</key>
+ <dict>
+ <key>Text</key>
+ <string>{\rtf1\ansi\ansicpg1252\cocoartf949\cocoasubrtf330
+{\fonttbl\f0\fnil\fcharset128 HiraMinProN-W3;\f1\fnil\fcharset128 HiraKakuProN-W3;\f2\fnil\fcharset134 STXihei;
+}
+{\colortbl;\red255\green255\blue255;}
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\qc\pardirnatural
+
+\f0\b\fs28 \cf0 S
+\b0 \'81\'cb \'81\'71
+\f1\b\fs24 X
+\b0 \sub 1
+\f2 \nosupersub \'b5\'c4
+\f1 \sub
+\f2 \nosupersub \'a1\'a3
+\f1 ,
+\b X
+\b0 \sub 1
+\f2 \nosupersub
+\f1\b .
+\f0\b0\fs28 \'81\'72}</string>
+ </dict>
+ <key>Wrap</key>
+ <string>NO</string>
+ </dict>
+ <dict>
+ <key>Bounds</key>
+ <string>{{234, 220}, {308, 23}}</string>
+ <key>Class</key>
+ <string>ShapedGraphic</string>
+ <key>FitText</key>
+ <string>YES</string>
+ <key>Flow</key>
+ <string>Resize</string>
+ <key>ID</key>
+ <integer>141</integer>
+ <key>Shape</key>
+ <string>Rectangle</string>
+ <key>Style</key>
+ <dict>
+ <key>fill</key>
+ <dict>
+ <key>Draws</key>
+ <string>NO</string>
+ </dict>
+ <key>shadow</key>
+ <dict>
+ <key>Draws</key>
+ <string>NO</string>
+ </dict>
+ <key>stroke</key>
+ <dict>
+ <key>Draws</key>
+ <string>NO</string>
+ </dict>
+ </dict>
+ <key>Text</key>
+ <dict>
+ <key>Text</key>
+ <string>{\rtf1\ansi\ansicpg1252\cocoartf949\cocoasubrtf330
+{\fonttbl\f0\fnil\fcharset128 HiraMinProN-W3;\f1\fnil\fcharset128 HiraKakuProN-W3;}
+{\colortbl;\red255\green255\blue255;}
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\qc\pardirnatural
+
+\f0\b\fs28 \cf0 X
+\f1\b0\fs24 \sub 1
+\f0\fs28 \nosupersub \'81\'cb \'81\'71
+\f1\b\fs24 X
+\b0 \sub 2 \nosupersub \'90\'a5 \'8d\'f0\'93\'56 \'90\'5b\'96\'e9
+\b X
+\b0 \sub 3 \nosupersub ,
+\b X
+\b0 \sub 2
+\b \nosupersub X
+\b0 \sub 3 \nosupersub late last night
+\f0\fs28 \'81\'72}</string>
+ </dict>
+ <key>Wrap</key>
+ <string>NO</string>
+ </dict>
+ <dict>
+ <key>Bounds</key>
+ <string>{{234, 336}, {150, 23}}</string>
+ <key>Class</key>
+ <string>ShapedGraphic</string>
+ <key>FitText</key>
+ <string>YES</string>
+ <key>Flow</key>
+ <string>Resize</string>
+ <key>ID</key>
+ <integer>140</integer>
+ <key>Shape</key>
+ <string>Rectangle</string>
+ <key>Style</key>
+ <dict>
+ <key>fill</key>
+ <dict>
+ <key>Draws</key>
+ <string>NO</string>
+ </dict>
+ <key>shadow</key>
+ <dict>
+ <key>Draws</key>
+ <string>NO</string>
+ </dict>
+ <key>stroke</key>
+ <dict>
+ <key>Draws</key>
+ <string>NO</string>
+ </dict>
+ </dict>
+ <key>Text</key>
+ <dict>
+ <key>Text</key>
+ <string>{\rtf1\ansi\ansicpg1252\cocoartf949\cocoasubrtf330
+{\fonttbl\f0\fnil\fcharset128 HiraMinProN-W3;\f1\fnil\fcharset128 HiraKakuProN-W3;}
+{\colortbl;\red255\green255\blue255;}
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\qc\pardirnatural
+
+\f0\b\fs28 \cf0 X
+\f1\b0\fs24 \sub 5
+\f0\fs28 \nosupersub \'81\'cb \'81\'71
+\f1\fs24 \'96\'6b\'8b\'9e , Beijing
+\f0\fs28 \'81\'72}</string>
+ </dict>
+ <key>Wrap</key>
+ <string>NO</string>
+ </dict>
+ <dict>
+ <key>Bounds</key>
+ <string>{{234, 307}, {165, 23}}</string>
+ <key>Class</key>
+ <string>ShapedGraphic</string>
+ <key>FitText</key>
+ <string>YES</string>
+ <key>Flow</key>
+ <string>Resize</string>
+ <key>ID</key>
+ <integer>138</integer>
+ <key>Shape</key>
+ <string>Rectangle</string>
+ <key>Style</key>
+ <dict>
+ <key>fill</key>
+ <dict>
+ <key>Draws</key>
+ <string>NO</string>
+ </dict>
+ <key>shadow</key>
+ <dict>
+ <key>Draws</key>
+ <string>NO</string>
+ </dict>
+ <key>stroke</key>
+ <dict>
+ <key>Draws</key>
+ <string>NO</string>
+ </dict>
+ </dict>
+ <key>Text</key>
+ <dict>
+ <key>Text</key>
+ <string>{\rtf1\ansi\ansicpg1252\cocoartf949\cocoasubrtf330
+{\fonttbl\f0\fnil\fcharset128 HiraMinProN-W3;\f1\fnil\fcharset128 HiraKakuProN-W3;\f2\fnil\fcharset134 STXihei;
+}
+{\colortbl;\red255\green255\blue255;}
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\qc\pardirnatural
+
+\f0\b\fs28 \cf0 X
+\f1\b0\fs24 \sub 4
+\f0\fs28 \nosupersub \'81\'cb \'81\'71
+\f2\fs24 \'c9\'cf\'ba\'a3
+\f1 , Shanghai
+\f0\fs28 \'81\'72}</string>
+ </dict>
+ <key>Wrap</key>
+ <string>NO</string>
+ </dict>
+ <dict>
+ <key>Bounds</key>
+ <string>{{234, 278}, {287, 23}}</string>
+ <key>Class</key>
+ <string>ShapedGraphic</string>
+ <key>FitText</key>
+ <string>YES</string>
+ <key>Flow</key>
+ <string>Resize</string>
+ <key>ID</key>
+ <integer>137</integer>
+ <key>Shape</key>
+ <string>Rectangle</string>
+ <key>Style</key>
+ <dict>
+ <key>fill</key>
+ <dict>
+ <key>Draws</key>
+ <string>NO</string>
+ </dict>
+ <key>shadow</key>
+ <dict>
+ <key>Draws</key>
+ <string>NO</string>
+ </dict>
+ <key>stroke</key>
+ <dict>
+ <key>Draws</key>
+ <string>NO</string>
+ </dict>
+ </dict>
+ <key>Text</key>
+ <dict>
+ <key>Text</key>
+ <string>{\rtf1\ansi\ansicpg1252\cocoartf949\cocoasubrtf330
+{\fonttbl\f0\fnil\fcharset128 HiraMinProN-W3;\f1\fnil\fcharset128 HiraKakuProN-W3;\f2\fnil\fcharset134 STXihei;
+}
+{\colortbl;\red255\green255\blue255;}
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\qc\pardirnatural
+
+\f0\b\fs28 \cf0 X
+\f1\b0\fs24 \sub 3
+\f0\fs28 \nosupersub \'81\'cb \'81\'71
+\f1\fs24 \'98\'b8
+\b X
+\b0 \sub 4 \nosupersub \'92\'ef
+\f2 \'b4\'ef
+\f1\b X
+\b0 \sub 5 \nosupersub , arrived in
+\b X
+\b0 \sub 5\nosupersub from
+\b X
+\b0 \sub 4
+\f0\fs28 \nosupersub \'81\'72}</string>
+ </dict>
+ <key>Wrap</key>
+ <string>NO</string>
+ </dict>
+ <dict>
+ <key>Bounds</key>
+ <string>{{234, 249}, {148, 23}}</string>
+ <key>Class</key>
+ <string>ShapedGraphic</string>
+ <key>FitText</key>
+ <string>YES</string>
+ <key>Flow</key>
+ <string>Resize</string>
+ <key>ID</key>
+ <integer>101</integer>
+ <key>Shape</key>
+ <string>Rectangle</string>
+ <key>Style</key>
+ <dict>
+ <key>fill</key>
+ <dict>
+ <key>Draws</key>
+ <string>NO</string>
+ </dict>
+ <key>shadow</key>
+ <dict>
+ <key>Draws</key>
+ <string>NO</string>
+ </dict>
+ <key>stroke</key>
+ <dict>
+ <key>Draws</key>
+ <string>NO</string>
+ </dict>
+ </dict>
+ <key>Text</key>
+ <dict>
+ <key>Text</key>
+ <string>{\rtf1\ansi\ansicpg1252\cocoartf949\cocoasubrtf330
+{\fonttbl\f0\fnil\fcharset128 HiraMinProN-W3;\f1\fnil\fcharset128 HiraKakuProN-W3;}
+{\colortbl;\red255\green255\blue255;}
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\qc\pardirnatural
+
+\f0\b\fs28 \cf0 X
+\f1\b0\fs24 \sub 2
+\f0\fs28 \nosupersub \'81\'cb \'81\'71
+\f1\fs24 \'95\'7a\'98\'4e , Brown
+\f0\fs28 \'81\'72}</string>
+ </dict>
+ <key>Wrap</key>
+ <string>NO</string>
+ </dict>
+ </array>
+ <key>GridInfo</key>
+ <dict/>
+ <key>GuidesLocked</key>
+ <string>NO</string>
+ <key>GuidesVisible</key>
+ <string>YES</string>
+ <key>HPages</key>
+ <integer>1</integer>
+ <key>ImageCounter</key>
+ <integer>1</integer>
+ <key>IsPalette</key>
+ <string>NO</string>
+ <key>KeepToScale</key>
+ <false/>
+ <key>Layers</key>
+ <array>
+ <dict>
+ <key>Lock</key>
+ <string>NO</string>
+ <key>Name</key>
+ <string>Layer 1</string>
+ <key>Print</key>
+ <string>YES</string>
+ <key>View</key>
+ <string>YES</string>
+ </dict>
+ </array>
+ <key>LayoutInfo</key>
+ <dict/>
+ <key>LinksVisible</key>
+ <string>NO</string>
+ <key>MagnetsVisible</key>
+ <string>NO</string>
+ <key>MasterSheet</key>
+ <string>Master 1</string>
+ <key>MasterSheets</key>
+ <array>
+ <dict>
+ <key>ActiveLayerIndex</key>
+ <integer>0</integer>
+ <key>AutoAdjust</key>
+ <true/>
+ <key>CanvasColor</key>
+ <dict>
+ <key>w</key>
+ <string>1</string>
+ </dict>
+ <key>CanvasOrigin</key>
+ <string>{0, 0}</string>
+ <key>CanvasScale</key>
+ <real>1</real>
+ <key>ColumnAlign</key>
+ <integer>1</integer>
+ <key>ColumnSpacing</key>
+ <real>36</real>
+ <key>DisplayScale</key>
+ <string>1 cm = 1 cm</string>
+ <key>GraphicsList</key>
+ <array/>
+ <key>GridInfo</key>
+ <dict/>
+ <key>HPages</key>
+ <integer>1</integer>
+ <key>IsPalette</key>
+ <string>NO</string>
+ <key>KeepToScale</key>
+ <false/>
+ <key>Layers</key>
+ <array>
+ <dict>
+ <key>Lock</key>
+ <string>NO</string>
+ <key>Name</key>
+ <string>Layer 1</string>
+ <key>Print</key>
+ <string>YES</string>
+ <key>View</key>
+ <string>YES</string>
+ </dict>
+ </array>
+ <key>LayoutInfo</key>
+ <dict/>
+ <key>Orientation</key>
+ <integer>2</integer>
+ <key>OutlineStyle</key>
+ <string>Basic</string>
+ <key>RowAlign</key>
+ <integer>1</integer>
+ <key>RowSpacing</key>
+ <real>36</real>
+ <key>SheetTitle</key>
+ <string>Master 1</string>
+ <key>UniqueID</key>
+ <integer>1</integer>
+ <key>VPages</key>
+ <integer>1</integer>
+ </dict>
+ </array>
+ <key>ModificationDate</key>
+ <string>2008-07-01 21:05:14 +0100</string>
+ <key>Modifier</key>
+ <string>Phil Blunsom</string>
+ <key>NotesVisible</key>
+ <string>NO</string>
+ <key>Orientation</key>
+ <integer>2</integer>
+ <key>OriginVisible</key>
+ <string>NO</string>
+ <key>OutlineStyle</key>
+ <string>Basic</string>
+ <key>PageBreaks</key>
+ <string>YES</string>
+ <key>PrintInfo</key>
+ <dict>
+ <key>NSBottomMargin</key>
+ <array>
+ <string>coded</string>
+ <string>BAtzdHJlYW10eXBlZIHoA4QBQISEhAhOU051bWJlcgCEhAdOU1ZhbHVlAISECE5TT2JqZWN0AIWEASqEhAFklwCG</string>
+ </array>
+ <key>NSLeftMargin</key>
+ <array>
+ <string>coded</string>
+ <string>BAtzdHJlYW10eXBlZIHoA4QBQISEhAhOU051bWJlcgCEhAdOU1ZhbHVlAISECE5TT2JqZWN0AIWEASqEhAFklwCG</string>
+ </array>
+ <key>NSOrientation</key>
+ <array>
+ <string>int</string>
+ <string>1</string>
+ </array>
+ <key>NSPaperSize</key>
+ <array>
+ <string>size</string>
+ <string>{842, 595}</string>
+ </array>
+ <key>NSRightMargin</key>
+ <array>
+ <string>coded</string>
+ <string>BAtzdHJlYW10eXBlZIHoA4QBQISEhAhOU051bWJlcgCEhAdOU1ZhbHVlAISECE5TT2JqZWN0AIWEASqEhAFklwCG</string>
+ </array>
+ <key>NSTopMargin</key>
+ <array>
+ <string>coded</string>
+ <string>BAtzdHJlYW10eXBlZIHoA4QBQISEhAhOU051bWJlcgCEhAdOU1ZhbHVlAISECE5TT2JqZWN0AIWEASqEhAFklwCG</string>
+ </array>
+ </dict>
+ <key>ReadOnly</key>
+ <string>NO</string>
+ <key>RowAlign</key>
+ <integer>1</integer>
+ <key>RowSpacing</key>
+ <real>36</real>
+ <key>SheetTitle</key>
+ <string>Canvas 1</string>
+ <key>SmartAlignmentGuidesActive</key>
+ <string>YES</string>
+ <key>SmartDistanceGuidesActive</key>
+ <string>YES</string>
+ <key>UniqueID</key>
+ <integer>1</integer>
+ <key>UseEntirePage</key>
+ <true/>
+ <key>VPages</key>
+ <integer>1</integer>
+ <key>WindowInfo</key>
+ <dict>
+ <key>CurrentSheet</key>
+ <string>0</string>
+ <key>DrawerOpen</key>
+ <false/>
+ <key>DrawerTab</key>
+ <string>Outline</string>
+ <key>DrawerWidth</key>
+ <real>209</real>
+ <key>FitInWindow</key>
+ <false/>
+ <key>Frame</key>
+ <string>{{68, 11}, {846, 735}}</string>
+ <key>ShowRuler</key>
+ <false/>
+ <key>ShowStatusBar</key>
+ <true/>
+ <key>VisibleRegion</key>
+ <string>{{-6, -26}, {831, 621}}</string>
+ <key>Zoom</key>
+ <string>1</string>
+ </dict>
+</dict>
+</plist>
diff --git a/report/example_derivation2.pdf b/report/example_derivation2.pdf
new file mode 100644
index 00000000..b143245c
--- /dev/null
+++ b/report/example_derivation2.pdf
Binary files differ
diff --git a/report/introduction.tex b/report/introduction.tex
index 3b673c8e..12cc2705 100644
--- a/report/introduction.tex
+++ b/report/introduction.tex
@@ -81,7 +81,7 @@ We structured the workshop into three parallel but interdependent streams:
\begin{figure}
\centering
- \subfigure{\includegraphics[scale=0.5]{intro_slides/JeNeVeuxPasTravailler-hiero-labelled.pdf}}
+ \includegraphics[scale=0.5]{intro_slides/JeNeVeuxPasTravailler-hiero-labelled.pdf}
\caption{Example derivation using the Hiero grammar extraction heuristics where non-terminals have been clustered into unsupervised syntactic categories denoted by $X?$.}
\label{fig:intro_labelled_hiero}
\end{figure}
@@ -124,31 +124,24 @@ Chapter \ref{chap:training} describes this work.
The remainder of this introductory chapter provides a formal definition of SCFGs and describes the language pairs that we experimented with.
\section{Synchronous context free grammar} \label{sec:scfg}
+{\em This section will be moved to the start of Chapter 2}
+
+%\subsubsection*{Synchronous context free grammar} \label{sec:scfg}
+\begin{figure}[t]
+\begin{center}
+\includegraphics[width=0.6\columnwidth]{example_derivation2.pdf}
+\end{center}
+\caption[Derivation]{An example SCFG derivation from a Chinese source sentence which yields the English sentence: {\em ``Brown arrived in Shanghai from Beijing late last night.''} The non-terminal alignment $\mathbf{a}$ is specified by the variable subscripts.}
+\label{fig:intro_example_derivation}
+\end{figure}
+
+The translation models explored in this workshop are based on synchronous grammars.
+Here we provide a short definition of the formalism we've employed: synchronous context free grammar (SCFG).
A synchronous context free grammar (SCFG, \cite{lewis68scfg}) generalizes context-free grammars to generate strings concurrently in two (or more) languages. A string pair is generated by applying a series of paired rewrite rules of the form, $X \rightarrow \langle \mathbf{e}, \mathbf{f}, \mathbf{a} \rangle$, where $X$ is a non-terminal, $\mathbf{e}$ and $\mathbf{f}$ are strings of terminals and non-terminals and $\mathbf{a}$ specifies a one-to-one alignment between non-terminals in $\mathbf{e}$ and $\mathbf{f}$.
In the context of SMT, by assigning the source and target languages to the respective sides of a probabilistic SCFG it is possible to describe translation as the process of parsing the source sentence, which induces a parallel tree structure and translation in the target language \cite{chiang07hierarchical}.
Terminals are rewritten as pairs of strings of terminal symbols in the source and target languages. Additionally, one side of a terminal expansion may be the special symbol $\epsilon$, which indicates a null alignment which permits arbitrary insertions and deletions.
-Figure \ref{fig:scfg} shows an example derivation for Japanese to English translation using an SCFG.
-
-\begin{figure}Grammar fragment:
-\begin{eqnarray*}
-\label{rule:discont}X & \rightarrow & \langle \nt{X}{1}\ \nt{X}{2}\ \nt{X}{3},\ \nt{X}{1}\ \nt{X}{3}\ \nt{X}{2} \rangle \\
-X & \rightarrow & \langle \textrm{\emph{John-ga}},\ \textrm{\emph{John}} \rangle \\
-X & \rightarrow & \langle \textrm{\emph{ringo-o}},\ \textrm{\emph{an apple}} \rangle \\
-X & \rightarrow & \langle \textrm{\emph{tabeta}},\ \textrm{\emph{ate}} \rangle
-\end{eqnarray*}
-Sample derivation:
-\begin{eqnarray*}
-\label{derivationt}
-& &\langle \nt{S}{1},\nt{S}{1} \rangle \Rightarrow \langle \nt{X}{2},\ \nt{X}{2} \rangle \\
- & \Rightarrow& \langle \nt{X}{3}\ \nt{X}{4}\ \nt{X}{5},\ \nt{X}{3}\ \nt{X}{5}\ \nt{X}{4} \rangle \\
- & \Rightarrow &\langle \textrm{\emph{John-ga}}\ \nt{X}{4}\ \nt{X}{5},\ \textrm{\emph{John}}\ \nt{X}{5}\ \nt{X}{4} \rangle \\
- & \Rightarrow &\langle \textrm{\emph{John-ga}}\ \textrm{\emph{ringo-o}}\ \nt{X}{5},\ \textrm{\emph{John}}\ \nt{X}{5}\ \textrm{\emph{an apple}} \rangle \\
- & \Rightarrow &\langle \textrm{\emph{John-ga ringo-o tabeta}},\ \textrm{\emph{John ate an apple}} \rangle
-\end{eqnarray*}
-\caption{A fragment of an SCFG with a ternary non-terminal expansion and three terminal rules.}
-\label{fig:scfg}
-\end{figure}
+Figure \ref{fig:intro_example_derivation} is an example derivation for Chinese to English translation using an SCFG of the form that I propose to learn using non-parametric Bayesian models.
The generative story is as follows.
In the beginning was the grammar, in which we allow two types of rules: \emph{non-terminal} and \emph{terminal} expansions.
@@ -159,4 +152,20 @@ Rewrite each frontier non-terminal, $c$, using a rule chosen from our grammar ex
Repeat until there are no remaining frontier non-terminals.
The sentences in both languages can then be read off the leaves, using the rules' alignments to find the right ordering.
+\begin{figure}[t]
+ \centering
+ \subfigure{\includegraphics[scale=0.7]{intro_slides/PhraseExtraction1.pdf}}
+ \subfigure{\includegraphics[scale=0.7]{intro_slides/HieroExtraction2.pdf}}
+\caption{Extracting translation rules from aligned sentences. All the phrases obtained using the standard phrase extraction heuristics are depicted in the left figure, these are: $\langle$ Je, I $\rangle$, $\langle$ veux, want to $\rangle$, $\langle$ travailler, work $\rangle$, $\langle$ ne veux pas, do not want to $\rangle$, $\langle$ ne veux pas travailler, do not want to work $\rangle$, $\langle$ Je ne veux pas, I do not want to $\rangle$, $\langle$ Je ne veux pas travailler, I do not want to work $\rangle$. On the right is shown how a discontiguous SCFG rule is created by generalising a phrase embedded in another phrase, the extracted rule is: X $\rightarrow$ $\langle$ ne X$_1$ pas, do not X$_1$ $\rangle$.}
+\label{fig:intro_rule_extraction}
+\end{figure}
+
+The process for extracting SCFG rules is based on that used to extract translation phrases in phrase based translation systems.
+The phrase based approach \cite{koehn03} uses heuristics to extract phrase translation pairs from a word-aligned corpus.
+The phrase extraction heuristic is illustrated in Figure \ref{fig:intro_rule_extraction}.
+This heuristic extracts all phrases whose words are either not aligned, or aligned with only other words in the same phrase.
+The phrase translation probabilities are then calculated using a maximum likelihood estimation.
+The Hiero \cite{chiang07hierarchical} SCFG extraction heuristic starts from a grammar consisting of the set of contiguous phrases; wherever a phrase is wholly embedded within another, a new rule is added with the embedded phrase replaced by the non-terminal X.
+This process continues until all possible rules have been extracted, subject to the constraints that every rule must contain a terminal on the source side, a rule may only contain two non-terminals on its right side and that those non-terminals may not be adjacent.
+The right example in Figure \ref{fig:intro_rule_extraction} depicts this rule generalisation process.
diff --git a/report/report.tex b/report/report.tex
index 9e8cc185..2b8e045f 100755
--- a/report/report.tex
+++ b/report/report.tex
@@ -29,6 +29,7 @@
\theoremstyle{plain}
\newcommand{\nt}[2]{\textrm{#1}_{\framebox[5pt]{\scriptsize #2}}}
+\newcommand{\ind}[1]{{\fboxsep1pt\raisebox{-.5ex}{\fbox{{\tiny #1}}}}}
\begin{document}
\title{\vspace{-15mm}\LARGE {\bf Final Report}\\[2mm]