-rw-r--r--   report/SCFGs.tex    118
-rw-r--r--   report/biblio.bib    19
2 files changed, 103 insertions, 34 deletions
diff --git a/report/SCFGs.tex b/report/SCFGs.tex
index 3441c7db..0810c95e 100644
--- a/report/SCFGs.tex
+++ b/report/SCFGs.tex
@@ -2,24 +2,15 @@
\chapter{Synchronous context free grammars} \label{sec:scfg}
-The translation models used in this workshop are synchronous context free grammars (SCFGs).
-SCFGs \cite{lewis68scfg} generalizes context-free grammars to generate strings concurrently in two (or more) languages. A string pair is generated by applying a series of paired rewrite rules of the form, $X \rightarrow \langle \mathbf{e}, \mathbf{f}, \mathbf{a} \rangle$, where $X$ is a non-terminal, $\mathbf{e}$ and $\mathbf{f}$ are strings of terminals and non-terminals and $\mathbf{a}$ specifies a one-to-one alignment between non-terminals in $\mathbf{e}$ and $\mathbf{f}$.
-In the statistical machine translation, the two righthand sides of SCFG rules represent the source and target languages. The process of translation occurs by parsing the source sentence, which induces a parallel tree structure and translation in the target language \cite{chiang07hierarchical}.
-Terminal are rewritten as pairs of strings of terminal symbols in the source and target languages. Additionally, one side of a terminal expansion may be the special symbol $\epsilon$, which indicates a null alignment which permits arbitrary insertions and deletions.
-Figure \ref{fig:toy-scfg} gives an example SCFG between Urdu and English. Figure \ref{fig:toy-scfg-parse} shows how the SCFG is used to derive the translation of an input Urdu sentence.
-
-
-
\begin{figure}
\begin{center}
\includegraphics[width=.6\linewidth]{SCFGs/example-scfg}
\end{center}
-\caption{A toy example that illustrates a SCFG that can translate (romanized) Urdu into English for one sentence. }\label{fig:toy-scfg}
+\caption{A toy example that illustrates an SCFG that can translate (romanized) Urdu into English for one sentence.}\label{toy-scfg}
\end{figure}
-
\begin{figure}
\begin{tabular}{lll}
\multicolumn{3}{>{\columncolor[rgb]{0.95,0.95,0.75}}c}{The input is an Urdu sentence which is initially unanalyzed.}\\
@@ -37,39 +28,98 @@ Figure \ref{fig:toy-scfg} gives an example SCFG between Urdu and English. Figur
\multicolumn{3}{>{\columncolor[rgb]{0.95,0.95,0.75}}c}{Applying the S rule means that we have a complete translation of the Urdu sentence.}\\
\includegraphics[width=.45\linewidth]{SCFGs/urdu-step4} & & \includegraphics[width=.45\linewidth]{SCFGs/english-step4}
\end{tabular}
-\caption{Using SCFGs as the underlying formalism means that the process of translation is one of parsing. This shows how an English sentence can be generated by parsing the Urdu sentence using the rules given in Figure \ref{fig:toy-scfg}}\label{fig:toy-scfg-parse}
+\caption{Using SCFGs as the underlying formalism means that the process of translation is one of parsing. This figure shows how an English sentence can be generated by parsing the Urdu sentence using the rules given in Figure \ref{toy-scfg}.}\label{toy-scfg-parse}
+\end{figure}
+
+
+
+The translation models used in this workshop are synchronous context free grammars (SCFGs).
+SCFGs \cite{lewis68scfg} generalize context free grammars so that they generate pairs of related strings.
+Because they generate pairs of strings, they are useful for specifying the relationship between two languages, and can be used to describe translation and reordering.
+Probabilistic SCFGs can be formally defined as follows (a concrete example rule is given immediately after the list):
+\begin{itemize}
+\item T$_{S}$: a set of source-language terminal symbols
+\item T$_{T}$: a set of target-language terminal symbols
+\item N: a shared set of nonterminal symbols
+\item A set of rules of the form $\text{X} \rightarrow \langle \gamma, \alpha, \sim, w \rangle$
+ \begin{itemize}
+ \item X $\in$ N
+ \item $\gamma$ is a sequence of source terminals and non-terminals
+ \item $\alpha$ is a sequence of target terminals and non-terminals
+ \item $\sim$ is a one-to-one correspondence between the non-terminals in $\gamma$ and $\alpha$
+ \item $w$ is a weight or probability assigned to the rule
+ \end{itemize}
+\end{itemize}
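+
+For concreteness, one rule in this notation, written with an invented weight and using the classic French--English negation pattern rather than a rule from the toy grammar of Figure \ref{toy-scfg}, is
+\[
+\text{X} \rightarrow \langle\, \text{ne}\ \text{X}_{1}\ \text{pas},\ \text{do not}\ \text{X}_{1},\ \{1\text{--}1\},\ 0.8 \,\rangle .
+\]
+Here $\gamma$ is the source string {\it ne}~X$_1$~{\it pas}, $\alpha$ is the target string {\it do not}~X$_1$, the correspondence $\sim$ links the single non-terminal on each side, and $w = 0.8$ is the rule's weight.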
+
+
+
+A toy example of an SCFG is given in Figure \ref{toy-scfg}. The nonterminal symbols, which are written in uppercase, are identical across the two right-hand sides of each rule, but may appear in different orders.
+The process of translation is accomplished by parsing the source language input sentence and simultaneously generating the target language output sentence. This
+process is illustrated in Figure \ref{toy-scfg-parse}, which shows how parsing an Urdu sentence generates an English translation using the toy example grammar. The toy grammar and example parse omit the $w$ weights/probabilities assigned to the grammar rules. In practice there are a huge number of alternative translations and derivations, and assigning probabilities allows us to choose the best translation according to the model, and to reduce the search space by expanding only the most promising partial translations.
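+
+Concretely, if we set aside the additional features (such as a language model) that practical systems combine with the grammar, the score of a derivation $d$ can be taken to be the product of the weights of the rules it uses, and decoding searches for the highest-scoring derivation of the source sentence $\mathbf{f}$:
+\[
+p(d) \;=\; \prod_{r \in d} w_{r}, \qquad \hat{d} \;=\; \arg\max_{d \in D(\mathbf{f})} p(d),
+\]
+where $D(\mathbf{f})$ is the set of derivations whose source side yields $\mathbf{f}$. The translation is then read off the target side of $\hat{d}$.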
+
+\subsection{Hiero: SCFGs with one non-terminal symbol}
+
+The use of SCFGs for statistical machine translation was popularized by \citet{Chiang2005} with the introduction of the Hiero system. Chiang's Hiero system extended the standard phrase-based approaches to statistical machine translation by allowing phrases that contain gaps. Chiang described how these {\it hierarchical phrases} could be obtained by straightforwardly extending the standard methods \cite{Koehn2004,Koehn2003,Tillmann2003,Venugopal2003} for extracting phrase pairs from word-aligned sentence pairs. Figure \ref{hiero-phrase-extraction} shows how a hierarchical phrase can be constructed by replacing two smaller phrase pairs with the nonterminals indexed $1$ and $2$. These rules are written in the same format as the SCFG rules given in Figure \ref{toy-scfg}. Although the Hiero rules are devoid of linguistic information, they are able to indicate reordering, as shown with the swapped positions of $X_1$ and $X_2$.
+
+\begin{figure}
+\begin{center}
+\includegraphics[width=.9\linewidth]{SCFGs/hiero-phrase-extraction.pdf}
+\end{center}
+\caption{An example of a hierarchical phrase extracted from a word-aligned Chinese-English sentence pair. Chiang's Hiero system used rules that were written in the form of synchronous grammars, but which are devoid of linguistic information.}\label{hiero-phrase-extraction}
+\end{figure}
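+
+To make the extraction heuristic concrete, the following Python sketch illustrates the two steps on a small French--English example: it first collects the phrase pairs that are consistent with the word alignment (no alignment link may cross a phrase boundary), and then forms a hierarchical rule by replacing an embedded phrase pair with an indexed non-terminal. The function names, data structures, and alignment are ours and purely illustrative; the sketch creates only a single gap per rule and omits the additional constraints used by the real extractors (for example, Hiero allows at most two non-adjacent gaps and has a particular treatment of unaligned words).
+\begin{verbatim}
+def consistent_phrases(src, tgt, alignment, max_len=5):
+    """Phrase pairs ((i, j), (k, l)) such that no alignment link
+    crosses the phrase boundary (spans are 0-based, half-open)."""
+    pairs = []
+    for i in range(len(src)):
+        for j in range(i + 1, min(i + max_len, len(src)) + 1):
+            links = [(s, t) for (s, t) in alignment if i <= s < j]
+            if not links:
+                continue
+            k = min(t for _, t in links)
+            l = max(t for _, t in links) + 1
+            # Reject if a target word in [k, l) is aligned outside [i, j).
+            if any(k <= t < l and not i <= s < j for (s, t) in alignment):
+                continue
+            pairs.append(((i, j), (k, l)))
+    return pairs
+
+def hiero_rules(src, tgt, phrase_pairs):
+    """Replace one embedded phrase pair with the gap symbol X_1."""
+    rules = []
+    for (i, j), (k, l) in phrase_pairs:
+        for (i2, j2), (k2, l2) in phrase_pairs:
+            if (i <= i2 and j2 <= j and k <= k2 and l2 <= l
+                    and (i2, j2) != (i, j)):
+                f = src[i:i2] + ["X_1"] + src[j2:j]
+                e = tgt[k:k2] + ["X_1"] + tgt[l2:l]
+                rules.append(("X", tuple(f), tuple(e)))
+    return rules
+
+# Toy example: "Je ne veux pas travailler" / "I do not want to work",
+# with invented (source index, target index) alignment links.
+src = "Je ne veux pas travailler".split()
+tgt = "I do not want to work".split()
+links = [(0, 0), (1, 1), (1, 2), (3, 1), (3, 2), (2, 3), (2, 4), (4, 5)]
+rules = hiero_rules(src, tgt, consistent_phrases(src, tgt, links))
+# rules includes ('X', ('ne', 'X_1', 'pas'), ('do', 'not', 'X_1'))
+\end{verbatim}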
+
+
+Rather than using the full power of the SCFG formalism, the Hiero system uses a simple grammar with a single non-terminal symbol, X, to extend conventional phrase-based models to allow phrases with gaps in them. The Hiero system is technically a grammar-based approach to translation, but does not incorporate any linguistic information in its grammars. Its process of decoding is also one of parsing, and it employs the Cocke-Kasami-Younger (CKY) dynamic programming algorithm to find the best derivation under its probabilistic grammar rules. However, because Hiero-style parses are devoid of linguistic information, they fail to capture facts about Urdu such as the fact that it is post-positional and verb-final.
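+
+The idea of decoding as chart parsing can also be sketched in a few lines of Python. The sketch below is not Chiang's decoder: it restricts the grammar to purely lexical rules plus ITG-style monotone and inverted combinations of two adjacent spans, keeps only the single best item per span, and ignores the language model, whereas Hiero handles arbitrary rules with gaps, integrates an $n$-gram language model, and prunes the search space aggressively. All rules and weights are invented, and the small French--English example from above is reused.
+\begin{verbatim}
+import math
+
+# Invented toy rule table: source phrase -> (target phrase, log probability).
+lexical_rules = {
+    ("je",): ("I", math.log(0.9)),
+    ("veux",): ("want to", math.log(0.7)),
+    ("travailler",): ("work", math.log(0.8)),
+    ("ne", "veux", "pas"): ("do not want to", math.log(0.6)),
+}
+# Assumed weights for the two binary combinations of adjacent spans:
+# X -> <X_1 X_2, X_1 X_2> (monotone) and X -> <X_1 X_2, X_2 X_1> (inverted).
+MONOTONE, INVERTED = math.log(0.6), math.log(0.4)
+
+def decode(src):
+    n = len(src)
+    chart = {}  # (i, j) -> (best log prob, best translation of src[i:j])
+    for width in range(1, n + 1):
+        for i in range(n - width + 1):
+            j, best = i + width, None
+            if tuple(src[i:j]) in lexical_rules:      # lexical rule
+                tgt, lp = lexical_rules[tuple(src[i:j])]
+                best = (lp, tgt)
+            for k in range(i + 1, j):                 # binary combinations
+                if (i, k) in chart and (k, j) in chart:
+                    (lp1, t1), (lp2, t2) = chart[(i, k)], chart[(k, j)]
+                    for lp_r, tgt in ((MONOTONE, t1 + " " + t2),
+                                      (INVERTED, t2 + " " + t1)):
+                        if best is None or lp1 + lp2 + lp_r > best[0]:
+                            best = (lp1 + lp2 + lp_r, tgt)
+            if best is not None:
+                chart[(i, j)] = best
+    return chart.get((0, n))
+
+print(decode("je ne veux pas travailler".split()))
+# -> (log probability, 'I do not want to work')
+\end{verbatim}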
+
+\subsection{Syntax-augmented SCFGs extracted from supervised parses}\label{samt}
+
+
+\begin{figure}
+\begin{center}
+\includegraphics[height=3in]{SCFGs/scfg-phrase-extraction.pdf}
+\end{center}
+\caption{An example of extracting linguistically-motivated SCFGs by labeling phrase pairs using the labels of the corresponding nodes in a parse tree. Rules with syntactic non-terminals on the right-hand sides can be formed by replacing phrase pairs with their non-terminal labels, as shown with the VP on the right hand sides of the second NP rule.}\label{scfg-phrase-extraction}
\end{figure}
+SCFGs with a rich set of linguistically motivated non-terminal symbols have been proposed as an alternative to Hiero. A number of approaches have been developed for extracting linguistically motivated grammars from a parallel corpus that has been word-aligned and parsed using a supervised parser trained on a treebank \cite{Galley2004,samt}.
+
+Figure \ref{scfg-phrase-extraction} shows how linguistically motivated grammar rules can be extracted from a training sentence pair that has been automatically parsed and word-aligned. Instead of assigning the generic label ``X'' to all extracted phrase pairs, as the Hiero model does, linguistic labels are extracted from the parse tree.
+
+In the standard phrase-based and hierarchical phrase-based approaches to machine translation, many of the ``phrases'' that are used are not phrases in the sense of syntactic constituents. They are simply sequences of words. For example, in Figure \ref{scfg-phrase-extraction}, the phrases {\it Australia is} and {\it one of} have consistent Chinese phrase pairs, but do not correspond to nodes in the parse tree. If we were to limit ourselves to extracting only phrase pairs that are licensed by the parse tree and have consistent word alignments, then we would have reduced coverage compared to Hiero. Instead, we adopt the methodology described by \citet{samt}, which achieves the same coverage as Hiero by generating complex labels for non-constituent phrases.
+\begin{figure}
+\begin{center}
+\includegraphics[height=3in]{SCFGs/scfg-ccg-phrase-extraction.pdf}
+\end{center}
+\caption{An example of a CCG-style label for the non-constituent phrase {\it Australia is}. The label indicates that the non-constituent phrase would be an S if an NP were found to its right. This complex label can be treated like any other label during parsing and translation. }\label{scfg-ccg-phrase-extraction}
+\end{figure}
+
+\citet{samt} define a framework for extracting SCFGs with linguistically motivated nonterminals from an aligned parallel corpus where one side of the parallel corpus has been parsed. The resulting context-free rules contain a rich set of nonterminal categories. These categories include traditional nonterminal categories taken directly from the monolingual parse trees (e.g. DT, NP, VP), and extended categories formed by gluing together adjacent nonterminals (e.g. NP+V, RB+JJ) and incomplete constituents that denote a category missing a child component to the left (e.g. NP\textbackslash DT) or to the right (e.g. NP/NN) in the style of Combinatory Categorial Grammars \cite{ccg1982}. \citet{samt}'s Syntax Augmented Machine Translation (SAMT) grammars may also include hierarchical rules and glue rules \cite{chiang:2007} that are not linguistically motivated; such rules allow partial translations to be combined (with some penalty) without regard to syntactic structure.
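+
+The label assignment itself can be sketched as follows. Given the spans of the constituents in the parse tree, a phrase span receives a constituent label if it matches one exactly, a concatenated label if it spans two adjacent constituents, a CCG-style incomplete-constituent label if it can be completed to a constituent by material to its right or left, and otherwise the generic Hiero label X. The Python function below is our own illustration, not the SAMT extractor itself: the real system applies these cases in a particular preference order and covers further cases that are glossed over here, and the constituent spans in the example are invented word indices for the sentence of Figure \ref{scfg-phrase-extraction}.
+\begin{verbatim}
+def samt_label(span, constituents):
+    """Assign a SAMT-style label to the phrase span (i, j), given a dict
+    mapping constituent spans (half-open, 0-based) to parse labels."""
+    i, j = span
+    if span in constituents:                    # exact constituent: NP, VP, ...
+        return constituents[span]
+    for k in range(i + 1, j):                   # two adjacent constituents: NP+VP
+        if (i, k) in constituents and (k, j) in constituents:
+            return constituents[(i, k)] + "+" + constituents[(k, j)]
+    for (a, b), label in constituents.items():  # CCG-style incomplete constituents
+        if a == i and b > j and (j, b) in constituents:
+            return label + "/" + constituents[(j, b)]    # missing part on the right
+        if b == j and a < i and (a, i) in constituents:
+            return label + "\\" + constituents[(a, i)]   # missing part on the left
+    return "X"                                  # fall back to the Hiero label
+
+# Invented constituent spans: "Australia"=NP, the rest of the clause=VP,
+# the NP after "is", and the whole sentence=S (word indices are illustrative).
+constituents = {(0, 1): "NP", (1, 9): "VP", (2, 9): "NP", (0, 9): "S"}
+print(samt_label((0, 2), constituents))   # "Australia is" -> S/NP
+print(samt_label((1, 9), constituents))   # -> VP
+\end{verbatim}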
-% of the form that I propose to learn using non-parametric Bayesian models.
-The generative story is as follows.
-In the beginning was the grammar, in which we allow two types of rules: {\emph non-terminal} and {\emph terminal} expansions.
-The former rewrites a non-terminal symbol as a string of two or three non-terminals along with an alignment $\mathbf{a}$, specifying the corresponding ordering of the child trees in the source and target language.
-Terminal expansion rewrite a non-terminal as a pair of terminal n-grams, where either but not both may be empty.
-Given a grammar, each sentence is generated as follows, starting with the distinguished root non-terminal, $S$.
-Rewrite each frontier non-terminal, $c$, using a rule chosen from our grammar expanding $c$.
-Repeat until there are no remaining frontier non-terminals.
-The sentences in both languages can then be read off the leaves, using the rules' alignments to find the right ordering.
-\begin{figure}[t]
- \centering
- \subfigure{\includegraphics[scale=0.7]{intro_slides/PhraseExtraction1.pdf}}
- \subfigure{\includegraphics[scale=0.7]{intro_slides/HieroExtraction2.pdf}}
-\caption{Extracting translation rules from aligned sentences. All the phrases obtained using the standard phrase extraction heuristics are depicted in the left figure, these are: $\langle$ Je, I $\rangle$, $\langle$ veux, want to $\rangle$, $\langle$ travailler, work $\rangle$, $\langle$ ne veux pas, do not want to $\rangle$, $\langle$ ne veux pas travailler, do not want to work $\rangle$, $\langle$ Je ne veux pas, I do not want to $\rangle$, $\langle$ Je ne veux pas travailler, I do not want to work $\rangle$. On the right is shown how a discontiguous SCFG rule is created by generalising a phrase embedded in another phrase, the extracted rule is: X $\rightarrow$ $\langle$ ne X$_1$ pas, do not X$_1$ $\rangle$.}
-\label{fig:intro_rule_extraction}
+\begin{figure}
+%\begin{center}
+\includegraphics[height=3.75in]{SCFGs/hiero-tree}
+\includegraphics[height=3.75in]{SCFGs/samt-tree}
+\caption{Sample derivations for an Urdu sentence translated into English using a hierarchical phrase-based SCFG (left) and a linguistically motivated SCFG (right). For reference, a human translator produced ``Hamid Ansari nominated for the post of vice president'' as the translation of the Urdu.}
+%\end{center}
+\label{example-derivations}
\end{figure}
-The process for extracting SCFG rules is based on that used to extract translation phrases in phrase based translation systems.
-The phrase based approach \cite{koehn03} uses heuristics to extract phrase translation pairs from a word-aligned corpus.
-The phrase extraction heuristic is illustrated in Figure \ref{fig:intro_rule_extraction}.
-This heuristic extracts all phrases whose words are either not aligned, or aligned with only other words in the same phrase.
-The phrase translation probabilities are then calculated using a maximum likelihood estimation.
-The Hiero \cite{chiang07hierarchical} SCFG extraction heuristic starts from a grammar consisting of the set of contiguous phrases, wherever a phrase is wholly embedded within another a new rule is add with the embedded phrase replace by the non-terminal X.
-This process continues until all possible rules have been extracted, subject to the constraints that every rule must contain a terminal on the source side, a rule may only contain two non-terminals on its right side and that those non-terminals may not be adjacent.
-The left example in Figure \ref{fig:intro_rule_extraction} depicts this rule generalisation process.
+\subsection{SCFGs without supervised parsers}\label{induced-scfg}
+
+Note that one of the major advantages of extracting a linguistic SCFG from an automatically parsed parallel corpus is that only one side of the parallel corpus needs to be parsed. To extract an Urdu-English SCFG we could therefore use an English parser without needing an Urdu parser. During translation the Urdu input text is parsed with the projected rules, but a stand-alone Urdu parser is never required.
+However, all of the current approaches require that a parser, trained on supervised data, exist for at least one of the languages.
+
+The methods explored in this summer workshop obviate the need for a supervised parser, and instead automatically induce the grammars from unlabeled texts. The potential advantages of our approach are:
+\begin{enumerate}
+\item It can be applied to language pairs for which treebanks do not exist.
+\item The set of non-terminal symbols is not tied to a particular treebank formalism, and therefore may be more appropriate for translation.
+\end{enumerate}
+
+
diff --git a/report/biblio.bib b/report/biblio.bib
index da5cc326..55615ade 100644
--- a/report/biblio.bib
+++ b/report/biblio.bib
@@ -1,3 +1,19 @@
+@string{acl-1989 = {27th Annual Meeting of the Association for Computational Linguistics (ACL-1989)}} @string{acl-1989-address = {Vancouver, British Columbia, Canada}} @string{acl-1995 = {33rd Annual Meeting of the Association for Computational Linguistics (ACL-1995)}} @string{acl-1995-address = {Cambridge, Massachusetts}} @string{acl-1996 = {34rd Annual Meeting of the Association for Computational Linguistics (ACL-1996)}} @string{acl-1996-address = {Santa Cruz, California}} @string{acl-1997 = {35th Annual Meeting of the Association for Computational Linguistics (ACL-1997)}} @string{acl-1997-address = {Madrid, Spain}} @string{acl-1998 = {36th Annual Meeting of the Association for Computational Linguistics and 17th International Conference on Computational Linguistics (ACL-CoLing-1998)}} @string{acl-1998-address = {Montreal, Canada}} @string{acl-1999 = {Proceedings of the 37th Annual Meeting of the Association for Computational Linguistics (ACL)}} @string{acl-1999-address = {College Park, Maryland}} @string{acl-2000 = {Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics (ACL-2000)}} @string{acl-2000-address = {Hong Kong}} @string{acl-2001 = {Proceedings of the 39th Annual Meeting of the Association for Computational Linguistics (ACL-2001)}} @string{acl-2001-address = {Toulouse, France}} @string{acl-2002 = {Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics (ACL-2002)}} @string{acl-2002-address = {Philadelphia, Pennsylvania}} @string{acl-2003 = {Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics (ACL-2003)}} @string{acl-2003-address = {Sapporo, Japan}} @string{acl-2004 = {Proceedings of the 42nd Annual Meeting of the Association for Computational Linguistics (ACL-2004)}} @string{acl-2004-address = {Barcelona, Spain}} @string{acl-2005 = {Proceedings of the 43rd Annual Meeting of the Association for Computational Linguistics (ACL-2005)}} @string{acl-2005-address = {Ann Arbor, Michigan}} @string{acl-2006 = {Proceedings of the 21st International Conference on Computational Linguistics and 44th Annual Meeting of the Association for Computational Linguistics (ACL-CoLing-2006)}} @string{acl-2006-address = {Sydney, Australia}} @string{acl-2007 = {Proceedings of the 45th Annual Meeting of the Association for Computational Linguistics (ACL-2007)}} @string{acl-2007-address = {Prague, Czech Republic}} @string{acl-2008 = {Proceedings of the 46th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies}} @string{acl-2008-address = {Colmbus, Ohio}} @string{acl-2009-address = {Singapore}} @string{amta-2002 = {Proceedings of the 5th Biennial Conference of the Association for Machine Translation in the Americas (AMTA-2002)}} @string{amta-2002-address = {Tiburon, California}} @string{amta-2004 = {Proceedings of the 6th Biennial Conference of the Association for Machine Translation in the Americas (AMTA-2004)}} @string{amta-2004-address = {Washington DC}} @string{amta-2006 = {Proceedings of the 7th Biennial Conference of the Association for Machine Translation in the Americas (AMTA-2006)}} @string{amta-2006-address = {Cambridge, Massachusetts}} @string{amta-2008 = {Proceedings of the 8th Biennial Conference of the Association for Machine Translation in the Americas (AMTA-2008)}} @string{amta-2008-address = {Honolulu, Hawaii}} @string{coling-2008 = {Proceedings of the 22nd International Conference on Computational Linguistics (COLING-2008)}} 
@string{coling-2008-address = {Manchester, England}} @string{eacl-1989 = {4th Conference of the European Chapter of the Association for Computational Linguistics (EACL-1989)}} @string{eacl-1989-address = {Manchester, England}} @string{eacl-2003 = {10th Conference of the European Chapter of the Association for Computational Linguistics (EACL-2003)}} @string{eacl-2003-address = {Budapest, Hungary}} @string{eacl-2006 = {11th Conference of the European Chapter of the Association for Computational Linguistics (EACL-2006)}} @string{eacl-2006-address = {Trento, Italy}} @string{eacl-2009 = {12th Conference of the European Chapter of the Association for Computational Linguistics (EACL-2009)}} @string{eacl-2009-address = {Athens, Greece}} @string{emnlp-2000 = {2000 Joint SIGDAT Conference on Empirical Methods in Natural Language Processing and Very Large Corpora}} @string{emnlp-2000-address = {Hong Kong}} @string{emnlp-2001 = {Proceedings of the 2001 Conference on Empirical Methods in Natural Language Processing (EMNLP-2001)}} @string{emnlp-2001-address = {Pittsburgh, Pennsylvania}} @string{emnlp-2002 = {Proceedings of the 2002 Conference on Empirical Methods in Natural Language Processing (EMNLP-2002)}} @string{emnlp-2002-address = {Philadelphia, Pennsylvania}} @string{emnlp-2003 = {Proceedings of the 2003 Conference on Empirical Methods in Natural Language Processing (EMNLP-2003)}} @string{emnlp-2003-address = {Sapporo, Japan}} @string{emnlp-2004 = {Proceedings of the 2004 Conference on Empirical Methods in Natural Language Processing (EMNLP-2004)}} @string{emnlp-2004-address = {Barcelona, Spain}} @string{emnlp-2005 = {Proceedings of the 2005 Conference on Empirical Methods in Natural Language Processing (EMNLP-2005)}} @string{emnlp-2005-address = {Vancouver, British Columbia., Canada}} @string{emnlp-2006 = {Proceedings of the 2006 Conference on Empirical Methods in Natural Language Processing (EMNLP-2006)}} @string{emnlp-2006-address = {Sydney, Australia}} @string{emnlp-2007 = {Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning (EMNLP-CoNLL)}} @string{emnlp-2007-address = {Prague, Czech Republic}} @string{emnlp-2008 = {Proceedings of the 2008 Conference on Empirical Methods in Natural Language Processing (EMNLP-2008)}} @string{emnlp-2008-address = {Honolulu, Hawaii}} @string{emnlp-2009 = {Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing (EMNLP-2009)}} @string{emnlp-2009-address = {Singapore}} @string{hlt-2002 = {Proceedings of Second International Conference on Human Language Technology Research (HLT-02)}} @string{hlt-2002-address = {San Diego}} @string{hlt-naacl-2003 = {Proceedings of the Human Language Technology Conference of the North American chapter of the Association for Computational Linguistics (HLT/NAACL-2003)}} @string{hlt-naacl-2003-address = {Edmonton, Alberta}} @string{hlt-naacl-2004 = {Proceedings of the Human Language Technology Conference of the North American chapter of the Association for Computational Linguistics (HLT/NAACL-2004)}} @string{hlt-naacl-2004-address = {Boston, Massachusetts}} @string{hlt-naacl-2006 = {Proceedings of the Human Language Technology Conference of the North American chapter of the Association for Computational Linguistics (HLT/NAACL-2006)}} @string{hlt-naacl-2006-address = {New York, New York}} @string{hlt-naacl-2007 = {Proceedings of the Human Language Technology Conference of the North American chapter of the Association for 
Computational Linguistics (HLT/NAACL-2007)}} @string{hlt-naacl-2007-address = {Rochester, New York}} @string{hlt-naacl-2009-address = {Boulder, Colorado}} @string{iwpt = {Proceedings of the International Workshop on Parsing Technologies}} @string{iwpt-2005-address = {Vancouver, BC, Canada}} @string{iwslt = {Proceedings of the International Workshop on Spoken Language Technology}} @string{kdd = {Proceeding of the ACM SIGKDD international conference on Knowledge discovery and data mining}} @string{kdd-2008-address = {New York}} @string{mt-summit-9-address = {New Orleans, Louisiana}} @string{naacl-2001 = {Second Meeting of the North American Chapter of the Association for Computational Linguistics}} @string{naacl-2001-address = {Pittsburgh, Pennsylvania}} @string{wmt = {Proceedings of the Workshop on Statistical Machine Translation}}
+ @inproceedings{Chiang2005, Address = acl-2005-address, Author = {David Chiang}, Booktitle = acl-2005, Title = {A Hierarchical Phrase-based Model for Statistical Machine Translation}, Year = {2005}}
+
+ @inproceedings{Koehn2003, Address = hlt-naacl-2003-address, Author = {Philipp Koehn and Franz Josef Och and Daniel Marcu}, Booktitle = hlt-naacl-2003, Title = {Statistical Phrase-Based Translation}, Url = {http://www.isi.edu/~koehn/publications/phrase2003.pdf}, Year = {2003}}
+
+ @inproceedings{Koehn2004, Address = amta-2004-address, Author = {Philipp Koehn}, Booktitle = amta-2004, Title = {Pharaoh: A Beam Search Decoder for Phrase-Based Statistical Machine Translation Models}, Url = {http://www.iccs.informatics.ed.ac.uk/~pkoehn/publications/pharaoh-amta2004.pdf}, Year = {2004}}
+
+ @inproceedings{Tillmann2003, Address = emnlp-2003-address, Author = {Christoph Tillmann}, Booktitle = emnlp-2003, Title = {A Projection Extension Algorithm for Statistical Machine Translation}, Url = {http://acl.ldc.upenn.edu/W/W03/W03-1001.pdf}, Year = {2003}}
+
+
+ @inproceedings{Venugopal2003, Address = acl-2003-address, Author = {Ashish Venugopal and Stephan Vogel and Alex Waibel }, Booktitle = acl-2003, Title = {Effective Phrase Translation Extraction from Alignment Models}, Year = {2003}}
+
+ @inproceedings{Galley2004, Address = hlt-naacl-2004-address, Author = {Galley, Michel and Hopkins, Mark and Knight, Kevin and Marcu, Daniel}, Booktitle = hlt-naacl-2004, Title = {What's in a translation rule?}, Url = {http://aclweb.org/anthology-new/N/N04/N04-1035.bib}, Year = {2004}}
+
+ @article{ccg1982, Author = {Anthony Ades and Mark Steedman}, Journal = {Linguistics and Philosophy}, Pages = {517-558}, Title = {On the Order of Words}, Volume = {4}, Year = {1982}}
+
@article{chiang:2007,
Author = {David Chiang},
@@ -8,6 +24,8 @@
Volume = {33},
Year = {2007}}
+
+
@inproceedings{blunsom:2009,
author = {Blunsom, Phil and Cohn, Trevor and Goldwater, Sharon and Johnson, Mark},
title = {A note on the implementation of hierarchical {Dirichlet} processes},
@@ -36,6 +54,7 @@
}
+
@inproceedings{bleu,
Author = {Kishore Papineni and Salim Roukos and Todd Ward and Wei-Jing Zhu},
Booktitle = {Proceedings of the 40th Annual Meeting of the ACL},