From 2b8a391ccd64fddaa990a41c66c30d88b2331ad3 Mon Sep 17 00:00:00 2001 From: desaicwtf Date: Tue, 17 Aug 2010 10:43:20 +0000 Subject: git-svn-id: https://ws10smt.googlecode.com/svn/trunk@570 ec762483-ff6d-05da-a07a-a48fb63a330f --- report/pr-clustering/posterior.tex | 41 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/report/pr-clustering/posterior.tex b/report/pr-clustering/posterior.tex index 7597c8e1..ebb05211 100644 --- a/report/pr-clustering/posterior.tex +++ b/report/pr-clustering/posterior.tex @@ -221,4 +221,45 @@ with an intrinsic evaluation of clustering quality by comparing against a supervised CFG parser trained on the tree bank. +We are mainly working on the Urdu-English language pair. +Urdu has a very +different word ordering from English. +This leaves us room for improvement over +phrase-based systems. +Here in Table~\ref{tab:results} +we show BLEU scores as well as +conditional entropy for each of the models above +on Urdu data. Conditional entropy is computed +as the entropy of the ``gold'' labelling given +the predicted clustering. ``Gold'' labelling +distribution +is obtained from the Collins parser +trained on the Penn Treebank. Since not +all phrases are constituents, we ignored +phrases that do not correspond to any constituents. +\begin{table}[h] + \centering + \begin{tabular}{ |*{3}{c|} } + \hline + model & BLEU & H(Gold$|$Predicted)\\ + \hline + hiero & 21.1 & 5.77\\ + hiero+POS & & \\ + SAMT & & \\ + EM & & \\ + pr100 & & \\ + agree-language & & \\ + agree direction & &\\ + non-parametric & & \\ + \hline + \end{tabular} + \caption + {Evaluation of PR models. + Left column shows BLEU scores + through the translation pipeline. + Right column shows conditional entropy + of the ``gold'' labelling given the predicted clustering. + } + \label{tab:results} +\end{table} -- cgit v1.2.3