From 1090a065dc48211dd71f4980cf8ff34e47333ad0 Mon Sep 17 00:00:00 2001 From: redpony Date: Mon, 23 Aug 2010 21:57:46 +0000 Subject: correlation stuff git-svn-id: https://ws10smt.googlecode.com/svn/trunk@617 ec762483-ff6d-05da-a07a-a48fb63a330f --- report/np_clustering.tex | 64 ++++++--- report/pyp_clustering/bleu_condentropy.R | 5 + report/pyp_clustering/bleu_condentropy.dat | 13 ++ report/pyp_clustering/correl.pdf | 215 +++++++++++++++++++++++++++++ 4 files changed, 277 insertions(+), 20 deletions(-) create mode 100644 report/pyp_clustering/bleu_condentropy.R create mode 100644 report/pyp_clustering/bleu_condentropy.dat create mode 100644 report/pyp_clustering/correl.pdf diff --git a/report/np_clustering.tex b/report/np_clustering.tex index 439bfdbe..da564508 100644 --- a/report/np_clustering.tex +++ b/report/np_clustering.tex @@ -64,8 +64,8 @@ The final sample drawn from the model was used to estimate $p(z|\textbf{c},\p)$, \begin{figure} \begin{center} -\includegraphics[scale=0.75]{pyp_clustering/llh.pdf} -\vspace{-0.3cm} +\includegraphics[scale=0.5]{pyp_clustering/llh.pdf} +\vspace{-0.7cm} \end{center} \caption{Log-likelihood versus number of samples with 10 categories (red circles) and 25 categories (blue diamonds) on the Urdu data, 1 target word on either side, hierarchical $\theta_0$, uniform $\phi_0$.} \label{fig:llh} @@ -99,7 +99,7 @@ Num. references & 16 & 4 \label{tab:corpbtecur} \end{table}% -\subsection{Features used} +\subsection{Translation system features} In addition to the language model and word penalty, we made use of the following features to score each rule $\textrm{Y} \rightarrow \langle \textbf{f},\textbf{e} \rangle$ used in a derivation. Some features are meaningful only in the context of multi-category grammars. 
@@ -116,11 +116,13 @@ In addition to the language model and word penalty, we made use of the following \noindent The above feature weights were tuned using the minimum error rate training algorithm (\textsc{mert}), to optimize the 1-best \textsc{bleu} on a held-out development set. -\subsection{Baseline and benchmark systems} +\subsection{Baseline and supervised benchmark systems} -We provide two baseline systems: a single-category system constructed using the procedure described by \cite{chiang:2007} and a system constructed by assigning categories to each phrasal occurrence in the training data. Additionally, we provide a benchmark system using supervised English (target) language parse trees \citep{samt}. Table~\ref{tab:npbaselines} summarizes these baseline conditions. +We provide a number of baselines to compare our unsupervised syntax systems against. The most important is a single-category system constructed using the procedure described by \cite{chiang:2007}. The single-category baseline represents the current state-of-the-art for systems that do not utilize supervised syntax or syntax proxies (like POS tags or syntactic chunks). The random category baselines are provided to give an indication of how a poorly induced syntactic system (for a given $K$) would perform. -\begin{table}[h] +Additionally, we provide two benchmark systems that use a more sensible set of nonterminal categories. The first uses supervised English (target) language parse trees to annotate the phrases in the grammar as proposed by \cite{samt}. The second (labeled Target POS-only) uses the target language part-of-speech tag for all rules that generate only a single terminal symbol in the target language, and the symbol X otherwise. 
+ +\begin{table} \caption{Baseline systems} \begin{center} \begin{tabular}{r|c|c} @@ -133,16 +135,18 @@ Random ($K=25$) & 55.4 & 19.7 \\ Random ($K=50$) & 55.3 & 19.6 \\ \hline Supervised \citep{samt} & 57.8 & 24.5 \\ -POS-only & 56.2 & 22.3 \\ +Target POS-only (\emph{supervised}) & 56.2 & 22.2 \\ \end{tabular} \end{center} \label{tab:npbaselines} \end{table}% -Because the margin of improvement from the 1-category baseline to the supervised condition is much more substantial in the Urdu-English condition than in the BTEC condition, some experiments were only carried out on Urdu. - \subsection{Number of categories} +The number of categories, $K$, is a free parameter in our nonparametric clustering model. In this section, we report results exploring the effect of $K$ on translation quality on the BTEC and Urdu translation tasks. + +Preliminary experiments indicated that single word (to the left and right) target language contexts learned with a uniform $\phi_0$ and a hierarchical $\theta_0$ produced useful clusters for translation, so we used this as the definition of the context for this experiment. Table~\ref{tab:npvaryk} summarizes the effect of varying $K$ using a single word of target language context, using a uniform $\phi_0$ and a hierarchical $\theta_0$. 
+ \begin{table}[h] \caption{Effect of varying $K$, single word left and right target language context, uniform $\phi_0$, hierarchical $\theta_0$.} \begin{center} @@ -152,33 +156,53 @@ Because the margin of improvement from the 1-category baseline to the supervised Single category (baseline) & 57.0 & 21.1 \\ \hline $K=10$ & 56.4 & 21.2 \\ -$K=25$ & 57.5 & 22.0 \\ -$K=50$ & 56.2 & \\ +$K=25$ & \textbf{57.5} & \textbf{22.0} \\ +$K=50$ & 56.2 & 21.4 \\ \end{tabular} \end{center} -\label{tab:npbaselines} +\label{tab:npvaryk} \end{table}% \subsection{Context types} +Because the margin of improvement from the 1-category baseline to the supervised condition is much more substantial in the Urdu-English condition than in the BTEC condition, the experiments in this section were carried out only on Urdu. + + \begin{table}[h] -\caption{Effect of varying the context definition and/or smoothing, $K=25$, hierarchical $\theta_0$.} +\caption{Effect of varying the context definition and/or smoothing, $K=25$, hierarchical $\theta_0$; best results in bold. 
Baseline and benchmark systems also included for reference.} \begin{center} -\begin{tabular}{r|c|c} -& BTEC & Urdu \\ +\begin{tabular}{r|c|c|c|c} +Context Type & $|\textbf{c}|/2$ & $\phi_0$ & \textsc{bleu} & $H(S|Z)$ \\ \hline -Single category (baseline) & 57.0 & 21.1 \\ +Baseline ($K=1$) & -- & -- & 20.9 & 4.49 \\ +\hline +Source word & 1 & uniform & 21.7 & 3.25 \\ +Source word class & 1 & uniform & 20.4 & 3.03 \\ +Target word & 1 & uniform & 22.0 & 2.86 \\ +Target word class & 1 & uniform & \textbf{22.3} & \textbf{2.27} \\ +Source word & 2 & 1-word backoff & 21.3 & 3.41 \\ +Source word class & 2 & 1-class backoff & & 4.20 \\ +Target word & 2 & 1-word backoff & 20.8 & 3.16 \\ +Target word class & 2 & 1-class backoff & 20.1 & 4.06 \\ \hline -1-word target & & \\ -1-word source & & \\ -2-words target & & \\ -2-words source & & \\ +Supervised \citep{samt} & -- & -- & 24.6 & 0 \\ +Target POS-only (\emph{supervised}) & 1 & uniform & 22.2 & 1.85 \\ \end{tabular} \end{center} \label{tab:npbaselines} \end{table}% +\subsection{Correlating the intrinsic metric} + +\begin{figure} +\begin{center} +\includegraphics[scale=0.5]{pyp_clustering/correl.pdf} +\vspace{-0.3cm} +\end{center} +\caption{The intrinsic conditional entropy metric $H(S|Z)$ correlates approximately linearly with \textsc{bleu}.} +\label{fig:intr_correl} +\end{figure} \section{Discussion} diff --git a/report/pyp_clustering/bleu_condentropy.R b/report/pyp_clustering/bleu_condentropy.R new file mode 100644 index 00000000..8c6fb1b7 --- /dev/null +++ b/report/pyp_clustering/bleu_condentropy.R @@ -0,0 +1,5 @@ +data <- read.table("urdu.dat", header=TRUE) +attach(data) +data.lm <- glm(BLEU ~ ConditionalEntropy) +plot(data$ConditionalEntropy,data$BLEU) +abline(data.lm) diff --git a/report/pyp_clustering/bleu_condentropy.dat b/report/pyp_clustering/bleu_condentropy.dat new file mode 100644 index 00000000..125d9424 --- /dev/null +++ b/report/pyp_clustering/bleu_condentropy.dat @@ -0,0 +1,13 @@ +ConditionalEntropy BLEU 
+4.45 19.7 +4.49 20.9 +0 24.6 +3.25 21.7 +2.86 22.0 +3.03 20.4 +2.29 21.8 +1.85 22.2 +2.27 22.3 +3.41 21.3 +3.16 20.8 +4.06 20.1 diff --git a/report/pyp_clustering/correl.pdf b/report/pyp_clustering/correl.pdf new file mode 100644 index 00000000..5bd563be --- /dev/null +++ b/report/pyp_clustering/correl.pdf @@ -0,0 +1,215 @@ +%PDF-1.4 +%âãÏÓ\r +1 0 obj +<< +/CreationDate (D:20100823175418) +/ModDate (D:20100823175418) +/Title (R Graphics Output) +/Producer (R 2.11.1) +/Creator (R) +>> +endobj +2 0 obj +<< +/Type /Catalog +/Pages 3 0 R +>> +endobj +5 0 obj +<< +/Type /Page +/Parent 3 0 R +/Contents 6 0 R +/Resources 4 0 R +>> +endobj +6 0 obj +<< +/Length 7 0 R +>> +stream +1 J 1 j q +Q q 59.04 73.44 414.72 371.52 re W n +0.000 0.000 0.000 RG +0.75 w +[] 0 d +1 J +1 j +10.00 M +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 452.02 84.60 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 455.44 168.85 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 71.44 428.60 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 349.39 225.01 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 316.04 246.07 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 330.57 133.75 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 267.29 232.03 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 229.66 260.11 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 265.58 267.14 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 363.07 196.93 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 341.69 161.83 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 418.66 112.69 Tm (l) Tj 0 Tr +ET +Q q +0.000 0.000 0.000 RG +0.75 w +[] 0 d +1 J +1 j +10.00 M +74.40 73.44 m 416.49 73.44 l S +74.40 73.44 m 74.40 66.24 l S +159.92 73.44 m 159.92 66.24 l S +245.45 73.44 m 245.45 66.24 l S +330.97 73.44 m 330.97 66.24 l S +416.49 73.44 m 416.49 66.24 l S +BT +0.000 0.000 0.000 rg +/F2 1 Tf 12.00 0.00 -0.00 12.00 71.06 47.52 Tm (0) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 156.59 47.52 Tm (1) Tj +ET +BT +/F2 1 Tf 
12.00 0.00 -0.00 12.00 242.11 47.52 Tm (2) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 327.63 47.52 Tm (3) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 413.16 47.52 Tm (4) Tj +ET +59.04 108.26 m 59.04 389.08 l S +59.04 108.26 m 51.84 108.26 l S +59.04 178.47 m 51.84 178.47 l S +59.04 248.67 m 51.84 248.67 l S +59.04 318.87 m 51.84 318.87 l S +59.04 389.08 m 51.84 389.08 l S +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 101.59 Tm (20) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 171.79 Tm (21) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 242.00 Tm (22) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 312.20 Tm (23) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 382.41 Tm (24) Tj +ET +59.04 73.44 m +473.76 73.44 l +473.76 444.96 l +59.04 444.96 l +59.04 73.44 l +S +Q q +BT +0.000 0.000 0.000 rg +/F2 1 Tf 12.00 0.00 -0.00 12.00 215.89 18.72 Tm [(ConditionalEntrop) 30 (y)] TJ +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 12.96 243.53 Tm (BLEU) Tj +ET +Q q 59.04 73.44 414.72 371.52 re W n +0.000 0.000 0.000 RG +0.75 w +[] 0 d +1 J +1 j +10.00 M +59.04 419.37 m 473.76 96.26 l S +Q +endstream +endobj +7 0 obj +2296 +endobj +3 0 obj +<< +/Type /Pages +/Kids [ +5 0 R +] +/Count 1 +/MediaBox [0 0 504 504] +>> +endobj +4 0 obj +<< +/ProcSet [/PDF /Text] +/Font << /F1 9 0 R /F2 10 0 R >> +/ExtGState << >> +>> +endobj +8 0 obj +<< +/Type /Encoding +/BaseEncoding /WinAnsiEncoding +/Differences [ 45/minus 96/quoteleft +144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent +/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space] +>> +endobj +9 0 obj +<< +/Type /Font +/Subtype /Type1 +/Name /F1 +/BaseFont /ZapfDingbats +>> +endobj +10 0 obj << +/Type /Font +/Subtype /Type1 +/Name /F2 +/BaseFont /Helvetica +/Encoding 8 0 R +>> endobj +xref +0 11 +0000000000 65535 f +0000000021 00000 n +0000000164 00000 n +0000002662 00000 n +0000002745 00000 n +0000000213 00000 n +0000000293 00000 n +0000002642 00000 n +0000002838 00000 n +0000003095 
00000 n +0000003178 00000 n +trailer +<< +/Size 11 +/Info 1 0 R +/Root 2 0 R +>> +startxref +3275 +%%EOF -- cgit v1.2.3