From 4037e35c511aec96f780276aa4e3c1493e19eba1 Mon Sep 17 00:00:00 2001 From: "trevor.cohn" Date: Thu, 15 Jul 2010 22:48:44 +0000 Subject: Option to run on single word phrases before moving to larger ones. git-svn-id: https://ws10smt.googlecode.com/svn/trunk@272 ec762483-ff6d-05da-a07a-a48fb63a330f --- .../prjava/src/phrase/PhraseCluster.java | 67 ++++++++++++++++++---- 1 file changed, 55 insertions(+), 12 deletions(-) (limited to 'gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java') diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java index a369b319..5efaf52e 100644 --- a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java +++ b/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java @@ -69,16 +69,29 @@ public class PhraseCluster { pool = Executors.newFixedThreadPool(threads); } - public double EM() + public double EM(boolean skipBigPhrases) { double [][][]exp_emit=new double [K][n_positions][n_words]; double [][]exp_pi=new double[n_phrases][K]; + if (skipBigPhrases) + { + for(double [][]i:exp_emit) + for(double []j:i) + Arrays.fill(j, 1e-100); + } + double loglikelihood=0; //E for(int phrase=0; phrase < n_phrases; phrase++) { + if (skipBigPhrases && c.getPhrase(phrase).size() >= 2) + { + System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K); + continue; + } + List contexts = c.getEdgesForPhrase(phrase); for (int ctx=0; ctx= 2) + { + System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K); + continue; + } + PhraseObjective po = new PhraseObjective(this, phrase, scalePT, (cacheLambda) ? lambdaPT[phrase] : null); boolean ok = po.optimizeWithProjectedGradientDescent(); if (!ok) ++failures; @@ -292,7 +319,7 @@ public class PhraseCluster { return primal; } - public double PREM_phrase_constraints_parallel(final double scalePT) + public double PREM_phrase_constraints_parallel(final double scalePT, boolean skipBigPhrases) { assert(pool != null); @@ -302,10 +329,17 @@ public class PhraseCluster { double [][][]exp_emit=new double [K][n_positions][n_words]; double [][]exp_pi=new double[n_phrases][K]; + if (skipBigPhrases) + { + for(double [][]i:exp_emit) + for(double []j:i) + Arrays.fill(j, 1e-100); + } + double loglikelihood=0, kl=0, l1lmax=0, primal=0; final AtomicInteger failures = new AtomicInteger(0); final AtomicLong elapsed = new AtomicLong(0l); - int iterations=0; + int iterations=0, n=n_phrases; long start = System.currentTimeMillis(); if (lambdaPT == null && cacheLambda) @@ -313,6 +347,12 @@ public class PhraseCluster { //E for(int phrase=0;phrase= 2) + { + n -= 1; + System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K); + continue; + } final int p=phrase; pool.execute(new Runnable() { public void run() { @@ -337,7 +377,7 @@ public class PhraseCluster { } // aggregate the expectations as they become available - for(int count=0;count