From 0c901842ddb907fd45d29bdece5b48d42a599616 Mon Sep 17 00:00:00 2001
From: desaicwtf
Date: Wed, 21 Jul 2010 14:53:58 +0000
Subject: corpus reads optional tags from data, EM trains with those tags, fix a bug in PhraseCluster where phrase priors are not learned

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@354 ec762483-ff6d-05da-a07a-a48fb63a330f
---
 .../prjava/src/phrase/PhraseCluster.java | 27 +++++++++++++---------
 1 file changed, 16 insertions(+), 11 deletions(-)

(limited to 'gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java')

diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
index 560100d4..93e743fc 100644
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
+++ b/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
@@ -78,13 +78,11 @@ public class PhraseCluster {
 	public double EM(int phraseSizeLimit)
 	{
 		double [][][]exp_emit=new double [K][n_positions][n_words];
-		double [][]exp_pi=new double[n_phrases][K];
+		double []exp_pi=new double[K];
 
 		for(double [][]i:exp_emit)
 			for(double []j:i)
 				Arrays.fill(j, 1e-10);
-		for(double []j:pi)
-			Arrays.fill(j, 1e-10);
 
 		double loglikelihood=0;
 
@@ -93,10 +91,12 @@ public class PhraseCluster {
 		{
 			if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit)
 			{
-				System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K);
+				// System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K);
 				continue;
 			}
 
+			Arrays.fill(exp_pi, 1e-10);
+
 			List<Edge> contexts = c.getEdgesForPhrase(phrase);
 
 			for (int ctx=0; ctx<contexts.size(); ctx++)
[... the remaining hunk(s) and the header and leading context of the final hunk were lost in extraction (text between '<' and '>' was stripped); the diffstat above implies roughly 6 insertions and 7 deletions are missing here ...]
+		if(edge.getTag()>=0){
+			prob=new double[K];
+			prob[edge.getTag()]=1;
+			return prob;
+		}
+
 		if (edge.getPhraseId() < n_phrases)
 			prob = Arrays.copyOf(pi[edge.getPhraseId()], K);
 		else
-- 
cgit v1.2.3