From 19458770c803510d2cbf0834f90a5b53da606d65 Mon Sep 17 00:00:00 2001 From: "trevor.cohn" Date: Wed, 21 Jul 2010 17:30:41 +0000 Subject: Little bug fix to EM clustering git-svn-id: https://ws10smt.googlecode.com/svn/trunk@357 ec762483-ff6d-05da-a07a-a48fb63a330f --- extools/extractor_monolingual.cc | 2 +- gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/extools/extractor_monolingual.cc b/extools/extractor_monolingual.cc index ea3e128d..049ebc85 100644 --- a/extools/extractor_monolingual.cc +++ b/extools/extractor_monolingual.cc @@ -27,7 +27,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { ("input,i", po::value()->default_value("-"), "Input file") ("phrases,p", po::value(), "File contatining phrases of interest") ("phrase_context_size,S", po::value()->default_value(2), "Use this many words of context on left and write when writing base phrase contexts") - ("combiner_size,c", po::value()->default_value(800000), "Number of unique items to store in cache before writing rule counts. Set to 1 to disable cache. Set to 0 for no limit.") + ("combiner_size,c", po::value()->default_value(30000), "Number of unique items to store in cache before writing rule counts. Set to 1 to disable cache. Set to 0 for no limit.") ("prune", po::value()->default_value(0), "Prune items with count less than threshold; applies each time the cache is dumped.") ("silent", "Write nothing to stderr except errors") ("help,h", "Print this help message and exit"); diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java index 93e743fc..13ac14ba 100644 --- a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java +++ b/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java @@ -90,10 +90,7 @@ public class PhraseCluster { for(int phrase=0; phrase < n_phrases; phrase++) { if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit) - { - // System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K); continue; - } Arrays.fill(exp_pi, 1e-10); @@ -119,8 +116,8 @@ public class PhraseCluster { exp_pi[tag]+=p[tag]*count; } } - arr.F.l1norm(exp_pi); - pi[phrase]=exp_pi; + arr.F.l1normalize(exp_pi); + System.arraycopy(exp_pi, 0, pi[phrase], 0, K); } //M -- cgit v1.2.3