summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-21 17:30:41 +0000
committer trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-21 17:30:41 +0000
commit a592b3a5ed2dc96c62d9fe7408948d66b97e1aec (patch)
tree c98f736fd521ebd76782712e93b078073c2e35a8
parent d7ec874b9f5bf8cd02e04df6622b5d01cf6b273c (diff)
Little bug fix to EM clustering
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@357 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r-- extools/extractor_monolingual.cc 2
-rw-r--r-- gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java 7
2 files changed, 3 insertions, 6 deletions
diff --git a/extools/extractor_monolingual.cc b/extools/extractor_monolingual.cc
index ea3e128d..049ebc85 100644
--- a/extools/extractor_monolingual.cc
+++ b/extools/extractor_monolingual.cc
@@ -27,7 +27,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
("input,i", po::value<string>()->default_value("-"), "Input file")
("phrases,p", po::value<string>(), "File contatining phrases of interest")
("phrase_context_size,S", po::value<int>()->default_value(2), "Use this many words of context on left and write when writing base phrase contexts")
- ("combiner_size,c", po::value<size_t>()->default_value(800000), "Number of unique items to store in cache before writing rule counts. Set to 1 to disable cache. Set to 0 for no limit.")
+ ("combiner_size,c", po::value<size_t>()->default_value(30000), "Number of unique items to store in cache before writing rule counts. Set to 1 to disable cache. Set to 0 for no limit.")
("prune", po::value<size_t>()->default_value(0), "Prune items with count less than threshold; applies each time the cache is dumped.")
("silent", "Write nothing to stderr except errors")
("help,h", "Print this help message and exit");
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
index 93e743fc..13ac14ba 100644
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
+++ b/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
@@ -90,10 +90,7 @@ public class PhraseCluster {
for(int phrase=0; phrase < n_phrases; phrase++)
{
if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit)
- {
- // System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K);
continue;
- }
Arrays.fill(exp_pi, 1e-10);
@@ -119,8 +116,8 @@ public class PhraseCluster {
exp_pi[tag]+=p[tag]*count;
}
}
- arr.F.l1norm(exp_pi);
- pi[phrase]=exp_pi;
+ arr.F.l1normalize(exp_pi);
+ System.arraycopy(exp_pi, 0, pi[phrase], 0, K);
}
//M