diff options
author | trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-21 17:30:41 +0000 |
---|---|---|
committer | trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-21 17:30:41 +0000 |
commit | a592b3a5ed2dc96c62d9fe7408948d66b97e1aec (patch) | |
tree | c98f736fd521ebd76782712e93b078073c2e35a8 | |
parent | d7ec874b9f5bf8cd02e04df6622b5d01cf6b273c (diff) |
Little bug fix to EM clustering
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@357 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r-- | extools/extractor_monolingual.cc | 2 | ||||
-rw-r--r-- | gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java | 7 |
2 files changed, 3 insertions, 6 deletions
diff --git a/extools/extractor_monolingual.cc b/extools/extractor_monolingual.cc
index ea3e128d..049ebc85 100644
--- a/extools/extractor_monolingual.cc
+++ b/extools/extractor_monolingual.cc
@@ -27,7 +27,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
("input,i", po::value<string>()->default_value("-"), "Input file")
("phrases,p", po::value<string>(), "File contatining phrases of interest")
("phrase_context_size,S", po::value<int>()->default_value(2), "Use this many words of context on left and write when writing base phrase contexts")
- ("combiner_size,c", po::value<size_t>()->default_value(800000), "Number of unique items to store in cache before writing rule counts. Set to 1 to disable cache. Set to 0 for no limit.")
+ ("combiner_size,c", po::value<size_t>()->default_value(30000), "Number of unique items to store in cache before writing rule counts. Set to 1 to disable cache. Set to 0 for no limit.")
("prune", po::value<size_t>()->default_value(0), "Prune items with count less than threshold; applies each time the cache is dumped.")
("silent", "Write nothing to stderr except errors")
("help,h", "Print this help message and exit");
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
index 93e743fc..13ac14ba 100644
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
+++ b/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
@@ -90,10 +90,7 @@ public class PhraseCluster {
for(int phrase=0; phrase < n_phrases; phrase++)
{
if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit)
- {
- // System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K);
continue;
- }
Arrays.fill(exp_pi, 1e-10);
@@ -119,8 +116,8 @@ public class PhraseCluster {
exp_pi[tag]+=p[tag]*count;
}
}
- arr.F.l1norm(exp_pi);
- pi[phrase]=exp_pi;
+ arr.F.l1normalize(exp_pi);
+ System.arraycopy(exp_pi, 0, pi[phrase], 0, K);
}
//M
|