From 1c6ba93d7f9d46186b05c07cd5208793554c06af Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 10 Mar 2011 14:01:39 -0500 Subject: configuration for starcluster clusters --- environment/LocalConfig.pm | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/environment/LocalConfig.pm b/environment/LocalConfig.pm index 10933f36..7ea1b416 100644 --- a/environment/LocalConfig.pm +++ b/environment/LocalConfig.pm @@ -12,6 +12,10 @@ my $host = domainname; # keys are: HOST_REGEXP, MERTMem, QSubQueue, QSubMemFlag, QSubExtraFlags my $CCONFIG = { + 'StarCluster' => { + 'HOST_REGEXP' => qr/compute-\d+\.internal$/, + 'QSubMemFlag' => '-l mem', + }, 'LTICluster' => { 'HOST_REGEXP' => qr/^cluster\d+\.lti\.cs\.cmu\.edu$/, 'QSubMemFlag' => '-l h_vmem=', @@ -37,8 +41,8 @@ my $CCONFIG = { 'QSubMemFlag' => '-l pmem=', }, 'LOCAL' => { - 'HOST_REGEXP' => qr/local\.net$/, - 'QSubMemFlag' => '', + 'HOST_REGEXP' => qr/local\./, + 'QSubMemFlag' => ' ', }, }; -- cgit v1.2.3 From faf9e73708e745e43405531c6fc1a33bd192260c Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Thu, 10 Mar 2011 16:10:34 -0500 Subject: Fix broken klm file for de compounding --- compound-split/de/charlm.rev.5gm.de.klm | Bin 14148755 -> 17376695 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/compound-split/de/charlm.rev.5gm.de.klm b/compound-split/de/charlm.rev.5gm.de.klm index 574898dc..e8d114bd 100644 Binary files a/compound-split/de/charlm.rev.5gm.de.klm and b/compound-split/de/charlm.rev.5gm.de.klm differ -- cgit v1.2.3 From f7d23a8ea67e9d7e58e9e5d08465c5d1f70d482d Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Thu, 10 Mar 2011 16:11:31 -0500 Subject: updated test arpa file --- klm/lm/test.arpa | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/klm/lm/test.arpa b/klm/lm/test.arpa index 9d674e83..ef214eae 100644 --- a/klm/lm/test.arpa +++ b/klm/lm/test.arpa @@ -1,17 +1,17 @@ \data\ -ngram 1=34 -ngram 2=43 -ngram 3=8 -ngram 4=5 -ngram 5=3 +ngram 1=37 +ngram 2=47 +ngram 3=11 +ngram 4=6 +ngram 5=4 \1-grams: -1.383514 , -0.30103 -1.139057 . -0.845098 -1.029493 -99 -0.4149733 --1.995635 +-1.995635 -20 -1.285941 a -0.69897 -1.687872 also -0.30103 -1.687872 beyond -0.30103 @@ -41,6 +41,9 @@ ngram 5=3 -1.687872 watching -0.30103 -1.687872 what -0.30103 -1.687872 would -0.30103 +-3.141592 foo +-2.718281 bar 3.0 +-6.535897 baz -0.0 \2-grams: -0.6925742 , . @@ -86,6 +89,10 @@ ngram 5=3 -0.2922095 watching considering -0.2922095 what i -0.2922095 would also +-2 also would -6 +-15 -2 +-4 however -1 +-6 foo bar \3-grams: -0.01916512 more . @@ -96,6 +103,9 @@ ngram 5=3 -0.3488368 looking on -0.4771212 -0.1892331 little more loin -0.04835128 looking on a -0.4771212 +-3 also would consider -7 +-6 however -12 +-7 to look good \4-grams: -0.009249173 looking on a little -0.4771212 @@ -103,10 +113,12 @@ ngram 5=3 -0.005464747 screening a little more -0.1453306 a little more loin -0.01552657 looking on a -0.4771212 +-4 also would consider higher -8 \5-grams: -0.003061223 looking on a little -0.001813953 looking on a little more -0.0432557 on a little more loin +-5 also would consider higher looking \end\ -- cgit v1.2.3 From e7dfcff76b8d53775f753c2367776c5348bd73b5 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 10 Mar 2011 20:02:07 -0500 Subject: experimental nonlinear feature --- training/augment_grammar.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/training/augment_grammar.cc b/training/augment_grammar.cc index 19120d00..9ad03b6c 100644 --- a/training/augment_grammar.cc +++ b/training/augment_grammar.cc @@ -36,6 +36,7 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { ("source_lm,l",po::value(),"Source language LM (KLM)") ("collapse_weights,w",po::value(), "Collapse weights into a single feature X using the coefficients from this weights file") ("add_shape_types,s", "Add rule shape types") + ("extra_lex_feature,x", "Experimental nonlinear lexical weighting feature") ("replace_files,r", "Replace files with transformed variants (requires loading full grammar into memory)") ("grammar,g", po::value >(), "Input (also output) grammar file(s)"); po::options_description clo("Command line options"); @@ -85,6 +86,7 @@ template float Score(const vector& str, const Model &model return total; } +bool extra_feature; int kSrcLM; vector col_weights; bool gather_rules; @@ -94,9 +96,15 @@ static void RuleHelper(const TRulePtr& new_rule, const unsigned int ctf_level, c static const int kSrcLM = FD::Convert("SrcLM"); static const int kPC = FD::Convert("PC"); static const int kX = FD::Convert("X"); + static const int kPhraseModel2 = FD::Convert("PhraseModel_1"); + static const int kNewLex = FD::Convert("NewLex"); TRulePtr r; r.reset(new TRule(*new_rule)); if (ngram) r->scores_.set_value(kSrcLM, Score(r->f_, *ngram)); r->scores_.set_value(kPC, 1.0); + if (extra_feature) { + float v = r->scores_.value(kPhraseModel2); + r->scores_.set_value(kNewLex, v*(v+1)); + } if (col_weights.size()) { double score = r->scores_.dot(col_weights); r->scores_.clear(); @@ -122,6 +130,7 @@ int main(int argc, char** argv) { cerr << "Loaded " << (int)ngram->Order() << "-gram KenLM (MapSize=" << word_map.size() << ")\n"; cerr << " = " << kSOS << endl; } else { ngram = NULL; } + extra_feature = conf.count("extra_lex_feature") > 0; if (conf.count("collapse_weights")) { Weights w; w.InitFromFile(conf["collapse_weights"].as()); -- cgit v1.2.3