summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jonathan Clark <jon.h.clark@gmail.com> 2011-03-10 20:25:13 -0500
committer: Jonathan Clark <jon.h.clark@gmail.com> 2011-03-10 20:25:13 -0500
commit: 2015d842afbc2e646e0d1c21ca5de86dd0fe0813 (patch)
tree: 89b57254cdd7d8ad52f959c31ffc50eea9442d43
parent: 8056672a26e44fa0fb9be93218597e31d63847ac (diff)
parent: 1b8a83f39d5402da0273498111ddadfc7cc9d2ee (diff)
Merge branch 'master' of github.com:redpony/cdec
-rw-r--r-- compound-split/de/charlm.rev.5gm.de.klm | bin 14148755 -> 17376695 bytes
-rw-r--r-- environment/LocalConfig.pm | 8
-rw-r--r-- klm/lm/test.arpa | 24
-rw-r--r-- training/augment_grammar.cc | 9
4 files changed, 33 insertions, 8 deletions
diff --git a/compound-split/de/charlm.rev.5gm.de.klm b/compound-split/de/charlm.rev.5gm.de.klm
index 574898dc..e8d114bd 100644
--- a/compound-split/de/charlm.rev.5gm.de.klm
+++ b/compound-split/de/charlm.rev.5gm.de.klm
Binary files differ
diff --git a/environment/LocalConfig.pm b/environment/LocalConfig.pm
index dd3ef761..62ee2dd3 100644
--- a/environment/LocalConfig.pm
+++ b/environment/LocalConfig.pm
@@ -12,6 +12,10 @@ my $host = domainname;
# keys are: HOST_REGEXP, MERTMem, QSubQueue, QSubMemFlag, QSubExtraFlags
my $CCONFIG = {
+ 'StarCluster' => {
+ 'HOST_REGEXP' => qr/compute-\d+\.internal$/,
+ 'QSubMemFlag' => '-l mem',
+ },
'LTICluster' => {
'HOST_REGEXP' => qr/^cluster\d+\.lti\.cs\.cmu\.edu$/,
'QSubMemFlag' => '-l h_vmem=',
@@ -41,8 +45,8 @@ my $CCONFIG = {
'QSubMemFlag' => '-l pmem=',
},
'LOCAL' => {
- 'HOST_REGEXP' => qr/local\.net$/,
- 'QSubMemFlag' => '',
+ 'HOST_REGEXP' => qr/local\./,
+ 'QSubMemFlag' => ' ',
},
};
diff --git a/klm/lm/test.arpa b/klm/lm/test.arpa
index 9d674e83..ef214eae 100644
--- a/klm/lm/test.arpa
+++ b/klm/lm/test.arpa
@@ -1,17 +1,17 @@
\data\
-ngram 1=34
-ngram 2=43
-ngram 3=8
-ngram 4=5
-ngram 5=3
+ngram 1=37
+ngram 2=47
+ngram 3=11
+ngram 4=6
+ngram 5=4
\1-grams:
-1.383514 , -0.30103
-1.139057 . -0.845098
-1.029493 </s>
-99 <s> -0.4149733
--1.995635 <unk>
+-1.995635 <unk> -20
-1.285941 a -0.69897
-1.687872 also -0.30103
-1.687872 beyond -0.30103
@@ -41,6 +41,9 @@ ngram 5=3
-1.687872 watching -0.30103
-1.687872 what -0.30103
-1.687872 would -0.30103
+-3.141592 foo
+-2.718281 bar 3.0
+-6.535897 baz -0.0
\2-grams:
-0.6925742 , .
@@ -86,6 +89,10 @@ ngram 5=3
-0.2922095 watching considering
-0.2922095 what i
-0.2922095 would also
+-2 also would -6
+-15 <unk> <unk> -2
+-4 <unk> however -1
+-6 foo bar
\3-grams:
-0.01916512 more . </s>
@@ -96,6 +103,9 @@ ngram 5=3
-0.3488368 <s> looking on -0.4771212
-0.1892331 little more loin
-0.04835128 looking on a -0.4771212
+-3 also would consider -7
+-6 <unk> however <unk> -12
+-7 to look good
\4-grams:
-0.009249173 looking on a little -0.4771212
@@ -103,10 +113,12 @@ ngram 5=3
-0.005464747 screening a little more
-0.1453306 a little more loin
-0.01552657 <s> looking on a -0.4771212
+-4 also would consider higher -8
\5-grams:
-0.003061223 <s> looking on a little
-0.001813953 looking on a little more
-0.0432557 on a little more loin
+-5 also would consider higher looking
\end\
diff --git a/training/augment_grammar.cc b/training/augment_grammar.cc
index 19120d00..9ad03b6c 100644
--- a/training/augment_grammar.cc
+++ b/training/augment_grammar.cc
@@ -36,6 +36,7 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
("source_lm,l",po::value<string>(),"Source language LM (KLM)")
("collapse_weights,w",po::value<string>(), "Collapse weights into a single feature X using the coefficients from this weights file")
("add_shape_types,s", "Add rule shape types")
+ ("extra_lex_feature,x", "Experimental nonlinear lexical weighting feature")
("replace_files,r", "Replace files with transformed variants (requires loading full grammar into memory)")
("grammar,g", po::value<vector<string> >(), "Input (also output) grammar file(s)");
po::options_description clo("Command line options");
@@ -85,6 +86,7 @@ template <class Model> float Score(const vector<WordID>& str, const Model &model
return total;
}
+bool extra_feature;
int kSrcLM;
vector<double> col_weights;
bool gather_rules;
@@ -94,9 +96,15 @@ static void RuleHelper(const TRulePtr& new_rule, const unsigned int ctf_level, c
static const int kSrcLM = FD::Convert("SrcLM");
static const int kPC = FD::Convert("PC");
static const int kX = FD::Convert("X");
+ static const int kPhraseModel2 = FD::Convert("PhraseModel_1");
+ static const int kNewLex = FD::Convert("NewLex");
TRulePtr r; r.reset(new TRule(*new_rule));
if (ngram) r->scores_.set_value(kSrcLM, Score(r->f_, *ngram));
r->scores_.set_value(kPC, 1.0);
+ if (extra_feature) {
+ float v = r->scores_.value(kPhraseModel2);
+ r->scores_.set_value(kNewLex, v*(v+1));
+ }
if (col_weights.size()) {
double score = r->scores_.dot(col_weights);
r->scores_.clear();
@@ -122,6 +130,7 @@ int main(int argc, char** argv) {
cerr << "Loaded " << (int)ngram->Order() << "-gram KenLM (MapSize=" << word_map.size() << ")\n";
cerr << " <s> = " << kSOS << endl;
} else { ngram = NULL; }
+ extra_feature = conf.count("extra_lex_feature") > 0;
if (conf.count("collapse_weights")) {
Weights w;
w.InitFromFile(conf["collapse_weights"].as<string>());