diff options
author | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-15 03:50:05 +0000 |
---|---|---|
committer | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-15 03:50:05 +0000 |
commit | 27ed3c0fecde089a761ccf718748413bb572a3a4 (patch) | |
tree | 69e84990a9c4842ccbb7783f76e73b2dc1e3a7fa /decoder/ff_lm.cc | |
parent | 12b09cc1069ee4401074e0e0f6d8f9c120318aa0 (diff) |
oracle bleu refactor
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@259 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/ff_lm.cc')
-rw-r--r-- | decoder/ff_lm.cc | 4 |
1 files changed, 3 insertions, 1 deletions
```diff
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index 5de9c321..bbf63338 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -1,6 +1,8 @@
+namespace {
 char const* usage_name="LanguageModel";
 char const* usage_short="srilm.gz [-n FeatureName] [-o StateOrder] [-m LimitLoadOrder]";
 char const* usage_verbose="-n determines the name of the feature (and its weight). -o defaults to 3. -m defaults to effectively infinite, otherwise says what order lm probs to use (up to). you could use -o > -m but that would be wasteful. -o < -m means some ngrams are scored longer (whenever a word is inserted by a rule next to a variable) than the state would ordinarily allow. NOTE: multiple LanguageModel features are allowed, but they will wastefully duplicate state, except in the special case of -o 1 (which uses no state). subsequent references to the same a.lm.gz. unless they specify -m, will reuse the same SRI LM in memory; this means that the -m used in the first load of a.lm.gz will take effect.";
+}
 
 //TODO: backoff wordclasses for named entity xltns, esp. numbers. e.g. digits -> @. idealy rule features would specify replacement lm tokens/classes
 
@@ -513,7 +515,7 @@ bool parse_lmspec(std::string const& in, int &order, string &featurename, string
   if (order > 0 && !filename.empty())
     return true;
  usage:
-  cerr<<usage_name<<" specification should be: "<<usage_short<<"; you provided: "<<in<<usage_verbose<<endl;
+  cerr<<usage_name<<" specification should be: "<<usage_short<<"; you provided: "<<in<<endl<<usage_verbose<<endl;
   return false;
 }
 
```