diff options
author | Kenneth Heafield <kheafiel@cluster12.lti.ece.cmu.local> | 2011-05-20 16:19:04 -0400 |
---|---|---|
committer | Kenneth Heafield <kheafiel@cluster12.lti.ece.cmu.local> | 2011-05-20 16:19:04 -0400 |
commit | 461c2670efb0968ccc6789ff0c9ca6f88ab31e80 (patch) | |
tree | 6b344dcf320674213fd449e6a4915236ad78c29f /klm/lm/build_binary.cc | |
parent | 0e7b303879baf95a8167194ad7c75ef738e79f15 (diff) |
kenlm update including being nicer to NFS
Diffstat (limited to 'klm/lm/build_binary.cc')
-rw-r--r-- | klm/lm/build_binary.cc | 15 |
1 files changed, 8 insertions, 7 deletions
```diff
diff --git a/klm/lm/build_binary.cc b/klm/lm/build_binary.cc
index 920ff080..91ad2fb9 100644
--- a/klm/lm/build_binary.cc
+++ b/klm/lm/build_binary.cc
@@ -15,10 +15,11 @@ namespace ngram {
 namespace {
 
 void Usage(const char *name) {
-  std::cerr << "Usage: " << name << " [-u log10_unknown_probability] [-s] [-p probing_multiplier] [-t trie_temporary] [-m trie_building_megabytes] [type] input.arpa output.mmap\n\n"
+  std::cerr << "Usage: " << name << " [-u log10_unknown_probability] [-s] [-n] [-p probing_multiplier] [-t trie_temporary] [-m trie_building_megabytes] [type] input.arpa output.mmap\n\n"
 "-u sets the default log10 probability for <unk> if the ARPA file does not have\n"
 "one.\n"
-"-s allows models to be built even if they do not have <s> and </s>.\n\n"
+"-s allows models to be built even if they do not have <s> and </s>.\n"
+"-i allows buggy models from IRSTLM by mapping positive log probability to 0.\n"
 "type is one of probing, trie, or sorted:\n\n"
 "probing uses a probing hash table. It is the fastest but uses the most memory.\n"
 "-p sets the space multiplier and must be >1.0. The default is 1.5.\n\n"
@@ -63,7 +64,6 @@ void ShowSizes(const char *file, const lm::ngram::Config &config) {
   std::cout << "bytes\n"
     "probing " << std::setw(length) << probing_size << " assuming -p " << config.probing_multiplier << "\n"
     "trie " << std::setw(length) << TrieModel::Size(counts, config) << "\n";
-/* "sorted " << std::setw(length) << SortedModel::Size(counts, config) << "\n";*/
 }
 
 } // namespace ngram
@@ -76,7 +76,7 @@ int main(int argc, char *argv[]) {
   try {
     lm::ngram::Config config;
     int opt;
-    while ((opt = getopt(argc, argv, "su:p:t:m:")) != -1) {
+    while ((opt = getopt(argc, argv, "siu:p:t:m:")) != -1) {
       switch(opt) {
         case 'u':
           config.unknown_missing_logprob = ParseFloat(optarg);
@@ -91,7 +91,10 @@ int main(int argc, char *argv[]) {
           config.building_memory = ParseUInt(optarg) * 1048576;
           break;
         case 's':
-          config.sentence_marker_missing = lm::ngram::Config::SILENT;
+          config.sentence_marker_missing = lm::SILENT;
+          break;
+        case 'i':
+          config.positive_log_probability = lm::SILENT;
           break;
         default:
           Usage(argv[0]);
@@ -108,8 +111,6 @@ int main(int argc, char *argv[]) {
     config.write_mmap = argv[optind + 2];
     if (!strcmp(model_type, "probing")) {
       ProbingModel(from_file, config);
-    } else if (!strcmp(model_type, "sorted")) {
-      SortedModel(from_file, config);
     } else if (!strcmp(model_type, "trie")) {
       TrieModel(from_file, config);
     } else {
```