From 461c2670efb0968ccc6789ff0c9ca6f88ab31e80 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 20 May 2011 16:19:04 -0400 Subject: kenlm update including being nicer to NFS --- klm/lm/build_binary.cc | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'klm/lm/build_binary.cc') diff --git a/klm/lm/build_binary.cc b/klm/lm/build_binary.cc index 920ff080..91ad2fb9 100644 --- a/klm/lm/build_binary.cc +++ b/klm/lm/build_binary.cc @@ -15,10 +15,11 @@ namespace ngram { namespace { void Usage(const char *name) { - std::cerr << "Usage: " << name << " [-u log10_unknown_probability] [-s] [-p probing_multiplier] [-t trie_temporary] [-m trie_building_megabytes] [type] input.arpa output.mmap\n\n" + std::cerr << "Usage: " << name << " [-u log10_unknown_probability] [-s] [-n] [-p probing_multiplier] [-t trie_temporary] [-m trie_building_megabytes] [type] input.arpa output.mmap\n\n" "-u sets the default log10 probability for <unk> if the ARPA file does not have\n" "one.\n" -"-s allows models to be built even if they do not have <s> and </s>.\n\n" +"-s allows models to be built even if they do not have <s> and </s>.\n" +"-i allows buggy models from IRSTLM by mapping positive log probability to 0.\n" "type is one of probing, trie, or sorted:\n\n" "probing uses a probing hash table. It is the fastest but uses the most memory.\n" "-p sets the space multiplier and must be >1.0. 
The default is 1.5.\n\n" @@ -63,7 +64,6 @@ void ShowSizes(const char *file, const lm::ngram::Config &config) { std::cout << "bytes\n" "probing " << std::setw(length) << probing_size << " assuming -p " << config.probing_multiplier << "\n" "trie " << std::setw(length) << TrieModel::Size(counts, config) << "\n"; -/* "sorted " << std::setw(length) << SortedModel::Size(counts, config) << "\n";*/ } } // namespace ngram @@ -76,7 +76,7 @@ int main(int argc, char *argv[]) { try { lm::ngram::Config config; int opt; - while ((opt = getopt(argc, argv, "su:p:t:m:")) != -1) { + while ((opt = getopt(argc, argv, "siu:p:t:m:")) != -1) { switch(opt) { case 'u': config.unknown_missing_logprob = ParseFloat(optarg); @@ -91,7 +91,10 @@ int main(int argc, char *argv[]) { config.building_memory = ParseUInt(optarg) * 1048576; break; case 's': - config.sentence_marker_missing = lm::ngram::Config::SILENT; + config.sentence_marker_missing = lm::SILENT; + break; + case 'i': + config.positive_log_probability = lm::SILENT; break; default: Usage(argv[0]); @@ -108,8 +111,6 @@ int main(int argc, char *argv[]) { config.write_mmap = argv[optind + 2]; if (!strcmp(model_type, "probing")) { ProbingModel(from_file, config); - } else if (!strcmp(model_type, "sorted")) { - SortedModel(from_file, config); } else if (!strcmp(model_type, "trie")) { TrieModel(from_file, config); } else { -- cgit v1.2.3