summaryrefslogtreecommitdiff
path: root/klm/lm/build_binary.cc
diff options
context:
space:
mode:
authorKenneth Heafield <kheafiel@cluster12.lti.ece.cmu.local>2011-05-20 16:19:04 -0400
committerKenneth Heafield <kheafiel@cluster12.lti.ece.cmu.local>2011-05-20 16:19:04 -0400
commit461c2670efb0968ccc6789ff0c9ca6f88ab31e80 (patch)
tree6b344dcf320674213fd449e6a4915236ad78c29f /klm/lm/build_binary.cc
parent0e7b303879baf95a8167194ad7c75ef738e79f15 (diff)
kenlm update including being nicer to NFS
Diffstat (limited to 'klm/lm/build_binary.cc')
-rw-r--r--klm/lm/build_binary.cc15
1 files changed, 8 insertions, 7 deletions
diff --git a/klm/lm/build_binary.cc b/klm/lm/build_binary.cc
index 920ff080..91ad2fb9 100644
--- a/klm/lm/build_binary.cc
+++ b/klm/lm/build_binary.cc
@@ -15,10 +15,11 @@ namespace ngram {
namespace {
void Usage(const char *name) {
- std::cerr << "Usage: " << name << " [-u log10_unknown_probability] [-s] [-p probing_multiplier] [-t trie_temporary] [-m trie_building_megabytes] [type] input.arpa output.mmap\n\n"
+ std::cerr << "Usage: " << name << " [-u log10_unknown_probability] [-s] [-i] [-p probing_multiplier] [-t trie_temporary] [-m trie_building_megabytes] [type] input.arpa output.mmap\n\n"
"-u sets the default log10 probability for <unk> if the ARPA file does not have\n"
"one.\n"
-"-s allows models to be built even if they do not have <s> and </s>.\n\n"
+"-s allows models to be built even if they do not have <s> and </s>.\n"
+"-i allows buggy models from IRSTLM by mapping positive log probability to 0.\n\n"
"type is one of probing, trie, or sorted:\n\n"
"probing uses a probing hash table. It is the fastest but uses the most memory.\n"
"-p sets the space multiplier and must be >1.0. The default is 1.5.\n\n"
@@ -63,7 +64,6 @@ void ShowSizes(const char *file, const lm::ngram::Config &config) {
std::cout << "bytes\n"
"probing " << std::setw(length) << probing_size << " assuming -p " << config.probing_multiplier << "\n"
"trie " << std::setw(length) << TrieModel::Size(counts, config) << "\n";
-/* "sorted " << std::setw(length) << SortedModel::Size(counts, config) << "\n";*/
}
} // namespace ngram
@@ -76,7 +76,7 @@ int main(int argc, char *argv[]) {
try {
lm::ngram::Config config;
int opt;
- while ((opt = getopt(argc, argv, "su:p:t:m:")) != -1) {
+ while ((opt = getopt(argc, argv, "siu:p:t:m:")) != -1) {
switch(opt) {
case 'u':
config.unknown_missing_logprob = ParseFloat(optarg);
@@ -91,7 +91,10 @@ int main(int argc, char *argv[]) {
config.building_memory = ParseUInt(optarg) * 1048576;
break;
case 's':
- config.sentence_marker_missing = lm::ngram::Config::SILENT;
+ config.sentence_marker_missing = lm::SILENT;
+ break;
+ case 'i':
+ config.positive_log_probability = lm::SILENT;
break;
default:
Usage(argv[0]);
@@ -108,8 +111,6 @@ int main(int argc, char *argv[]) {
config.write_mmap = argv[optind + 2];
if (!strcmp(model_type, "probing")) {
ProbingModel(from_file, config);
- } else if (!strcmp(model_type, "sorted")) {
- SortedModel(from_file, config);
} else if (!strcmp(model_type, "trie")) {
TrieModel(from_file, config);
} else {