summary refs log tree commit diff
path: root/klm/lm/build_binary.cc
diff options
context:
space:
mode:
Diffstat (limited to 'klm/lm/build_binary.cc')
-rw-r--r-- klm/lm/build_binary.cc 102
1 files changed, 48 insertions, 54 deletions
diff --git a/klm/lm/build_binary.cc b/klm/lm/build_binary.cc
index d6dd5994..920ff080 100644
--- a/klm/lm/build_binary.cc
+++ b/klm/lm/build_binary.cc
@@ -15,8 +15,9 @@ namespace ngram {
namespace {
void Usage(const char *name) {
- std::cerr << "Usage: " << name << " [-u unknown_probability] [-s] [-p probing_multiplier] [-t trie_temporary] [-m trie_building_megabytes] [type] input.arpa output.mmap\n\n"
-"-u sets the default probability for <unk> if the ARPA file does not have one.\n"
+ std::cerr << "Usage: " << name << " [-u log10_unknown_probability] [-s] [-p probing_multiplier] [-t trie_temporary] [-m trie_building_megabytes] [type] input.arpa output.mmap\n\n"
+"-u sets the default log10 probability for <unk> if the ARPA file does not have\n"
+"one.\n"
"-s allows models to be built even if they do not have <s> and </s>.\n\n"
"type is one of probing, trie, or sorted:\n\n"
"probing uses a probing hash table. It is the fastest but uses the most memory.\n"
@@ -69,65 +70,58 @@ void ShowSizes(const char *file, const lm::ngram::Config &config) {
} // namespace lm
} // namespace
-void terminate_handler() {
- try { throw; }
- catch(const std::exception& e) {
- std::cerr << e.what() << std::endl;
- }
- catch(...) {
- std::cerr << "A non-standard exception was thrown." << std::endl;
- }
- std::abort();
-}
-
int main(int argc, char *argv[]) {
using namespace lm::ngram;
- std::set_terminate(terminate_handler);
-
- lm::ngram::Config config;
- int opt;
- while ((opt = getopt(argc, argv, "su:p:t:m:")) != -1) {
- switch(opt) {
- case 'u':
- config.unknown_missing_prob = ParseFloat(optarg);
- break;
- case 'p':
- config.probing_multiplier = ParseFloat(optarg);
- break;
- case 't':
- config.temporary_directory_prefix = optarg;
- break;
- case 'm':
- config.building_memory = ParseUInt(optarg) * 1048576;
- break;
- case 's':
- config.sentence_marker_missing = lm::ngram::Config::SILENT;
- break;
- default:
- Usage(argv[0]);
+ try {
+ lm::ngram::Config config;
+ int opt;
+ while ((opt = getopt(argc, argv, "su:p:t:m:")) != -1) {
+ switch(opt) {
+ case 'u':
+ config.unknown_missing_logprob = ParseFloat(optarg);
+ break;
+ case 'p':
+ config.probing_multiplier = ParseFloat(optarg);
+ break;
+ case 't':
+ config.temporary_directory_prefix = optarg;
+ break;
+ case 'm':
+ config.building_memory = ParseUInt(optarg) * 1048576;
+ break;
+ case 's':
+ config.sentence_marker_missing = lm::ngram::Config::SILENT;
+ break;
+ default:
+ Usage(argv[0]);
+ }
}
- }
- if (optind + 1 == argc) {
- ShowSizes(argv[optind], config);
- } else if (optind + 2 == argc) {
- config.write_mmap = argv[optind + 1];
- ProbingModel(argv[optind], config);
- } else if (optind + 3 == argc) {
- const char *model_type = argv[optind];
- const char *from_file = argv[optind + 1];
- config.write_mmap = argv[optind + 2];
- if (!strcmp(model_type, "probing")) {
- ProbingModel(from_file, config);
- } else if (!strcmp(model_type, "sorted")) {
- SortedModel(from_file, config);
- } else if (!strcmp(model_type, "trie")) {
- TrieModel(from_file, config);
+ if (optind + 1 == argc) {
+ ShowSizes(argv[optind], config);
+ } else if (optind + 2 == argc) {
+ config.write_mmap = argv[optind + 1];
+ ProbingModel(argv[optind], config);
+ } else if (optind + 3 == argc) {
+ const char *model_type = argv[optind];
+ const char *from_file = argv[optind + 1];
+ config.write_mmap = argv[optind + 2];
+ if (!strcmp(model_type, "probing")) {
+ ProbingModel(from_file, config);
+ } else if (!strcmp(model_type, "sorted")) {
+ SortedModel(from_file, config);
+ } else if (!strcmp(model_type, "trie")) {
+ TrieModel(from_file, config);
+ } else {
+ Usage(argv[0]);
+ }
} else {
Usage(argv[0]);
}
- } else {
- Usage(argv[0]);
+ }
+ catch (std::exception &e) {
+ std::cerr << e.what() << std::endl;
+ abort();
}
return 0;
}