diff options
Diffstat (limited to 'klm/lm/build_binary.cc')
-rw-r--r-- | klm/lm/build_binary.cc | 26 |
1 files changed, 23 insertions, 3 deletions
diff --git a/klm/lm/build_binary.cc b/klm/lm/build_binary.cc index 144c57e0..d6dd5994 100644 --- a/klm/lm/build_binary.cc +++ b/klm/lm/build_binary.cc @@ -1,6 +1,8 @@ #include "lm/model.hh" #include "util/file_piece.hh" +#include <cstdlib> +#include <exception> #include <iostream> #include <iomanip> @@ -13,8 +15,10 @@ namespace ngram { namespace { void Usage(const char *name) { - std::cerr << "Usage: " << name << " [-u unknown_probability] [-p probing_multiplier] [-t trie_temporary] [-m trie_building_megabytes] [type] input.arpa output.mmap\n\n" -"Where type is one of probing, trie, or sorted:\n\n" + std::cerr << "Usage: " << name << " [-u unknown_probability] [-s] [-p probing_multiplier] [-t trie_temporary] [-m trie_building_megabytes] [type] input.arpa output.mmap\n\n" +"-u sets the default probability for <unk> if the ARPA file does not have one.\n" +"-s allows models to be built even if they do not have <s> and </s>.\n\n" +"type is one of probing, trie, or sorted:\n\n" "probing uses a probing hash table. It is the fastest but uses the most memory.\n" "-p sets the space multiplier and must be >1.0. The default is 1.5.\n\n" "trie is a straightforward trie with bit-level packing. It uses the least\n" @@ -65,12 +69,25 @@ void ShowSizes(const char *file, const lm::ngram::Config &config) { } // namespace lm } // namespace +void terminate_handler() { + try { throw; } + catch(const std::exception& e) { + std::cerr << e.what() << std::endl; + } + catch(...) { + std::cerr << "A non-standard exception was thrown." << std::endl; + } + std::abort(); +} + int main(int argc, char *argv[]) { using namespace lm::ngram; + std::set_terminate(terminate_handler); + lm::ngram::Config config; int opt; - while ((opt = getopt(argc, argv, "u:p:t:m:")) != -1) { + while ((opt = getopt(argc, argv, "su:p:t:m:")) != -1) { switch(opt) { case 'u': config.unknown_missing_prob = ParseFloat(optarg); @@ -84,6 +101,9 @@ int main(int argc, char *argv[]) { case 'm': config.building_memory = ParseUInt(optarg) * 1048576; break; + case 's': + config.sentence_marker_missing = lm::ngram::Config::SILENT; + break; default: Usage(argv[0]); } |