summary | refs | log | tree | commit | diff
path: root/klm/lm/build_binary.cc
diff options
context:
space:
mode:
Diffstat (limited to 'klm/lm/build_binary.cc')
-rw-r--r-- klm/lm/build_binary.cc 26
1 files changed, 23 insertions, 3 deletions
diff --git a/klm/lm/build_binary.cc b/klm/lm/build_binary.cc
index 144c57e0..d6dd5994 100644
--- a/klm/lm/build_binary.cc
+++ b/klm/lm/build_binary.cc
@@ -1,6 +1,8 @@
#include "lm/model.hh"
#include "util/file_piece.hh"
+#include <cstdlib>
+#include <exception>
#include <iostream>
#include <iomanip>
@@ -13,8 +15,10 @@ namespace ngram {
namespace {
void Usage(const char *name) {
- std::cerr << "Usage: " << name << " [-u unknown_probability] [-p probing_multiplier] [-t trie_temporary] [-m trie_building_megabytes] [type] input.arpa output.mmap\n\n"
-"Where type is one of probing, trie, or sorted:\n\n"
+ std::cerr << "Usage: " << name << " [-u unknown_probability] [-s] [-p probing_multiplier] [-t trie_temporary] [-m trie_building_megabytes] [type] input.arpa output.mmap\n\n"
+"-u sets the default probability for <unk> if the ARPA file does not have one.\n"
+"-s allows models to be built even if they do not have <s> and </s>.\n\n"
+"type is one of probing, trie, or sorted:\n\n"
"probing uses a probing hash table. It is the fastest but uses the most memory.\n"
"-p sets the space multiplier and must be >1.0. The default is 1.5.\n\n"
"trie is a straightforward trie with bit-level packing. It uses the least\n"
@@ -65,12 +69,25 @@ void ShowSizes(const char *file, const lm::ngram::Config &config) {
} // namespace lm
} // namespace
+void terminate_handler() {
+ try { throw; }
+ catch(const std::exception& e) {
+ std::cerr << e.what() << std::endl;
+ }
+ catch(...) {
+ std::cerr << "A non-standard exception was thrown." << std::endl;
+ }
+ std::abort();
+}
+
int main(int argc, char *argv[]) {
using namespace lm::ngram;
+ std::set_terminate(terminate_handler);
+
lm::ngram::Config config;
int opt;
- while ((opt = getopt(argc, argv, "u:p:t:m:")) != -1) {
+ while ((opt = getopt(argc, argv, "su:p:t:m:")) != -1) {
switch(opt) {
case 'u':
config.unknown_missing_prob = ParseFloat(optarg);
@@ -84,6 +101,9 @@ int main(int argc, char *argv[]) {
case 'm':
config.building_memory = ParseUInt(optarg) * 1048576;
break;
+ case 's':
+ config.sentence_marker_missing = lm::ngram::Config::SILENT;
+ break;
default:
Usage(argv[0]);
}