From 5aee54869aa19cfe9be965e67a472e94449d16da Mon Sep 17 00:00:00 2001
From: Kenneth Heafield <github@kheafield.com>
Date: Wed, 24 Apr 2013 10:12:41 +0100
Subject: KenLM 0831569c3137536165b107c6841603c725dfa2b1

---
 klm/lm/builder/lmplz_main.cc | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'klm/lm/builder/lmplz_main.cc')
diff --git a/klm/lm/builder/lmplz_main.cc b/klm/lm/builder/lmplz_main.cc
index 90b9dca2..1e086dcc 100644
--- a/klm/lm/builder/lmplz_main.cc
+++ b/klm/lm/builder/lmplz_main.cc
@@ -6,6 +6,7 @@
 #include <iostream>
 
 #include <boost/program_options.hpp>
+#include <boost/version.hpp>
 
 namespace {
 class SizeNotify {
@@ -33,13 +34,17 @@ int main(int argc, char *argv[]) {
     lm::builder::PipelineConfig pipeline;
 
     options.add_options()
-      ("order,o", po::value<std::size_t>(&pipeline.order)->required(), "Order of the model")
+      ("order,o", po::value<std::size_t>(&pipeline.order)
+#if BOOST_VERSION >= 104200
+         ->required()
+#endif
+         , "Order of the model")
       ("interpolate_unigrams", po::bool_switch(&pipeline.initial_probs.interpolate_unigrams), "Interpolate the unigrams (default: emulate SRILM by not interpolating)")
       ("temp_prefix,T", po::value<std::string>(&pipeline.sort.temp_prefix)->default_value("/tmp/lm"), "Temporary file prefix")
       ("memory,S", SizeOption(pipeline.sort.total_memory, util::GuessPhysicalMemory() ? "80%" : "1G"), "Sorting memory")
-      ("vocab_memory", SizeOption(pipeline.assume_vocab_hash_size, "50M"), "Assume that the vocabulary hash table will use this much memory for purposes of calculating total memory in the count step")
       ("minimum_block", SizeOption(pipeline.minimum_block, "8K"), "Minimum block size to allow")
       ("sort_block", SizeOption(pipeline.sort.buffer_size, "64M"), "Size of IO operations for sort (determines arity)")
+      ("vocab_estimate", po::value<lm::WordIndex>(&pipeline.vocab_estimate)->default_value(1000000), "Assume this vocabulary size for purposes of calculating memory in step 1 (corpus count) and pre-sizing the hash table")
       ("block_count", po::value<std::size_t>(&pipeline.block_count)->default_value(2), "Block count (per order)")
       ("vocab_file", po::value<std::string>(&pipeline.vocab_file)->default_value(""), "Location to write vocabulary file")
       ("verbose_header", po::bool_switch(&pipeline.verbose_header), "Add a verbose header to the ARPA file that includes information such as token count, smoothing type, etc.");
@@ -68,6 +73,14 @@ int main(int argc, char *argv[]) {
     po::store(po::parse_command_line(argc, argv, options), vm);
     po::notify(vm);
 
+    // required() appeared in Boost 1.42.0.
+#if BOOST_VERSION < 104200
+    if (!vm.count("order")) {
+      std::cerr << "the option '--order' is required but missing" << std::endl;
+      return 1;
+    }
+#endif
+
     util::NormalizeTempPrefix(pipeline.sort.temp_prefix);
 
     lm::builder::InitialProbabilitiesConfig &initial = pipeline.initial_probs;
-- 
cgit v1.2.3