summaryrefslogtreecommitdiff
path: root/klm/lm/builder/lmplz_main.cc
diff options
context:
space:
mode:
Diffstat (limited to 'klm/lm/builder/lmplz_main.cc')
-rw-r--r--klm/lm/builder/lmplz_main.cc17
1 files changed, 15 insertions, 2 deletions
diff --git a/klm/lm/builder/lmplz_main.cc b/klm/lm/builder/lmplz_main.cc
index 90b9dca2..1e086dcc 100644
--- a/klm/lm/builder/lmplz_main.cc
+++ b/klm/lm/builder/lmplz_main.cc
@@ -6,6 +6,7 @@
#include <iostream>
#include <boost/program_options.hpp>
+#include <boost/version.hpp>
namespace {
class SizeNotify {
@@ -33,13 +34,17 @@ int main(int argc, char *argv[]) {
lm::builder::PipelineConfig pipeline;
options.add_options()
- ("order,o", po::value<std::size_t>(&pipeline.order)->required(), "Order of the model")
+ ("order,o", po::value<std::size_t>(&pipeline.order)
+#if BOOST_VERSION >= 104200
+ ->required()
+#endif
+ , "Order of the model")
("interpolate_unigrams", po::bool_switch(&pipeline.initial_probs.interpolate_unigrams), "Interpolate the unigrams (default: emulate SRILM by not interpolating)")
("temp_prefix,T", po::value<std::string>(&pipeline.sort.temp_prefix)->default_value("/tmp/lm"), "Temporary file prefix")
("memory,S", SizeOption(pipeline.sort.total_memory, util::GuessPhysicalMemory() ? "80%" : "1G"), "Sorting memory")
- ("vocab_memory", SizeOption(pipeline.assume_vocab_hash_size, "50M"), "Assume that the vocabulary hash table will use this much memory for purposes of calculating total memory in the count step")
("minimum_block", SizeOption(pipeline.minimum_block, "8K"), "Minimum block size to allow")
("sort_block", SizeOption(pipeline.sort.buffer_size, "64M"), "Size of IO operations for sort (determines arity)")
+ ("vocab_estimate", po::value<lm::WordIndex>(&pipeline.vocab_estimate)->default_value(1000000), "Assume this vocabulary size for purposes of calculating memory in step 1 (corpus count) and pre-sizing the hash table")
("block_count", po::value<std::size_t>(&pipeline.block_count)->default_value(2), "Block count (per order)")
("vocab_file", po::value<std::string>(&pipeline.vocab_file)->default_value(""), "Location to write vocabulary file")
("verbose_header", po::bool_switch(&pipeline.verbose_header), "Add a verbose header to the ARPA file that includes information such as token count, smoothing type, etc.");
@@ -68,6 +73,14 @@ int main(int argc, char *argv[]) {
po::store(po::parse_command_line(argc, argv, options), vm);
po::notify(vm);
+ // required() appeared in Boost 1.42.0.
+#if BOOST_VERSION < 104200
+ if (!vm.count("order")) {
+ std::cerr << "the option '--order' is required but missing" << std::endl;
+ return 1;
+ }
+#endif
+
util::NormalizeTempPrefix(pipeline.sort.temp_prefix);
lm::builder::InitialProbabilitiesConfig &initial = pipeline.initial_probs;