diff options
author | Kenneth Heafield <github@kheafield.com> | 2013-04-24 10:12:41 +0100 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2013-04-24 10:12:41 +0100 |
commit | db960a8bba81df3217660ec5a96d73e0d6baa01b (patch) | |
tree | 7d84cff7fc47fda4ce28ca5164ab74ebf7f6ece8 /klm/lm/builder/lmplz_main.cc | |
parent | bf10ad9d1d3a17ae82804f947616db89f41d4f28 (diff) |
KenLM 0831569c3137536165b107c6841603c725dfa2b1
Diffstat (limited to 'klm/lm/builder/lmplz_main.cc')
-rw-r--r-- | klm/lm/builder/lmplz_main.cc | 17 |
1 file changed, 15 insertions, 2 deletions
diff --git a/klm/lm/builder/lmplz_main.cc b/klm/lm/builder/lmplz_main.cc index 90b9dca2..1e086dcc 100644 --- a/klm/lm/builder/lmplz_main.cc +++ b/klm/lm/builder/lmplz_main.cc @@ -6,6 +6,7 @@ #include <iostream> #include <boost/program_options.hpp> +#include <boost/version.hpp> namespace { class SizeNotify { @@ -33,13 +34,17 @@ int main(int argc, char *argv[]) { lm::builder::PipelineConfig pipeline; options.add_options() - ("order,o", po::value<std::size_t>(&pipeline.order)->required(), "Order of the model") + ("order,o", po::value<std::size_t>(&pipeline.order) +#if BOOST_VERSION >= 104200 + ->required() +#endif + , "Order of the model") ("interpolate_unigrams", po::bool_switch(&pipeline.initial_probs.interpolate_unigrams), "Interpolate the unigrams (default: emulate SRILM by not interpolating)") ("temp_prefix,T", po::value<std::string>(&pipeline.sort.temp_prefix)->default_value("/tmp/lm"), "Temporary file prefix") ("memory,S", SizeOption(pipeline.sort.total_memory, util::GuessPhysicalMemory() ? 
"80%" : "1G"), "Sorting memory") - ("vocab_memory", SizeOption(pipeline.assume_vocab_hash_size, "50M"), "Assume that the vocabulary hash table will use this much memory for purposes of calculating total memory in the count step") ("minimum_block", SizeOption(pipeline.minimum_block, "8K"), "Minimum block size to allow") ("sort_block", SizeOption(pipeline.sort.buffer_size, "64M"), "Size of IO operations for sort (determines arity)") + ("vocab_estimate", po::value<lm::WordIndex>(&pipeline.vocab_estimate)->default_value(1000000), "Assume this vocabulary size for purposes of calculating memory in step 1 (corpus count) and pre-sizing the hash table") ("block_count", po::value<std::size_t>(&pipeline.block_count)->default_value(2), "Block count (per order)") ("vocab_file", po::value<std::string>(&pipeline.vocab_file)->default_value(""), "Location to write vocabulary file") ("verbose_header", po::bool_switch(&pipeline.verbose_header), "Add a verbose header to the ARPA file that includes information such as token count, smoothing type, etc."); @@ -68,6 +73,14 @@ int main(int argc, char *argv[]) { po::store(po::parse_command_line(argc, argv, options), vm); po::notify(vm); + // required() appeared in Boost 1.42.0. +#if BOOST_VERSION < 104200 + if (!vm.count("order")) { + std::cerr << "the option '--order' is required but missing" << std::endl; + return 1; + } +#endif + util::NormalizeTempPrefix(pipeline.sort.temp_prefix); lm::builder::InitialProbabilitiesConfig &initial = pipeline.initial_probs; |