diff options
author | Kenneth Heafield <github@kheafield.com> | 2013-04-24 10:12:41 +0100 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2013-04-24 10:12:41 +0100 |
commit | db960a8bba81df3217660ec5a96d73e0d6baa01b (patch) | |
tree | 7d84cff7fc47fda4ce28ca5164ab74ebf7f6ece8 /klm/lm/builder/pipeline.hh | |
parent | bf10ad9d1d3a17ae82804f947616db89f41d4f28 (diff) |
KenLM 0831569c3137536165b107c6841603c725dfa2b1
Diffstat (limited to 'klm/lm/builder/pipeline.hh')
-rw-r--r-- | klm/lm/builder/pipeline.hh | 9 |
1 file changed, 5 insertions, 4 deletions
diff --git a/klm/lm/builder/pipeline.hh b/klm/lm/builder/pipeline.hh
index f1d6c5f6..845e5481 100644
--- a/klm/lm/builder/pipeline.hh
+++ b/klm/lm/builder/pipeline.hh
@@ -3,6 +3,7 @@
 
 #include "lm/builder/initial_probabilities.hh"
 #include "lm/builder/header_info.hh"
+#include "lm/word_index.hh"
 #include "util/stream/config.hh"
 #include "util/file_piece.hh"
 
@@ -19,9 +20,9 @@ struct PipelineConfig {
   util::stream::ChainConfig read_backoffs;
   bool verbose_header;
 
-  // Amount of memory to assume that the vocabulary hash table will use. This
-  // is subtracted from total memory for CorpusCount.
-  std::size_t assume_vocab_hash_size;
+  // Estimated vocabulary size. Used for sizing CorpusCount memory and
+  // initial probing hash table sizing, also in CorpusCount.
+  lm::WordIndex vocab_estimate;
 
   // Minimum block size to tolerate.
   std::size_t minimum_block;
@@ -33,7 +34,7 @@ struct PipelineConfig {
   std::size_t TotalMemory() const { return sort.total_memory; }
 };
 
-// Takes ownership of text_file.
+// Takes ownership of text_file and out_arpa.
 void Pipeline(PipelineConfig config, int text_file, int out_arpa);
 
 }} // namespaces