diff options
Diffstat (limited to 'klm/lm/builder/pipeline.hh')
-rw-r--r-- | klm/lm/builder/pipeline.hh | 9 |
1 files changed, 5 insertions, 4 deletions
diff --git a/klm/lm/builder/pipeline.hh b/klm/lm/builder/pipeline.hh index f1d6c5f6..845e5481 100644 --- a/klm/lm/builder/pipeline.hh +++ b/klm/lm/builder/pipeline.hh @@ -3,6 +3,7 @@ #include "lm/builder/initial_probabilities.hh" #include "lm/builder/header_info.hh" +#include "lm/word_index.hh" #include "util/stream/config.hh" #include "util/file_piece.hh" @@ -19,9 +20,9 @@ struct PipelineConfig { util::stream::ChainConfig read_backoffs; bool verbose_header; - // Amount of memory to assume that the vocabulary hash table will use. This - // is subtracted from total memory for CorpusCount. - std::size_t assume_vocab_hash_size; + // Estimated vocabulary size. Used for sizing CorpusCount memory and + // initial probing hash table sizing, also in CorpusCount. + lm::WordIndex vocab_estimate; // Minimum block size to tolerate. std::size_t minimum_block; @@ -33,7 +34,7 @@ struct PipelineConfig { std::size_t TotalMemory() const { return sort.total_memory; } }; -// Takes ownership of text_file. +// Takes ownership of text_file and out_arpa. void Pipeline(PipelineConfig config, int text_file, int out_arpa); }} // namespaces |