From db960a8bba81df3217660ec5a96d73e0d6baa01b Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Wed, 24 Apr 2013 10:12:41 +0100 Subject: KenLM 0831569c3137536165b107c6841603c725dfa2b1 --- klm/lm/builder/pipeline.hh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'klm/lm/builder/pipeline.hh') diff --git a/klm/lm/builder/pipeline.hh b/klm/lm/builder/pipeline.hh index f1d6c5f6..845e5481 100644 --- a/klm/lm/builder/pipeline.hh +++ b/klm/lm/builder/pipeline.hh @@ -3,6 +3,7 @@ #include "lm/builder/initial_probabilities.hh" #include "lm/builder/header_info.hh" +#include "lm/word_index.hh" #include "util/stream/config.hh" #include "util/file_piece.hh" @@ -19,9 +20,9 @@ struct PipelineConfig { util::stream::ChainConfig read_backoffs; bool verbose_header; - // Amount of memory to assume that the vocabulary hash table will use. This - // is subtracted from total memory for CorpusCount. - std::size_t assume_vocab_hash_size; + // Estimated vocabulary size. Used for sizing CorpusCount memory and + // initial probing hash table sizing, also in CorpusCount. + lm::WordIndex vocab_estimate; // Minimum block size to tolerate. std::size_t minimum_block; @@ -33,7 +34,7 @@ struct PipelineConfig { std::size_t TotalMemory() const { return sort.total_memory; } }; -// Takes ownership of text_file. +// Takes ownership of text_file and out_arpa. void Pipeline(PipelineConfig config, int text_file, int out_arpa); }} // namespaces -- cgit v1.2.3