diff options
author | Paul Baltescu <pauldb89@gmail.com> | 2013-04-24 17:18:10 +0100 |
---|---|---|
committer | Paul Baltescu <pauldb89@gmail.com> | 2013-04-24 17:18:10 +0100 |
commit | e8b412577b9d3fe2090b9f48443f919cd268c809 (patch) | |
tree | b46a7b51d365519dfb5170d71bac33be6d3e29b9 /klm/lm/builder/pipeline.hh | |
parent | d189426a7ea56b71eb6e25ed02a7b0993cfb56a8 (diff) | |
parent | 5aee54869aa19cfe9be965e67a472e94449d16da (diff) |
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'klm/lm/builder/pipeline.hh')
-rw-r--r-- | klm/lm/builder/pipeline.hh | 9 |
1 files changed, 5 insertions, 4 deletions
diff --git a/klm/lm/builder/pipeline.hh b/klm/lm/builder/pipeline.hh index f1d6c5f6..845e5481 100644 --- a/klm/lm/builder/pipeline.hh +++ b/klm/lm/builder/pipeline.hh @@ -3,6 +3,7 @@ #include "lm/builder/initial_probabilities.hh" #include "lm/builder/header_info.hh" +#include "lm/word_index.hh" #include "util/stream/config.hh" #include "util/file_piece.hh" @@ -19,9 +20,9 @@ struct PipelineConfig { util::stream::ChainConfig read_backoffs; bool verbose_header; - // Amount of memory to assume that the vocabulary hash table will use. This - // is subtracted from total memory for CorpusCount. - std::size_t assume_vocab_hash_size; + // Estimated vocabulary size. Used for sizing CorpusCount memory and + // initial probing hash table sizing, also in CorpusCount. + lm::WordIndex vocab_estimate; // Minimum block size to tolerate. std::size_t minimum_block; @@ -33,7 +34,7 @@ struct PipelineConfig { std::size_t TotalMemory() const { return sort.total_memory; } }; -// Takes ownership of text_file. +// Takes ownership of text_file and out_arpa. void Pipeline(PipelineConfig config, int text_file, int out_arpa); }} // namespaces |