summaryrefslogtreecommitdiff
path: root/klm/lm/builder/pipeline.hh
diff options
context:
space:
mode:
authorPaul Baltescu <pauldb89@gmail.com>2013-04-24 17:18:10 +0100
committerPaul Baltescu <pauldb89@gmail.com>2013-04-24 17:18:10 +0100
commite8b412577b9d3fe2090b9f48443f919cd268c809 (patch)
treeb46a7b51d365519dfb5170d71bac33be6d3e29b9 /klm/lm/builder/pipeline.hh
parentd189426a7ea56b71eb6e25ed02a7b0993cfb56a8 (diff)
parent5aee54869aa19cfe9be965e67a472e94449d16da (diff)
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'klm/lm/builder/pipeline.hh')
-rw-r--r--klm/lm/builder/pipeline.hh9
1 files changed, 5 insertions, 4 deletions
diff --git a/klm/lm/builder/pipeline.hh b/klm/lm/builder/pipeline.hh
index f1d6c5f6..845e5481 100644
--- a/klm/lm/builder/pipeline.hh
+++ b/klm/lm/builder/pipeline.hh
@@ -3,6 +3,7 @@
#include "lm/builder/initial_probabilities.hh"
#include "lm/builder/header_info.hh"
+#include "lm/word_index.hh"
#include "util/stream/config.hh"
#include "util/file_piece.hh"
@@ -19,9 +20,9 @@ struct PipelineConfig {
util::stream::ChainConfig read_backoffs;
bool verbose_header;
- // Amount of memory to assume that the vocabulary hash table will use. This
- // is subtracted from total memory for CorpusCount.
- std::size_t assume_vocab_hash_size;
+ // Estimated vocabulary size. Used for sizing CorpusCount memory and
+ // initial probing hash table sizing, also in CorpusCount.
+ lm::WordIndex vocab_estimate;
// Minimum block size to tolerate.
std::size_t minimum_block;
@@ -33,7 +34,7 @@ struct PipelineConfig {
std::size_t TotalMemory() const { return sort.total_memory; }
};
-// Takes ownership of text_file.
+// Takes ownership of text_file and out_arpa.
void Pipeline(PipelineConfig config, int text_file, int out_arpa);
}} // namespaces