diff options
author | Kenneth Heafield <github@kheafield.com> | 2013-01-18 17:12:51 +0000 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2013-01-18 17:12:51 +0000 |
commit | d884099e0db8b4510847ec106b59ef7dca3c245b (patch) | |
tree | b45a3f17eb002e224a7b728e0f985a15e2503196 /klm/lm/builder/pipeline.hh | |
parent | bae5fe99037ae7e101953ad0df118127191c711c (diff) |
KenLM dffafbf with lmplz source (but not built)
Diffstat (limited to 'klm/lm/builder/pipeline.hh')
-rw-r--r-- | klm/lm/builder/pipeline.hh | 40 |
1 files changed, 40 insertions, 0 deletions
diff --git a/klm/lm/builder/pipeline.hh b/klm/lm/builder/pipeline.hh new file mode 100644 index 00000000..f1d6c5f6 --- /dev/null +++ b/klm/lm/builder/pipeline.hh @@ -0,0 +1,40 @@ +#ifndef LM_BUILDER_PIPELINE__ +#define LM_BUILDER_PIPELINE__ + +#include "lm/builder/initial_probabilities.hh" +#include "lm/builder/header_info.hh" +#include "util/stream/config.hh" +#include "util/file_piece.hh" + +#include <string> +#include <cstddef> + +namespace lm { namespace builder { + +struct PipelineConfig { + std::size_t order; + std::string vocab_file; + util::stream::SortConfig sort; + InitialProbabilitiesConfig initial_probs; + util::stream::ChainConfig read_backoffs; + bool verbose_header; + + // Amount of memory to assume that the vocabulary hash table will use. This + // is subtracted from total memory for CorpusCount. + std::size_t assume_vocab_hash_size; + + // Minimum block size to tolerate. + std::size_t minimum_block; + + // Number of blocks to use. This will be overridden to 1 if everything fits. + std::size_t block_count; + + const std::string &TempPrefix() const { return sort.temp_prefix; } + std::size_t TotalMemory() const { return sort.total_memory; } +}; + +// Takes ownership of text_file. +void Pipeline(PipelineConfig config, int text_file, int out_arpa); + +}} // namespaces +#endif // LM_BUILDER_PIPELINE__ |