diff options
author | Patrick Simianer <p@simianer.de> | 2013-01-21 12:29:43 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2013-01-21 12:29:43 +0100 |
commit | 0d23f8aecbfaf982cd165ebfc2a1611cefcc7275 (patch) | |
tree | 8eafa6ea43224ff70635cadd4d6f027d28f4986f /klm/lm/builder/pipeline.hh | |
parent | dbc66cd3944321961c5e11d5254fd914f05a98ad (diff) | |
parent | 7cac43b858f3b681555bf0578f54b1f822c43207 (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'klm/lm/builder/pipeline.hh')
-rw-r--r-- | klm/lm/builder/pipeline.hh | 40 |
1 files changed, 40 insertions, 0 deletions
```diff
diff --git a/klm/lm/builder/pipeline.hh b/klm/lm/builder/pipeline.hh
new file mode 100644
index 00000000..f1d6c5f6
--- /dev/null
+++ b/klm/lm/builder/pipeline.hh
@@ -0,0 +1,40 @@
+#ifndef LM_BUILDER_PIPELINE__
+#define LM_BUILDER_PIPELINE__
+
+#include "lm/builder/initial_probabilities.hh"
+#include "lm/builder/header_info.hh"
+#include "util/stream/config.hh"
+#include "util/file_piece.hh"
+
+#include <string>
+#include <cstddef>
+
+namespace lm { namespace builder {
+
+struct PipelineConfig {
+  std::size_t order;
+  std::string vocab_file;
+  util::stream::SortConfig sort;
+  InitialProbabilitiesConfig initial_probs;
+  util::stream::ChainConfig read_backoffs;
+  bool verbose_header;
+
+  // Amount of memory to assume that the vocabulary hash table will use.  This
+  // is subtracted from total memory for CorpusCount.
+  std::size_t assume_vocab_hash_size;
+
+  // Minimum block size to tolerate.
+  std::size_t minimum_block;
+
+  // Number of blocks to use.  This will be overridden to 1 if everything fits.
+  std::size_t block_count;
+
+  const std::string &TempPrefix() const { return sort.temp_prefix; }
+  std::size_t TotalMemory() const { return sort.total_memory; }
+};
+
+// Takes ownership of text_file.
+void Pipeline(PipelineConfig config, int text_file, int out_arpa);
+
+}} // namespaces
+#endif // LM_BUILDER_PIPELINE__
```