summaryrefslogtreecommitdiff
path: root/klm/lm/builder/pipeline.hh
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2013-01-21 12:29:43 +0100
committerPatrick Simianer <p@simianer.de>2013-01-21 12:29:43 +0100
commit50f22047eb1b7f2d60e85cdcf0fcd86342e50523 (patch)
tree730dabaf2fa57b1e4536d40f036b46795d37f289 /klm/lm/builder/pipeline.hh
parent8b399cb09513cd79ed4182be9f75882c1e1b336a (diff)
parent608886384da40aedfabd629c882b8ea9b3f6348e (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'klm/lm/builder/pipeline.hh')
-rw-r--r--klm/lm/builder/pipeline.hh40
1 files changed, 40 insertions, 0 deletions
diff --git a/klm/lm/builder/pipeline.hh b/klm/lm/builder/pipeline.hh
new file mode 100644
index 00000000..f1d6c5f6
--- /dev/null
+++ b/klm/lm/builder/pipeline.hh
@@ -0,0 +1,40 @@
+#ifndef LM_BUILDER_PIPELINE__
+#define LM_BUILDER_PIPELINE__
+
+#include "lm/builder/initial_probabilities.hh"
+#include "lm/builder/header_info.hh"
+#include "util/stream/config.hh"
+#include "util/file_piece.hh"
+
+#include <string>
+#include <cstddef>
+
+namespace lm { namespace builder {
+
+struct PipelineConfig {
+ std::size_t order;
+ std::string vocab_file;
+ util::stream::SortConfig sort;
+ InitialProbabilitiesConfig initial_probs;
+ util::stream::ChainConfig read_backoffs;
+ bool verbose_header;
+
+ // Amount of memory to assume that the vocabulary hash table will use. This
+ // is subtracted from total memory for CorpusCount.
+ std::size_t assume_vocab_hash_size;
+
+ // Minimum block size to tolerate.
+ std::size_t minimum_block;
+
+ // Number of blocks to use. This will be overridden to 1 if everything fits.
+ std::size_t block_count;
+
+ const std::string &TempPrefix() const { return sort.temp_prefix; }
+ std::size_t TotalMemory() const { return sort.total_memory; }
+};
+
+// Takes ownership of text_file.
+void Pipeline(PipelineConfig config, int text_file, int out_arpa);
+
+}} // namespaces
+#endif // LM_BUILDER_PIPELINE__