blob: f1d6c5f61366c9e5cf9a5936929939f7632f8f03 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
|
#ifndef LM_BUILDER_PIPELINE__
#define LM_BUILDER_PIPELINE__
#include "lm/builder/initial_probabilities.hh"
#include "lm/builder/header_info.hh"
#include "util/stream/config.hh"
#include "util/file_piece.hh"
#include <string>
#include <cstddef>
namespace lm { namespace builder {
struct PipelineConfig {
std::size_t order;
std::string vocab_file;
util::stream::SortConfig sort;
InitialProbabilitiesConfig initial_probs;
util::stream::ChainConfig read_backoffs;
bool verbose_header;
// Amount of memory to assume that the vocabulary hash table will use. This
// is subtracted from total memory for CorpusCount.
std::size_t assume_vocab_hash_size;
// Minimum block size to tolerate.
std::size_t minimum_block;
// Number of blocks to use. This will be overridden to 1 if everything fits.
std::size_t block_count;
const std::string &TempPrefix() const { return sort.temp_prefix; }
std::size_t TotalMemory() const { return sort.total_memory; }
};
// Entry point declared here; its definition is not part of this header.
// config is passed by value, so the callee operates on its own copy.
// Takes ownership of text_file.
// NOTE(review): text_file and out_arpa are plain ints, presumably file
// descriptors for the input text and the ARPA output -- confirm against the
// definition.  Ownership of out_arpa is not stated here.
void Pipeline(PipelineConfig config, int text_file, int out_arpa);
}} // namespaces
#endif // LM_BUILDER_PIPELINE__
|