diff options
author | armatthews <armatthe@cmu.edu> | 2014-10-13 14:59:23 -0400 |
---|---|---|
committer | armatthews <armatthe@cmu.edu> | 2014-10-13 14:59:23 -0400 |
commit | b26cda84e05d4523eee069234a975a0153bf8608 (patch) | |
tree | 61c9da4f8dd6070f27c8e81812a76fc0a8cf2d8d /klm/lm/builder/corpus_count.hh | |
parent | cd7bc67f475fdfd07fba003ac4cca40e83944740 (diff) | |
parent | b1ed81ef3216b212295afa76c5d20a56fb647204 (diff) |
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'klm/lm/builder/corpus_count.hh')
-rw-r--r-- | klm/lm/builder/corpus_count.hh | 11 |
1 files changed, 7 insertions, 4 deletions
```diff
diff --git a/klm/lm/builder/corpus_count.hh b/klm/lm/builder/corpus_count.hh
index aa0ed8ed..da4ff9fc 100644
--- a/klm/lm/builder/corpus_count.hh
+++ b/klm/lm/builder/corpus_count.hh
@@ -1,6 +1,7 @@
-#ifndef LM_BUILDER_CORPUS_COUNT__
-#define LM_BUILDER_CORPUS_COUNT__
+#ifndef LM_BUILDER_CORPUS_COUNT_H
+#define LM_BUILDER_CORPUS_COUNT_H
 
+#include "lm/lm_exception.hh"
 #include "lm/word_index.hh"
 #include "util/scoped.hh"
 
@@ -28,7 +29,7 @@ class CorpusCount {
 
     // token_count: out.
     // type_count aka vocabulary size. Initialize to an estimate. It is set to the exact value.
-    CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::size_t entries_per_block);
+    CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::size_t entries_per_block, WarningAction disallowed_symbol);
 
     void Run(const util::stream::ChainPosition &position);
 
@@ -40,8 +41,10 @@ class CorpusCount {
 
     std::size_t dedupe_mem_size_;
     util::scoped_malloc dedupe_mem_;
+
+    WarningAction disallowed_symbol_action_;
 };
 
 } // namespace builder
 } // namespace lm
-#endif // LM_BUILDER_CORPUS_COUNT__
+#endif // LM_BUILDER_CORPUS_COUNT_H
```