summary | refs | log | tree | commit | diff
path: root/klm/lm/builder/corpus_count.hh
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2014-10-13 19:03:48 +0100
committer Patrick Simianer <p@simianer.de> 2014-10-13 19:03:48 +0100
commit cb9fb7088dde35881516c088db402abe747d49fa (patch)
tree a91e4935a7941f1b261f76d88ab41fa3078a1891 /klm/lm/builder/corpus_count.hh
parent 0a00e57e921c8eca8e02364db7d2e6607bfdcebc (diff)
parent b1ed81ef3216b212295afa76c5d20a56fb647204 (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'klm/lm/builder/corpus_count.hh')
-rw-r--r-- klm/lm/builder/corpus_count.hh 11
1 files changed, 7 insertions, 4 deletions
diff --git a/klm/lm/builder/corpus_count.hh b/klm/lm/builder/corpus_count.hh
index aa0ed8ed..da4ff9fc 100644
--- a/klm/lm/builder/corpus_count.hh
+++ b/klm/lm/builder/corpus_count.hh
@@ -1,6 +1,7 @@
-#ifndef LM_BUILDER_CORPUS_COUNT__
-#define LM_BUILDER_CORPUS_COUNT__
+#ifndef LM_BUILDER_CORPUS_COUNT_H
+#define LM_BUILDER_CORPUS_COUNT_H
+#include "lm/lm_exception.hh"
#include "lm/word_index.hh"
#include "util/scoped.hh"
@@ -28,7 +29,7 @@ class CorpusCount {
// token_count: out.
// type_count aka vocabulary size. Initialize to an estimate. It is set to the exact value.
- CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::size_t entries_per_block);
+ CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::size_t entries_per_block, WarningAction disallowed_symbol);
void Run(const util::stream::ChainPosition &position);
@@ -40,8 +41,10 @@ class CorpusCount {
std::size_t dedupe_mem_size_;
util::scoped_malloc dedupe_mem_;
+
+ WarningAction disallowed_symbol_action_;
};
} // namespace builder
} // namespace lm
-#endif // LM_BUILDER_CORPUS_COUNT__
+#endif // LM_BUILDER_CORPUS_COUNT_H