summaryrefslogtreecommitdiff
path: root/klm/lm/ngram_config.hh
diff options
context:
space:
mode:
Diffstat (limited to 'klm/lm/ngram_config.hh')
-rw-r--r--klm/lm/ngram_config.hh58
1 files changed, 58 insertions, 0 deletions
diff --git a/klm/lm/ngram_config.hh b/klm/lm/ngram_config.hh
new file mode 100644
index 00000000..a7b3afae
--- /dev/null
+++ b/klm/lm/ngram_config.hh
@@ -0,0 +1,58 @@
+#ifndef LM_NGRAM_CONFIG__
+#define LM_NGRAM_CONFIG__
+
+/* Configuration for ngram model. Separate header to reduce pollution. */
+
+#include <iostream>
+
+namespace lm { namespace ngram {
+
+struct Config {
+ /* EFFECTIVE FOR BOTH ARPA AND BINARY READS */
+ // Where to log messages including the progress bar. Set to NULL for
+ // silence.
+ std::ostream *messages;
+
+
+
+ /* ONLY EFFECTIVE WHEN READING ARPA */
+
+ // What to do when <unk> isn't in the provided model.
+ typedef enum {THROW_UP, COMPLAIN, SILENT} UnknownMissing;
+ UnknownMissing unknown_missing;
+
+ // The probability to substitute for <unk> if it's missing from the model.
+ // No effect if the model has <unk> or unknown_missing == THROW_UP.
+ float unknown_missing_prob;
+
+ // Size multiplier for probing hash table. Must be > 1. Space is linear in
+ // this. Time is probing_multiplier / (probing_multiplier - 1). No effect
+ // for sorted variant.
+ // If you find yourself setting this to a low number, consider using the
+ // Sorted version instead which has lower memory consumption.
+ float probing_multiplier;
+
+ // While loading an ARPA file, also write out this binary format file. Set
+ // to NULL to disable.
+ const char *write_mmap;
+
+
+
+ /* ONLY EFFECTIVE WHEN READING BINARY */
+ bool prefault;
+
+
+
+ // Defaults.
+ Config() :
+ messages(&std::cerr),
+ unknown_missing(COMPLAIN),
+ unknown_missing_prob(0.0),
+ probing_multiplier(1.5),
+ write_mmap(NULL),
+ prefault(false) {}
+};
+
+} /* namespace ngram */ } /* namespace lm */
+
+#endif // LM_NGRAM_CONFIG__