summary | refs | log | tree | commit | diff
path: root/klm/lm/binary_format.hh
diff options
context:
space:
mode:
author: Kenneth Heafield <github@kheafield.com> 2011-08-18 12:14:01 +0100
committer: Kenneth Heafield <github@kheafield.com> 2011-08-18 12:14:01 +0100
commit: 7607b0a7873f52d6e3ea387bf88c773cbb55f8ee (patch)
tree: 908fd94fea8d09725bc86ec9b3752b89c78338e5 /klm/lm/binary_format.hh
parent: d92124ccc866192e4cdc689f2b41f0324d35dd3b (diff)
KenLM update: Bhiksha's trick, simple test for lms without unk, auto-detect binary files instead of requiring them to be specified at runtime.
Diffstat (limited to 'klm/lm/binary_format.hh')
-rw-r--r-- klm/lm/binary_format.hh 9
1 files changed, 7 insertions, 2 deletions
diff --git a/klm/lm/binary_format.hh b/klm/lm/binary_format.hh
index 2b32b450..d28cb6c5 100644
--- a/klm/lm/binary_format.hh
+++ b/klm/lm/binary_format.hh
@@ -16,7 +16,12 @@
namespace lm {
namespace ngram {
-typedef enum {HASH_PROBING=0, HASH_SORTED=1, TRIE_SORTED=2, QUANT_TRIE_SORTED=3} ModelType;
+/* Not the best numbering system, but it grew this way for historical reasons
+ * and I want to preserve existing binary files. */
+typedef enum {HASH_PROBING=0, HASH_SORTED=1, TRIE_SORTED=2, QUANT_TRIE_SORTED=3, ARRAY_TRIE_SORTED=4, QUANT_ARRAY_TRIE_SORTED=5} ModelType;
+
+const static ModelType kQuantAdd = static_cast<ModelType>(QUANT_TRIE_SORTED - TRIE_SORTED);
+const static ModelType kArrayAdd = static_cast<ModelType>(ARRAY_TRIE_SORTED - TRIE_SORTED);
/*Inspect a file to determine if it is a binary lm. If not, return false.
* If so, return true and set recognized to the type. This is the only API in
@@ -55,7 +60,7 @@ void AdvanceOrThrow(int fd, off_t off);
// Create just enough of a binary file to write vocabulary to it.
uint8_t *SetupJustVocab(const Config &config, uint8_t order, std::size_t memory_size, Backing &backing);
// Grow the binary file for the search data structure and set backing.search, returning the memory address where the search data structure should begin.
-uint8_t *GrowForSearch(const Config &config, std::size_t memory_size, Backing &backing);
+uint8_t *GrowForSearch(const Config &config, std::size_t vocab_pad, std::size_t memory_size, Backing &backing);
// Write header to binary file. This is done last to prevent incomplete files
// from loading.