summary refs log tree commit diff
path: root/klm/lm/binary_format.hh
diff options
context:
space:
mode:
author Kenneth Heafield <kheafiel@cluster12.lti.ece.cmu.local> 2011-05-20 16:19:04 -0400
committer Kenneth Heafield <kheafiel@cluster12.lti.ece.cmu.local> 2011-05-20 16:19:04 -0400
commit 461c2670efb0968ccc6789ff0c9ca6f88ab31e80 (patch)
tree 6b344dcf320674213fd449e6a4915236ad78c29f /klm/lm/binary_format.hh
parent 0e7b303879baf95a8167194ad7c75ef738e79f15 (diff)
kenlm update including being nicer to NFS
Diffstat (limited to 'klm/lm/binary_format.hh')
-rw-r--r-- klm/lm/binary_format.hh 13
1 files changed, 10 insertions, 3 deletions
diff --git a/klm/lm/binary_format.hh b/klm/lm/binary_format.hh
index 72d8c159..1fc71be4 100644
--- a/klm/lm/binary_format.hh
+++ b/klm/lm/binary_format.hh
@@ -18,6 +18,12 @@ namespace ngram {
typedef enum {HASH_PROBING=0, HASH_SORTED=1, TRIE_SORTED=2} ModelType;
+/*Inspect a file to determine if it is a binary lm. If not, return false.
+ * If so, return true and set recognized to the type. This is the only API in
+ * this header designed for use by decoder authors.
+ */
+bool RecognizeBinary(const char *file, ModelType &recognized);
+
struct FixedWidthParameters {
unsigned char order;
float probing_multiplier;
@@ -27,6 +33,7 @@ struct FixedWidthParameters {
bool has_vocabulary;
};
+// Parameters stored in the header of a binary file.
struct Parameters {
FixedWidthParameters fixed;
std::vector<uint64_t> counts;
@@ -41,10 +48,13 @@ struct Backing {
util::scoped_memory search;
};
+// Create just enough of a binary file to write vocabulary to it.
uint8_t *SetupJustVocab(const Config &config, uint8_t order, std::size_t memory_size, Backing &backing);
// Grow the binary file for the search data structure and set backing.search, returning the memory address where the search data structure should begin.
uint8_t *GrowForSearch(const Config &config, std::size_t memory_size, Backing &backing);
+// Write header to binary file. This is done last to prevent incomplete files
+// from loading.
void FinishFile(const Config &config, ModelType model_type, const std::vector<uint64_t> &counts, Backing &backing);
namespace detail {
@@ -61,8 +71,6 @@ void ComplainAboutARPA(const Config &config, ModelType model_type);
} // namespace detail
-bool RecognizeBinary(const char *file, ModelType &recognized);
-
template <class To> void LoadLM(const char *file, const Config &config, To &to) {
Backing &backing = to.MutableBacking();
backing.file.reset(util::OpenReadOrThrow(file));
@@ -86,7 +94,6 @@ template <class To> void LoadLM(const char *file, const Config &config, To &to)
e << " File: " << file;
throw;
}
-
}
} // namespace ngram