summaryrefslogtreecommitdiff
path: root/klm/lm/search_trie.hh
diff options
context:
space:
mode:
authorKenneth Heafield <github@kheafield.com>2014-01-27 17:42:19 -0800
committerKenneth Heafield <github@kheafield.com>2014-01-27 17:42:19 -0800
commiteecc4cbf5a6e28f910130cd4a58444e9d4701ea9 (patch)
treecb32ace0bef145fd797c1d43f21768b111dbbcab /klm/lm/search_trie.hh
parent5ab6eb44d67a48ea5b366d8b2878f3da7ef960e4 (diff)
KenLM 5cc905bc2d214efa7de2db56a9a672b749a95591
Diffstat (limited to 'klm/lm/search_trie.hh')
-rw-r--r--klm/lm/search_trie.hh23
1 files changed, 11 insertions, 12 deletions
diff --git a/klm/lm/search_trie.hh b/klm/lm/search_trie.hh
index 60be416b..299262a5 100644
--- a/klm/lm/search_trie.hh
+++ b/klm/lm/search_trie.hh
@@ -11,18 +11,19 @@
#include "util/file_piece.hh"
#include <vector>
+#include <cstdlib>
#include <assert.h>
namespace lm {
namespace ngram {
-struct Backing;
+class BinaryFormat;
class SortedVocabulary;
namespace trie {
template <class Quant, class Bhiksha> class TrieSearch;
class SortedFiles;
-template <class Quant, class Bhiksha> void BuildTrie(SortedFiles &files, std::vector<uint64_t> &counts, const Config &config, TrieSearch<Quant, Bhiksha> &out, Quant &quant, const SortedVocabulary &vocab, Backing &backing);
+template <class Quant, class Bhiksha> void BuildTrie(SortedFiles &files, std::vector<uint64_t> &counts, const Config &config, TrieSearch<Quant, Bhiksha> &out, Quant &quant, SortedVocabulary &vocab, BinaryFormat &backing);
template <class Quant, class Bhiksha> class TrieSearch {
public:
@@ -38,11 +39,11 @@ template <class Quant, class Bhiksha> class TrieSearch {
static const unsigned int kVersion = 1;
- static void UpdateConfigFromBinary(int fd, const std::vector<uint64_t> &counts, Config &config) {
- Quant::UpdateConfigFromBinary(fd, counts, config);
- util::AdvanceOrThrow(fd, Quant::Size(counts.size(), config) + Unigram::Size(counts[0]));
+ static void UpdateConfigFromBinary(const BinaryFormat &file, const std::vector<uint64_t> &counts, uint64_t offset, Config &config) {
+ Quant::UpdateConfigFromBinary(file, offset, config);
// Currently the unigram pointers are not compresssed, so there will only be a header for order > 2.
- if (counts.size() > 2) Bhiksha::UpdateConfigFromBinary(fd, config);
+ if (counts.size() > 2)
+ Bhiksha::UpdateConfigFromBinary(file, offset + Quant::Size(counts.size(), config) + Unigram::Size(counts[0]), config);
}
static uint64_t Size(const std::vector<uint64_t> &counts, const Config &config) {
@@ -59,9 +60,7 @@ template <class Quant, class Bhiksha> class TrieSearch {
uint8_t *SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config);
- void LoadedBinary();
-
- void InitializeFromARPA(const char *file, util::FilePiece &f, std::vector<uint64_t> &counts, const Config &config, SortedVocabulary &vocab, Backing &backing);
+ void InitializeFromARPA(const char *file, util::FilePiece &f, std::vector<uint64_t> &counts, const Config &config, SortedVocabulary &vocab, BinaryFormat &backing);
unsigned char Order() const {
return middle_end_ - middle_begin_ + 2;
@@ -102,14 +101,14 @@ template <class Quant, class Bhiksha> class TrieSearch {
}
private:
- friend void BuildTrie<Quant, Bhiksha>(SortedFiles &files, std::vector<uint64_t> &counts, const Config &config, TrieSearch<Quant, Bhiksha> &out, Quant &quant, const SortedVocabulary &vocab, Backing &backing);
+ friend void BuildTrie<Quant, Bhiksha>(SortedFiles &files, std::vector<uint64_t> &counts, const Config &config, TrieSearch<Quant, Bhiksha> &out, Quant &quant, SortedVocabulary &vocab, BinaryFormat &backing);
- // Middles are managed manually so we can delay construction and they don't have to be copyable.
+ // Middles are managed manually so we can delay construction and they don't have to be copyable.
void FreeMiddles() {
for (const Middle *i = middle_begin_; i != middle_end_; ++i) {
i->~Middle();
}
- free(middle_begin_);
+ std::free(middle_begin_);
}
typedef trie::BitPackedMiddle<Bhiksha> Middle;