summaryrefslogtreecommitdiff
path: root/klm/lm/vocab.hh
diff options
context:
space:
mode:
Diffstat (limited to 'klm/lm/vocab.hh')
-rw-r--r--klm/lm/vocab.hh11
1 files changed, 7 insertions, 4 deletions
diff --git a/klm/lm/vocab.hh b/klm/lm/vocab.hh
index a25432f9..3902f117 100644
--- a/klm/lm/vocab.hh
+++ b/klm/lm/vocab.hh
@@ -4,6 +4,7 @@
#include "lm/enumerate_vocab.hh"
#include "lm/lm_exception.hh"
#include "lm/virtual_interface.hh"
+#include "util/pool.hh"
#include "util/probing_hash_table.hh"
#include "util/sorted_uniform.hh"
#include "util/string_piece.hh"
@@ -35,7 +36,7 @@ class WriteWordsWrapper : public EnumerateVocab {
void Add(WordIndex index, const StringPiece &str);
- void Write(int fd);
+ void Write(int fd, uint64_t start);
private:
EnumerateVocab *inner_;
@@ -62,7 +63,7 @@ class SortedVocabulary : public base::Vocabulary {
}
// Size for purposes of file writing
- static size_t Size(std::size_t entries, const Config &config);
+ static uint64_t Size(uint64_t entries, const Config &config);
// Vocab words are [0, Bound()) Only valid after FinishedLoading/LoadedBinary.
WordIndex Bound() const { return bound_; }
@@ -96,7 +97,9 @@ class SortedVocabulary : public base::Vocabulary {
EnumerateVocab *enumerate_;
// Actual strings. Used only when loading from ARPA and enumerate_ != NULL
- std::vector<std::string> strings_to_enumerate_;
+ util::Pool string_backing_;
+
+ std::vector<StringPiece> strings_to_enumerate_;
};
#pragma pack(push)
@@ -129,7 +132,7 @@ class ProbingVocabulary : public base::Vocabulary {
return lookup_.Find(detail::HashForVocab(str), i) ? i->value : 0;
}
- static size_t Size(std::size_t entries, const Config &config);
+ static uint64_t Size(uint64_t entries, const Config &config);
// Vocab words are [0, Bound()).
WordIndex Bound() const { return bound_; }