summary refs log tree commit diff
path: root/klm/lm/vocab.cc
diff options
context:
space:
mode:
Diffstat (limited to 'klm/lm/vocab.cc')
-rw-r--r-- klm/lm/vocab.cc 14
1 files changed, 9 insertions, 5 deletions
diff --git a/klm/lm/vocab.cc b/klm/lm/vocab.cc
index c30428b2..ae79c727 100644
--- a/klm/lm/vocab.cc
+++ b/klm/lm/vocab.cc
@@ -68,15 +68,19 @@ void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
} // namespace
-WriteWordsWrapper::WriteWordsWrapper(EnumerateVocab *inner, int fd) : inner_(inner), fd_(fd) {}
+WriteWordsWrapper::WriteWordsWrapper(EnumerateVocab *inner) : inner_(inner) {}
WriteWordsWrapper::~WriteWordsWrapper() {}
void WriteWordsWrapper::Add(WordIndex index, const StringPiece &str) {
if (inner_) inner_->Add(index, str);
- WriteOrThrow(fd_, str.data(), str.size());
- char null_byte = 0;
- // Inefficient because it's unbuffered. Sue me.
- WriteOrThrow(fd_, &null_byte, 1);
+ buffer_.append(str.data(), str.size());
+ buffer_.push_back(0);
+}
+
+void WriteWordsWrapper::Write(int fd) {
+ if ((off_t)-1 == lseek(fd, 0, SEEK_END))
+ UTIL_THROW(util::ErrnoException, "Failed to seek in binary to vocab words");
+ WriteOrThrow(fd, buffer_.data(), buffer_.size());
}
SortedVocabulary::SortedVocabulary() : begin_(NULL), end_(NULL), enumerate_(NULL) {}