From db960a8bba81df3217660ec5a96d73e0d6baa01b Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Wed, 24 Apr 2013 10:12:41 +0100 Subject: KenLM 0831569c3137536165b107c6841603c725dfa2b1 --- klm/lm/builder/print.cc | 74 ++----------------------------------------------- 1 file changed, 3 insertions(+), 71 deletions(-) (limited to 'klm/lm/builder/print.cc') diff --git a/klm/lm/builder/print.cc b/klm/lm/builder/print.cc index b0323221..84bd81ca 100644 --- a/klm/lm/builder/print.cc +++ b/klm/lm/builder/print.cc @@ -1,15 +1,11 @@ #include "lm/builder/print.hh" -#include "util/double-conversion/double-conversion.h" -#include "util/double-conversion/utils.h" +#include "util/fake_ofstream.hh" #include "util/file.hh" #include "util/mmap.hh" #include "util/scoped.hh" #include "util/stream/timer.hh" -#define BOOST_LEXICAL_CAST_ASSUME_C_LOCALE -#include - #include #include @@ -28,71 +24,6 @@ VocabReconstitute::VocabReconstitute(int fd) { map_.push_back(i); } -namespace { -class OutputManager { - public: - static const std::size_t kOutBuf = 1048576; - - // Does not take ownership of out. - explicit OutputManager(int out) - : buf_(util::MallocOrThrow(kOutBuf)), - builder_(static_cast(buf_.get()), kOutBuf), - // Mostly the default but with inf instead. And no flags. - convert_(double_conversion::DoubleToStringConverter::NO_FLAGS, "inf", "NaN", 'e', -6, 21, 6, 0), - fd_(out) {} - - ~OutputManager() { - Flush(); - } - - OutputManager &operator<<(float value) { - // Odd, but this is the largest number found in the comments. - EnsureRemaining(double_conversion::DoubleToStringConverter::kMaxPrecisionDigits + 8); - convert_.ToShortestSingle(value, &builder_); - return *this; - } - - OutputManager &operator<<(StringPiece str) { - if (str.size() > kOutBuf) { - Flush(); - util::WriteOrThrow(fd_, str.data(), str.size()); - } else { - EnsureRemaining(str.size()); - builder_.AddSubstring(str.data(), str.size()); - } - return *this; - } - - // Inefficient! - OutputManager &operator<<(unsigned val) { - return *this << boost::lexical_cast(val); - } - - OutputManager &operator<<(char c) { - EnsureRemaining(1); - builder_.AddCharacter(c); - return *this; - } - - void Flush() { - util::WriteOrThrow(fd_, buf_.get(), builder_.position()); - builder_.Reset(); - } - - private: - void EnsureRemaining(std::size_t amount) { - if (static_cast(builder_.size() - builder_.position()) < amount) { - Flush(); - } - } - - util::scoped_malloc buf_; - double_conversion::StringBuilder builder_; - double_conversion::DoubleToStringConverter convert_; - int fd_; -}; -} // namespace - PrintARPA::PrintARPA(const VocabReconstitute &vocab, const std::vector &counts, const HeaderInfo* header_info, int out_fd) : vocab_(vocab), out_fd_(out_fd) { std::stringstream stream; @@ -112,8 +43,9 @@ PrintARPA::PrintARPA(const VocabReconstitute &vocab, const std::vector } void PrintARPA::Run(const ChainPositions &positions) { + util::scoped_fd closer(out_fd_); UTIL_TIMER("(%w s) Wrote ARPA file\n"); - OutputManager out(out_fd_); + util::FakeOFStream out(out_fd_); for (unsigned order = 1; order <= positions.size(); ++order) { out << "\\" << order << "-grams:" << '\n'; for (NGramStream stream(positions[order - 1]); stream; ++stream) { -- cgit v1.2.3