diff options
author | Kenneth Heafield <github@kheafield.com> | 2013-05-19 10:02:45 -0400 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2013-05-19 10:02:45 -0400 |
commit | 596033ec83d1365d7a5df31d28f91347320fe155 (patch) | |
tree | 522933405aa0f0a4eac8b0b103df5c76a9327160 | |
parent | 1d96c9c9c521edfca1d10e4d8c5064f79fda3ec5 (diff) |
KenLM 10ddf7d92335 Comment some asserts, update usage
-rw-r--r-- | klm/lm/search_trie.hh | 3 | ||||
-rw-r--r-- | klm/util/double-conversion/utils.h | 9 | ||||
-rw-r--r-- | klm/util/usage.cc | 65 |
3 files changed, 55 insertions, 22 deletions
diff --git a/klm/lm/search_trie.hh b/klm/lm/search_trie.hh index 1264baf5..60be416b 100644 --- a/klm/lm/search_trie.hh +++ b/klm/lm/search_trie.hh @@ -41,7 +41,8 @@ template <class Quant, class Bhiksha> class TrieSearch { static void UpdateConfigFromBinary(int fd, const std::vector<uint64_t> &counts, Config &config) { Quant::UpdateConfigFromBinary(fd, counts, config); util::AdvanceOrThrow(fd, Quant::Size(counts.size(), config) + Unigram::Size(counts[0])); - Bhiksha::UpdateConfigFromBinary(fd, config); + // Currently the unigram pointers are not compresssed, so there will only be a header for order > 2. + if (counts.size() > 2) Bhiksha::UpdateConfigFromBinary(fd, config); } static uint64_t Size(const std::vector<uint64_t> &counts, const Config &config) { diff --git a/klm/util/double-conversion/utils.h b/klm/util/double-conversion/utils.h index 767094b8..2bd71605 100644 --- a/klm/util/double-conversion/utils.h +++ b/klm/util/double-conversion/utils.h @@ -218,7 +218,8 @@ class StringBuilder { // 0-characters; use the Finalize() method to terminate the string // instead. void AddCharacter(char c) { - ASSERT(c != '\0'); + // I just extract raw data not a cstr so null is fine. + //ASSERT(c != '\0'); ASSERT(!is_finalized() && position_ < buffer_.length()); buffer_[position_++] = c; } @@ -233,7 +234,8 @@ class StringBuilder { // builder. The input string must have enough characters. void AddSubstring(const char* s, int n) { ASSERT(!is_finalized() && position_ + n < buffer_.length()); - ASSERT(static_cast<size_t>(n) <= strlen(s)); + // I just extract raw data not a cstr so null is fine. + //ASSERT(static_cast<size_t>(n) <= strlen(s)); memmove(&buffer_[position_], s, n * kCharSize); position_ += n; } @@ -253,7 +255,8 @@ class StringBuilder { buffer_[position_] = '\0'; // Make sure nobody managed to add a 0-character to the // buffer while building the string. 
- ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_)); + // I just extract raw data not a cstr so null is fine. + //ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_)); position_ = -1; ASSERT(is_finalized()); return buffer_.start(); diff --git a/klm/util/usage.cc b/klm/util/usage.cc index ad4dc7b4..5fa3cc9a 100644 --- a/klm/util/usage.cc +++ b/klm/util/usage.cc @@ -5,51 +5,80 @@ #include <fstream> #include <ostream> #include <sstream> +#include <set> +#include <string> #include <string.h> #include <ctype.h> #if !defined(_WIN32) && !defined(_WIN64) #include <sys/resource.h> #include <sys/time.h> +#include <time.h> #include <unistd.h> #endif namespace util { -namespace { #if !defined(_WIN32) && !defined(_WIN64) +namespace { float FloatSec(const struct timeval &tv) { return static_cast<float>(tv.tv_sec) + (static_cast<float>(tv.tv_usec) / 1000000.0); } -#endif +float FloatSec(const struct timespec &tv) { + return static_cast<float>(tv.tv_sec) + (static_cast<float>(tv.tv_nsec) / 1000000000.0); +} const char *SkipSpaces(const char *at) { - for (; *at == ' '; ++at) {} + for (; *at == ' ' || *at == '\t'; ++at) {} return at; } + +class RecordStart { + public: + RecordStart() { + clock_gettime(CLOCK_MONOTONIC, &started_); + } + + const struct timespec &Started() const { + return started_; + } + + private: + struct timespec started_; +}; + +const RecordStart kRecordStart; } // namespace +#endif void PrintUsage(std::ostream &out) { #if !defined(_WIN32) && !defined(_WIN64) + // Linux doesn't set memory usage in getrusage :-( + std::set<std::string> headers; + headers.insert("VmPeak:"); + headers.insert("VmRSS:"); + headers.insert("Name:"); + + std::ifstream status("/proc/self/status", std::ios::in); + std::string header, value; + while ((status >> header) && getline(status, value)) { + if (headers.find(header) != headers.end()) { + out << header << SkipSpaces(value.c_str()) << '\t'; + } + } + struct rusage usage; - if (getrusage(RUSAGE_SELF, 
&usage)) { + if (getrusage(RUSAGE_CHILDREN, &usage)) { perror("getrusage"); return; } - out << "user\t" << FloatSec(usage.ru_utime) << "\nsys\t" << FloatSec(usage.ru_stime) << '\n'; - out << "CPU\t" << (FloatSec(usage.ru_utime) + FloatSec(usage.ru_stime)) << '\n'; - // Linux doesn't set memory usage :-(. - std::ifstream status("/proc/self/status", std::ios::in); - std::string line; - while (getline(status, line)) { - if (!strncmp(line.c_str(), "VmRSS:\t", 7)) { - out << "RSSCur\t" << SkipSpaces(line.c_str() + 7) << '\n'; - break; - } else if (!strncmp(line.c_str(), "VmPeak:\t", 8)) { - out << "VmPeak\t" << SkipSpaces(line.c_str() + 8) << '\n'; - } - } - out << "RSSMax\t" << usage.ru_maxrss << " kB" << '\n'; + out << "RSSMax:" << usage.ru_maxrss << " kB" << '\t'; + out << "user:" << FloatSec(usage.ru_utime) << "\tsys:" << FloatSec(usage.ru_stime) << '\t'; + out << "CPU:" << (FloatSec(usage.ru_utime) + FloatSec(usage.ru_stime)); + + struct timespec current; + clock_gettime(CLOCK_MONOTONIC, &current); + out << "\treal:" << (FloatSec(current) - FloatSec(kRecordStart.Started())) << '\n'; #endif } |