summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kenneth Heafield <github@kheafield.com> 2013-05-19 10:02:45 -0400
committer Kenneth Heafield <github@kheafield.com> 2013-05-19 10:02:45 -0400
commit 596033ec83d1365d7a5df31d28f91347320fe155 (patch)
tree 522933405aa0f0a4eac8b0b103df5c76a9327160
parent 1d96c9c9c521edfca1d10e4d8c5064f79fda3ec5 (diff)
KenLM 10ddf7d92335 Comment some asserts, update usage
-rw-r--r-- klm/lm/search_trie.hh 3
-rw-r--r-- klm/util/double-conversion/utils.h 9
-rw-r--r-- klm/util/usage.cc 65
3 files changed, 55 insertions, 22 deletions
diff --git a/klm/lm/search_trie.hh b/klm/lm/search_trie.hh
index 1264baf5..60be416b 100644
--- a/klm/lm/search_trie.hh
+++ b/klm/lm/search_trie.hh
@@ -41,7 +41,8 @@ template <class Quant, class Bhiksha> class TrieSearch {
static void UpdateConfigFromBinary(int fd, const std::vector<uint64_t> &counts, Config &config) {
Quant::UpdateConfigFromBinary(fd, counts, config);
util::AdvanceOrThrow(fd, Quant::Size(counts.size(), config) + Unigram::Size(counts[0]));
- Bhiksha::UpdateConfigFromBinary(fd, config);
+ // Currently the unigram pointers are not compressed, so there will only be a header for order > 2.
+ if (counts.size() > 2) Bhiksha::UpdateConfigFromBinary(fd, config);
}
static uint64_t Size(const std::vector<uint64_t> &counts, const Config &config) {
diff --git a/klm/util/double-conversion/utils.h b/klm/util/double-conversion/utils.h
index 767094b8..2bd71605 100644
--- a/klm/util/double-conversion/utils.h
+++ b/klm/util/double-conversion/utils.h
@@ -218,7 +218,8 @@ class StringBuilder {
// 0-characters; use the Finalize() method to terminate the string
// instead.
void AddCharacter(char c) {
- ASSERT(c != '\0');
+ // I just extract raw data not a cstr so null is fine.
+ //ASSERT(c != '\0');
ASSERT(!is_finalized() && position_ < buffer_.length());
buffer_[position_++] = c;
}
@@ -233,7 +234,8 @@ class StringBuilder {
// builder. The input string must have enough characters.
void AddSubstring(const char* s, int n) {
ASSERT(!is_finalized() && position_ + n < buffer_.length());
- ASSERT(static_cast<size_t>(n) <= strlen(s));
+ // I just extract raw data not a cstr so null is fine.
+ //ASSERT(static_cast<size_t>(n) <= strlen(s));
memmove(&buffer_[position_], s, n * kCharSize);
position_ += n;
}
@@ -253,7 +255,8 @@ class StringBuilder {
buffer_[position_] = '\0';
// Make sure nobody managed to add a 0-character to the
// buffer while building the string.
- ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_));
+ // I just extract raw data not a cstr so null is fine.
+ //ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_));
position_ = -1;
ASSERT(is_finalized());
return buffer_.start();
diff --git a/klm/util/usage.cc b/klm/util/usage.cc
index ad4dc7b4..5fa3cc9a 100644
--- a/klm/util/usage.cc
+++ b/klm/util/usage.cc
@@ -5,51 +5,80 @@
#include <fstream>
#include <ostream>
#include <sstream>
+#include <set>
+#include <string>
#include <string.h>
#include <ctype.h>
#if !defined(_WIN32) && !defined(_WIN64)
#include <sys/resource.h>
#include <sys/time.h>
+#include <time.h>
#include <unistd.h>
#endif
namespace util {
-namespace {
#if !defined(_WIN32) && !defined(_WIN64)
+namespace {
float FloatSec(const struct timeval &tv) {
return static_cast<float>(tv.tv_sec) + (static_cast<float>(tv.tv_usec) / 1000000.0);
}
-#endif
+float FloatSec(const struct timespec &tv) {
+ return static_cast<float>(tv.tv_sec) + (static_cast<float>(tv.tv_nsec) / 1000000000.0);
+}
const char *SkipSpaces(const char *at) {
- for (; *at == ' '; ++at) {}
+ for (; *at == ' ' || *at == '\t'; ++at) {}
return at;
}
+
+class RecordStart {
+ public:
+ RecordStart() {
+ clock_gettime(CLOCK_MONOTONIC, &started_);
+ }
+
+ const struct timespec &Started() const {
+ return started_;
+ }
+
+ private:
+ struct timespec started_;
+};
+
+const RecordStart kRecordStart;
} // namespace
+#endif
void PrintUsage(std::ostream &out) {
#if !defined(_WIN32) && !defined(_WIN64)
+ // Linux doesn't set memory usage in getrusage :-(
+ std::set<std::string> headers;
+ headers.insert("VmPeak:");
+ headers.insert("VmRSS:");
+ headers.insert("Name:");
+
+ std::ifstream status("/proc/self/status", std::ios::in);
+ std::string header, value;
+ while ((status >> header) && getline(status, value)) {
+ if (headers.find(header) != headers.end()) {
+ out << header << SkipSpaces(value.c_str()) << '\t';
+ }
+ }
+
struct rusage usage;
- if (getrusage(RUSAGE_SELF, &usage)) {
+ if (getrusage(RUSAGE_CHILDREN, &usage)) {
perror("getrusage");
return;
}
- out << "user\t" << FloatSec(usage.ru_utime) << "\nsys\t" << FloatSec(usage.ru_stime) << '\n';
- out << "CPU\t" << (FloatSec(usage.ru_utime) + FloatSec(usage.ru_stime)) << '\n';
- // Linux doesn't set memory usage :-(.
- std::ifstream status("/proc/self/status", std::ios::in);
- std::string line;
- while (getline(status, line)) {
- if (!strncmp(line.c_str(), "VmRSS:\t", 7)) {
- out << "RSSCur\t" << SkipSpaces(line.c_str() + 7) << '\n';
- break;
- } else if (!strncmp(line.c_str(), "VmPeak:\t", 8)) {
- out << "VmPeak\t" << SkipSpaces(line.c_str() + 8) << '\n';
- }
- }
- out << "RSSMax\t" << usage.ru_maxrss << " kB" << '\n';
+ out << "RSSMax:" << usage.ru_maxrss << " kB" << '\t';
+ out << "user:" << FloatSec(usage.ru_utime) << "\tsys:" << FloatSec(usage.ru_stime) << '\t';
+ out << "CPU:" << (FloatSec(usage.ru_utime) + FloatSec(usage.ru_stime));
+
+ struct timespec current;
+ clock_gettime(CLOCK_MONOTONIC, &current);
+ out << "\treal:" << (FloatSec(current) - FloatSec(kRecordStart.Started())) << '\n';
#endif
}