diff options
author | Kenneth Heafield <github@kheafield.com> | 2012-10-22 12:07:20 +0100 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2012-10-22 12:07:20 +0100 |
commit | 5f98fe5c4f2a2090eeb9d30c030305a70a8347d1 (patch) | |
tree | 9b6002f850e6dea1e3400c6b19bb31a9cdf3067f /gi/pf/unigrams.cc | |
parent | cf9994131993b40be62e90e213b1e11e6b550143 (diff) | |
parent | 21825a09d97c2e0afd20512f306fb25fed55e529 (diff) |
Merge remote branch 'upstream/master'
Conflicts:
Jamroot
bjam
decoder/Jamfile
decoder/cdec.cc
dpmert/Jamfile
jam-files/sanity.jam
klm/lm/Jamfile
klm/util/Jamfile
mira/Jamfile
Diffstat (limited to 'gi/pf/unigrams.cc')
-rw-r--r-- | gi/pf/unigrams.cc | 80 |
1 files changed, 0 insertions, 80 deletions
diff --git a/gi/pf/unigrams.cc b/gi/pf/unigrams.cc deleted file mode 100644 index 40829775..00000000 --- a/gi/pf/unigrams.cc +++ /dev/null @@ -1,80 +0,0 @@ -#include "unigrams.h" - -#include <string> -#include <cmath> - -#include "stringlib.h" -#include "filelib.h" - -using namespace std; - -void UnigramModel::LoadUnigrams(const string& fname) { - cerr << "Loading unigram probabilities from " << fname << " ..." << endl; - ReadFile rf(fname); - string line; - istream& in = *rf.stream(); - assert(in); - getline(in, line); - assert(line.empty()); - getline(in, line); - assert(line == "\\data\\"); - getline(in, line); - size_t pos = line.find("ngram 1="); - assert(pos == 0); - assert(line.size() > 8); - const size_t num_unigrams = atoi(&line[8]); - getline(in, line); - assert(line.empty()); - getline(in, line); - assert(line == "\\1-grams:"); - for (size_t i = 0; i < num_unigrams; ++i) { - getline(in, line); - assert(line.size() > 0); - pos = line.find('\t'); - assert(pos > 0); - assert(pos + 1 < line.size()); - const WordID w = TD::Convert(line.substr(pos + 1)); - line[pos] = 0; - float p = atof(&line[0]); - if (w < probs_.size()) probs_[w].logeq(p * log(10)); else cerr << "WARNING: don't know about '" << TD::Convert(w) << "'\n"; - } -} - -void UnigramWordModel::LoadUnigrams(const string& fname) { - cerr << "Loading unigram probabilities from " << fname << " ..." << endl; - ReadFile rf(fname); - string line; - istream& in = *rf.stream(); - assert(in); - getline(in, line); - assert(line.empty()); - getline(in, line); - assert(line == "\\data\\"); - getline(in, line); - size_t pos = line.find("ngram 1="); - assert(pos == 0); - assert(line.size() > 8); - const size_t num_unigrams = atoi(&line[8]); - getline(in, line); - assert(line.empty()); - getline(in, line); - assert(line == "\\1-grams:"); - for (size_t i = 0; i < num_unigrams; ++i) { - getline(in, line); - assert(line.size() > 0); - pos = line.find('\t'); - assert(pos > 0); - assert(pos + 1 < line.size()); - size_t cur = pos + 1; - vector<WordID> w; - while (cur < line.size()) { - const size_t len = UTF8Len(line[cur]); - w.push_back(TD::Convert(line.substr(cur, len))); - cur += len; - } - line[pos] = 0; - float p = atof(&line[0]); - probs_[w].logeq(p * log(10.0)); - } -} - |