summaryrefslogtreecommitdiff
path: root/gi/pf/unigrams.cc
diff options
context:
space:
mode:
authorKenneth Heafield <github@kheafield.com>2012-10-22 12:07:20 +0100
committerKenneth Heafield <github@kheafield.com>2012-10-22 12:07:20 +0100
commit5f98fe5c4f2a2090eeb9d30c030305a70a8347d1 (patch)
tree9b6002f850e6dea1e3400c6b19bb31a9cdf3067f /gi/pf/unigrams.cc
parentcf9994131993b40be62e90e213b1e11e6b550143 (diff)
parent21825a09d97c2e0afd20512f306fb25fed55e529 (diff)
Merge remote branch 'upstream/master'
Conflicts: Jamroot bjam decoder/Jamfile decoder/cdec.cc dpmert/Jamfile jam-files/sanity.jam klm/lm/Jamfile klm/util/Jamfile mira/Jamfile
Diffstat (limited to 'gi/pf/unigrams.cc')
-rw-r--r--gi/pf/unigrams.cc80
1 files changed, 0 insertions, 80 deletions
diff --git a/gi/pf/unigrams.cc b/gi/pf/unigrams.cc
deleted file mode 100644
index 40829775..00000000
--- a/gi/pf/unigrams.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-#include "unigrams.h"
-
-#include <string>
-#include <cmath>
-
-#include "stringlib.h"
-#include "filelib.h"
-
-using namespace std;
-
-void UnigramModel::LoadUnigrams(const string& fname) {
- cerr << "Loading unigram probabilities from " << fname << " ..." << endl;
- ReadFile rf(fname);
- string line;
- istream& in = *rf.stream();
- assert(in);
- getline(in, line);
- assert(line.empty());
- getline(in, line);
- assert(line == "\\data\\");
- getline(in, line);
- size_t pos = line.find("ngram 1=");
- assert(pos == 0);
- assert(line.size() > 8);
- const size_t num_unigrams = atoi(&line[8]);
- getline(in, line);
- assert(line.empty());
- getline(in, line);
- assert(line == "\\1-grams:");
- for (size_t i = 0; i < num_unigrams; ++i) {
- getline(in, line);
- assert(line.size() > 0);
- pos = line.find('\t');
- assert(pos > 0);
- assert(pos + 1 < line.size());
- const WordID w = TD::Convert(line.substr(pos + 1));
- line[pos] = 0;
- float p = atof(&line[0]);
- if (w < probs_.size()) probs_[w].logeq(p * log(10)); else cerr << "WARNING: don't know about '" << TD::Convert(w) << "'\n";
- }
-}
-
-void UnigramWordModel::LoadUnigrams(const string& fname) {
- cerr << "Loading unigram probabilities from " << fname << " ..." << endl;
- ReadFile rf(fname);
- string line;
- istream& in = *rf.stream();
- assert(in);
- getline(in, line);
- assert(line.empty());
- getline(in, line);
- assert(line == "\\data\\");
- getline(in, line);
- size_t pos = line.find("ngram 1=");
- assert(pos == 0);
- assert(line.size() > 8);
- const size_t num_unigrams = atoi(&line[8]);
- getline(in, line);
- assert(line.empty());
- getline(in, line);
- assert(line == "\\1-grams:");
- for (size_t i = 0; i < num_unigrams; ++i) {
- getline(in, line);
- assert(line.size() > 0);
- pos = line.find('\t');
- assert(pos > 0);
- assert(pos + 1 < line.size());
- size_t cur = pos + 1;
- vector<WordID> w;
- while (cur < line.size()) {
- const size_t len = UTF8Len(line[cur]);
- w.push_back(TD::Convert(line.substr(cur, len)));
- cur += len;
- }
- line[pos] = 0;
- float p = atof(&line[0]);
- probs_[w].logeq(p * log(10.0));
- }
-}
-