summaryrefslogtreecommitdiff
path: root/gi/pf/unigrams.cc
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cab.ark.cs.cmu.edu>2012-10-02 00:19:43 -0400
committerChris Dyer <cdyer@cab.ark.cs.cmu.edu>2012-10-02 00:19:43 -0400
commite26434979adc33bd949566ba7bf02dff64e80a3e (patch)
treed1c72495e3af6301bd28e7e66c42de0c7a944d1f /gi/pf/unigrams.cc
parent0870d4a1f5e14cc7daf553b180d599f09f6614a2 (diff)
cdec cleanup, remove bayesian stuff, parsing stuff
Diffstat (limited to 'gi/pf/unigrams.cc')
-rw-r--r--gi/pf/unigrams.cc80
1 files changed, 0 insertions, 80 deletions
diff --git a/gi/pf/unigrams.cc b/gi/pf/unigrams.cc
deleted file mode 100644
index 40829775..00000000
--- a/gi/pf/unigrams.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-#include "unigrams.h"
-
-#include <string>
-#include <cmath>
-
-#include "stringlib.h"
-#include "filelib.h"
-
-using namespace std;
-
-void UnigramModel::LoadUnigrams(const string& fname) {
- cerr << "Loading unigram probabilities from " << fname << " ..." << endl;
- ReadFile rf(fname);
- string line;
- istream& in = *rf.stream();
- assert(in);
- getline(in, line);
- assert(line.empty());
- getline(in, line);
- assert(line == "\\data\\");
- getline(in, line);
- size_t pos = line.find("ngram 1=");
- assert(pos == 0);
- assert(line.size() > 8);
- const size_t num_unigrams = atoi(&line[8]);
- getline(in, line);
- assert(line.empty());
- getline(in, line);
- assert(line == "\\1-grams:");
- for (size_t i = 0; i < num_unigrams; ++i) {
- getline(in, line);
- assert(line.size() > 0);
- pos = line.find('\t');
- assert(pos > 0);
- assert(pos + 1 < line.size());
- const WordID w = TD::Convert(line.substr(pos + 1));
- line[pos] = 0;
- float p = atof(&line[0]);
- if (w < probs_.size()) probs_[w].logeq(p * log(10)); else cerr << "WARNING: don't know about '" << TD::Convert(w) << "'\n";
- }
-}
-
-void UnigramWordModel::LoadUnigrams(const string& fname) {
- cerr << "Loading unigram probabilities from " << fname << " ..." << endl;
- ReadFile rf(fname);
- string line;
- istream& in = *rf.stream();
- assert(in);
- getline(in, line);
- assert(line.empty());
- getline(in, line);
- assert(line == "\\data\\");
- getline(in, line);
- size_t pos = line.find("ngram 1=");
- assert(pos == 0);
- assert(line.size() > 8);
- const size_t num_unigrams = atoi(&line[8]);
- getline(in, line);
- assert(line.empty());
- getline(in, line);
- assert(line == "\\1-grams:");
- for (size_t i = 0; i < num_unigrams; ++i) {
- getline(in, line);
- assert(line.size() > 0);
- pos = line.find('\t');
- assert(pos > 0);
- assert(pos + 1 < line.size());
- size_t cur = pos + 1;
- vector<WordID> w;
- while (cur < line.size()) {
- const size_t len = UTF8Len(line[cur]);
- w.push_back(TD::Convert(line.substr(cur, len)));
- cur += len;
- }
- line[pos] = 0;
- float p = atof(&line[0]);
- probs_[w].logeq(p * log(10.0));
- }
-}
-