cdec cleanup, remove bayesian stuff, parsing stuff

author: Chris Dyer <cdyer@cab.ark.cs.cmu.edu> 2012-10-02 00:19:43 -0400
committer: Chris Dyer <cdyer@cab.ark.cs.cmu.edu> 2012-10-02 00:19:43 -0400
commit: e26434979adc33bd949566ba7bf02dff64e80a3e (patch)
tree: d1c72495e3af6301bd28e7e66c42de0c7a944d1f /gi/pf/unigrams.cc
parent: 0870d4a1f5e14cc7daf553b180d599f09f6614a2 (diff)
1 files changed, 0 insertions, 80 deletions
diff --git a/gi/pf/unigrams.cc b/gi/pf/unigrams.cc
deleted file mode 100644
index 40829775..00000000
--- a/gi/pf/unigrams.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-#include "unigrams.h"
-
-#include <string>
-#include <cmath>
-
-#include "stringlib.h"
-#include "filelib.h"
-
-using namespace std;
-
-void UnigramModel::LoadUnigrams(const string& fname) {
-  cerr << "Loading unigram probabilities from " << fname << " ..." << endl;
-  ReadFile rf(fname);
-  string line;
-  istream& in = *rf.stream();
-  assert(in);
-  getline(in, line);
-  assert(line.empty());
-  getline(in, line);
-  assert(line == "\\data\\");
-  getline(in, line);
-  size_t pos = line.find("ngram 1=");
-  assert(pos == 0);
-  assert(line.size() > 8);
-  const size_t num_unigrams = atoi(&line[8]);
-  getline(in, line);
-  assert(line.empty());
-  getline(in, line);
-  assert(line == "\\1-grams:");
-  for (size_t i = 0; i < num_unigrams; ++i) {
-    getline(in, line);
-    assert(line.size() > 0);
-    pos = line.find('\t');
-    assert(pos > 0);
-    assert(pos + 1 < line.size());
-    const WordID w = TD::Convert(line.substr(pos + 1));
-    line[pos] = 0;
-    float p = atof(&line[0]);
-    if (w < probs_.size()) probs_[w].logeq(p * log(10)); else cerr << "WARNING: don't know about '" << TD::Convert(w) << "'\n";
-  }
-}
-
-void UnigramWordModel::LoadUnigrams(const string& fname) {
-  cerr << "Loading unigram probabilities from " << fname << " ..." << endl;
-  ReadFile rf(fname);
-  string line;
-  istream& in = *rf.stream();
-  assert(in);
-  getline(in, line);
-  assert(line.empty());
-  getline(in, line);
-  assert(line == "\\data\\");
-  getline(in, line);
-  size_t pos = line.find("ngram 1=");
-  assert(pos == 0);
-  assert(line.size() > 8);
-  const size_t num_unigrams = atoi(&line[8]);
-  getline(in, line);
-  assert(line.empty());
-  getline(in, line);
-  assert(line == "\\1-grams:");
-  for (size_t i = 0; i < num_unigrams; ++i) {
-    getline(in, line);
-    assert(line.size() > 0);
-    pos = line.find('\t');
-    assert(pos > 0);
-    assert(pos + 1 < line.size());
-    size_t cur = pos + 1;
-    vector<WordID> w;
-    while (cur < line.size()) {
-      const size_t len = UTF8Len(line[cur]);
-      w.push_back(TD::Convert(line.substr(cur, len)));
-      cur += len;
-    }
-    line[pos] = 0;
-    float p = atof(&line[0]);
-    probs_[w].logeq(p * log(10.0));
-  }
-}
-
author	Chris Dyer <cdyer@cab.ark.cs.cmu.edu>	2012-10-02 00:19:43 -0400
committer	Chris Dyer <cdyer@cab.ark.cs.cmu.edu>	2012-10-02 00:19:43 -0400
commit	e26434979adc33bd949566ba7bf02dff64e80a3e (patch)
tree	d1c72495e3af6301bd28e7e66c42de0c7a944d1f /gi/pf/unigrams.cc
parent	0870d4a1f5e14cc7daf553b180d599f09f6614a2 (diff)