summaryrefslogtreecommitdiff
path: root/gi/pf/base_measures.cc
diff options
context:
space:
mode:
Diffstat (limited to 'gi/pf/base_measures.cc')
-rw-r--r--gi/pf/base_measures.cc47
1 files changed, 47 insertions, 0 deletions
diff --git a/gi/pf/base_measures.cc b/gi/pf/base_measures.cc
index 97b4e698..7894d3e7 100644
--- a/gi/pf/base_measures.cc
+++ b/gi/pf/base_measures.cc
@@ -6,6 +6,53 @@
using namespace std;
+// Builds the lookup table from the text file `fname`.
+//
+// Every input line must have the form
+//   <f tokens> ||| <e tokens> ||| <value>
+// Tokens before the first "|||" are stored in the rule's f_ side, tokens
+// between the first and second "|||" in its e_ side, and the single token
+// after the second "|||" is parsed with atof() and stored in table[rule].
+// Every rule is given the left-hand side -TD::Convert("X").
+//
+// Progress is written to cerr: a '.' every 25,000 lines and the line count
+// every 1,000,000 lines. A line that does not match the format above
+// (wrong number of "|||" separators, missing value, or extra tokens after
+// the value) is reported on cerr and the process is terminated with abort().
+TableLookupBase::TableLookupBase(const string& fname) {
+  cerr << "TableLookupBase reading from " << fname << " ..." << endl;
+  ReadFile rf(fname);
+  istream& in = *rf.stream();
+  string line;
+  unsigned lc = 0;  // number of lines read so far (1-based while parsing)
+  const WordID kDIV = TD::Convert("|||");
+  vector<WordID> tmp;
+  TRule x;  // scratch rule reused for every line; only f_ and e_ change
+  x.lhs_ = -TD::Convert("X");
+  // True while the most recent progress output was a '.' without a trailing
+  // newline, so that the next message can start on a fresh line.
+  bool flag = false;
+  while (getline(in, line)) {
+    ++lc;
+    if (lc % 1000000 == 0) { cerr << " [" << lc << ']' << endl; flag = false; }
+    else if (lc % 25000 == 0) { cerr << '.' << flush; flag = true; }
+    tmp.clear();
+    TD::ConvertSentence(line, &tmp);
+    x.f_.clear();
+    x.e_.clear();
+    int seps = 0;             // number of "|||" separators seen on this line
+    bool have_value = false;  // whether the value token has been stored
+    bool bad = false;         // set when a token appears where none is allowed
+    for (size_t pos = 0; pos < tmp.size() && !bad; ++pos) {
+      const WordID cur = tmp[pos];
+      if (cur == kDIV) {
+        ++seps;
+      } else if (seps == 0) {
+        x.f_.push_back(cur);
+      } else if (seps == 1) {
+        x.e_.push_back(cur);
+      } else if (seps == 2 && !have_value) {
+        table[x] = atof(TD::Convert(cur));
+        have_value = true;
+      } else {
+        bad = true;
+      }
+    }
+    // Separators and the value are tracked independently so that a line such
+    // as "f ||| e ||| |||" (three separators, no value) is rejected instead
+    // of being accepted without adding anything to the table.
+    if (bad || seps != 2 || !have_value) {
+      if (flag) cerr << endl;
+      cerr << "Bad format in " << lc << ": " << line << endl; abort();
+    }
+  }
+  if (flag) cerr << endl;
+  cerr << " read " << lc << " entries\n";
+}
+
+
prob_t PhraseConditionalUninformativeUnigramBase::p0(const vector<WordID>& vsrc,
const vector<WordID>& vtrg,
int start_src, int start_trg) const {