diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-01-23 15:47:29 -0500 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-01-23 15:47:29 -0500 |
commit | 26d9ad04bd81508163d75c99726f970dd75f5127 (patch) | |
tree | 9fc49e4cbe8bdabbdd57ee616dc90094f114f255 /gi/pf/base_measures.cc | |
parent | f0bdd4de6455855d705d9056deb2e90c999dc740 (diff) |
more alignment stuff
Diffstat (limited to 'gi/pf/base_measures.cc')
-rw-r--r-- | gi/pf/base_measures.cc | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/gi/pf/base_measures.cc b/gi/pf/base_measures.cc index 97b4e698..7894d3e7 100644 --- a/gi/pf/base_measures.cc +++ b/gi/pf/base_measures.cc @@ -6,6 +6,53 @@ using namespace std; +TableLookupBase::TableLookupBase(const string& fname) { + cerr << "TableLookupBase reading from " << fname << " ..." << endl; + ReadFile rf(fname); + istream& in = *rf.stream(); + string line; + unsigned lc = 0; + const WordID kDIV = TD::Convert("|||"); + vector<WordID> tmp; + vector<int> le, lf; + TRule x; + x.lhs_ = -TD::Convert("X"); + bool flag = false; + while(getline(in, line)) { + ++lc; + if (lc % 1000000 == 0) { cerr << " [" << lc << ']' << endl; flag = false; } + else if (lc % 25000 == 0) { cerr << '.' << flush; flag = true; } + tmp.clear(); + TD::ConvertSentence(line, &tmp); + x.f_.clear(); + x.e_.clear(); + size_t pos = 0; + int cc = 0; + while(pos < tmp.size()) { + const WordID cur = tmp[pos++]; + if (cur == kDIV) { + ++cc; + } else if (cc == 0) { + x.f_.push_back(cur); + } else if (cc == 1) { + x.e_.push_back(cur); + } else if (cc == 2) { + table[x] = atof(TD::Convert(cur)); + ++cc; + } else { + if (flag) cerr << endl; + cerr << "Bad format in " << lc << ": " << line << endl; abort(); + } + } + if (cc != 3) { + if (flag) cerr << endl; + cerr << "Bad format in " << lc << ": " << line << endl; abort(); + } + } + if (flag) cerr << endl; + cerr << " read " << lc << " entries\n"; +} + prob_t PhraseConditionalUninformativeUnigramBase::p0(const vector<WordID>& vsrc, const vector<WordID>& vtrg, int start_src, int start_trg) const { |