diff options
author | Patrick Simianer <p@simianer.de> | 2014-02-16 00:13:17 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-02-16 00:13:17 +0100 |
commit | ab71c44e61d00c788e84b44156d0be16191e267d (patch) | |
tree | 27d1c7e74e8b07276312766a5908853465a3ed18 /mteval/ns_docscorer.cc | |
parent | 4494c2cae3bed81f9d2d24d749e99bf66a734bc5 (diff) | |
parent | 9e2f7fcfa76213f5e41abb4f4c9a264ebe8f9d8c (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'mteval/ns_docscorer.cc')
-rw-r--r-- | mteval/ns_docscorer.cc | 34 |
1 files changed, 34 insertions, 0 deletions
```diff
diff --git a/mteval/ns_docscorer.cc b/mteval/ns_docscorer.cc
index 83bd1a29..242f134a 100644
--- a/mteval/ns_docscorer.cc
+++ b/mteval/ns_docscorer.cc
@@ -13,6 +13,40 @@ DocumentScorer::~DocumentScorer() {}
 
 DocumentScorer::DocumentScorer() {}
 
+DocumentScorer::DocumentScorer(const EvaluationMetric* metric,
+                               const string& src_ref_file) {
+  const WordID kDIV = TD::Convert("|||");
+  assert(!src_ref_file.empty());
+  cerr << "Loading source and references from " << src_ref_file << "...\n";
+  ReadFile rf(src_ref_file);
+  istream& in = *rf.stream();
+  unsigned lc = 0;
+  string src_ref;
+  vector<WordID> tmp;
+  vector<vector<WordID> > refs;
+  while(getline(in, src_ref)) {
+    ++lc;
+    size_t end_src = src_ref.find(" ||| ");
+    if (end_src == string::npos) {
+      cerr << "Expected SRC ||| REF [||| REF2 ||| REF3 ...] in line " << lc << endl;
+      abort();
+    }
+    refs.clear();
+    tmp.clear();
+    TD::ConvertSentence(src_ref, &tmp, end_src + 5);
+    unsigned last = 0;
+    for (unsigned j = 0; j < tmp.size(); ++j) {
+      if (tmp[j] == kDIV) {
+        refs.push_back(vector<WordID>(tmp.begin() + last, tmp.begin() + j));
+        last = j + 1;
+      }
+    }
+    refs.push_back(vector<WordID>(tmp.begin() + last, tmp.end()));
+    scorers_.push_back(metric->CreateSegmentEvaluator(refs));
+    scorers_.back()->src = src_ref.substr(0, end_src);
+  }
+}
+
 void DocumentScorer::Init(const EvaluationMetric* metric,
                           const vector<string>& ref_files,
                           const string& src_file,
```