diff options
Diffstat (limited to 'vest/scorer.cc')
| -rw-r--r-- | vest/scorer.cc | 85 | 
1 files changed, 61 insertions, 24 deletions
| diff --git a/vest/scorer.cc b/vest/scorer.cc index e8e9608a..d8628418 100644 --- a/vest/scorer.cc +++ b/vest/scorer.cc @@ -1,5 +1,8 @@  #include "scorer.h" +#define DEBUG_SCORER + +#include <boost/lexical_cast.hpp>  #include <map>  #include <sstream>  #include <iostream> @@ -121,9 +124,13 @@ class SERScore : public Score {    int correct, total;  }; +std::string SentenceScorer::verbose_desc() const { +  return desc+",ref0={ "+TD::GetString(refs[0])+" }"; +} +  class SERScorer : public SentenceScorer {   public: -  SERScorer(const vector<vector<WordID> >& references) : refs_(references) {} +  SERScorer(const vector<vector<WordID> >& references) : SentenceScorer("SERScorer",references),refs_(references) {}    Score* ScoreCCandidate(const vector<WordID>& /* hyp */) const {      Score* a = NULL;      return a; @@ -180,7 +187,7 @@ class BLEUScore : public Score {  class BLEUScorerBase : public SentenceScorer {   public:    BLEUScorerBase(const vector<vector<WordID> >& references, -             int n +                 int n               );    Score* ScoreCandidate(const vector<WordID>& hyp) const;    Score* ScoreCCandidate(const vector<WordID>& hyp) const; @@ -353,6 +360,33 @@ SentenceScorer* SentenceScorer::CreateSentenceScorer(const ScoreType type,    }  } +Score* SentenceScorer::GetOne() const { +  Sentence s; +  return ScoreCCandidate(s)->GetOne(); +} + +Score* SentenceScorer::GetZero() const { +  Sentence s; +  return ScoreCCandidate(s)->GetZero(); +} + +Score* Score::GetOne(ScoreType type) { +  std::vector<SentenceScorer::Sentence > refs; +  SentenceScorer *ps=SentenceScorer::CreateSentenceScorer(type,refs); +  Score *s=ps->GetOne(); +  delete ps; +  return s; +} + +Score* Score::GetZero(ScoreType type) { +  std::vector<SentenceScorer::Sentence > refs; +  SentenceScorer *ps=SentenceScorer::CreateSentenceScorer(type,refs); +  Score *s=ps->GetZero(); +  delete ps; +  return s; +} + +  Score* SentenceScorer::CreateScoreFromString(const ScoreType type, const string& in) {    switch (type) {      case IBM_BLEU: @@ -562,6 +596,7 @@ Score* BLEUScore::GetOne() const {    return new BLEUScore(hyp_ngram_counts.size(),1);  } +  void BLEUScore::Encode(string* out) const {    ostringstream os;    const int n = correct_ngram_hit_counts.size(); @@ -572,7 +607,7 @@ void BLEUScore::Encode(string* out) const {  }  BLEUScorerBase::BLEUScorerBase(const vector<vector<WordID> >& references, -                       int n) : n_(n) { +                               int n) : SentenceScorer("BLEU"+boost::lexical_cast<string>(n),references),n_(n) {    for (vector<vector<WordID> >::const_iterator ci = references.begin();         ci != references.end(); ++ci) {      lengths_.push_back(ci->size()); @@ -603,42 +638,40 @@ Score* BLEUScorerBase::ScoreCCandidate(const vector<WordID>& hyp) const {  DocScorer::~DocScorer() { -  for (int i=0; i < scorers_.size(); ++i) -    delete scorers_[i];  } -DocScorer::DocScorer( +void DocScorer::Init(        const ScoreType type,        const vector<string>& ref_files,        const string& src_file) { +  scorers_.clear();    // TODO stop using valarray, start using ReadFile    cerr << "Loading references (" << ref_files.size() << " files)\n"; -  shared_ptr<ReadFile> srcrf; +  ReadFile srcrf;    if (type == AER && src_file.size() > 0) {      cerr << "  (source=" << src_file << ")\n"; -    srcrf.reset(new ReadFile(src_file)); -  } -  valarray<ifstream> ifs(ref_files.size()); -  for (int i=0; i < ref_files.size(); ++i) { -     ifs[i].open(ref_files[i].c_str()); -     assert(ifs[i].good()); +    srcrf.Init(src_file);    } +  std::vector<ReadFile> ifs(ref_files.begin(),ref_files.end()); +  for (int i=0; i < ref_files.size(); ++i) ifs[i].Init(ref_files[i]);    char buf[64000];    bool expect_eof = false; -  while (!ifs[0].eof()) { +  int line=0; +  while (ifs[0].get()) {      vector<vector<WordID> > refs(ref_files.size());      for (int i=0; i < ref_files.size(); ++i) { -      if (ifs[i].eof()) break; -      ifs[i].getline(buf, 64000); +      istream &in=ifs[i].get(); +      if (in.eof()) break; +      in.getline(buf, 64000);        refs[i].clear();        if (strlen(buf) == 0) { -        if (ifs[i].eof()) { -	  if (!expect_eof) { -	    assert(i == 0); -	    expect_eof = true; -	  } +        if (in.eof()) { +          if (!expect_eof) { +            assert(i == 0); +            expect_eof = true; +          }            break; -	} +        }        } else {          TD::ConvertSentence(buf, &refs[i]);          assert(!refs[i].empty()); @@ -648,11 +681,15 @@ DocScorer::DocScorer(      if (!expect_eof) {        string src_line;        if (srcrf) { -        getline(*srcrf->stream(), src_line); +        getline(srcrf.get(), src_line);          map<string,string> dummy;          ProcessAndStripSGML(&src_line, &dummy);        } -      scorers_.push_back(SentenceScorer::CreateSentenceScorer(type, refs, src_line)); +      scorers_.push_back(ScorerP(SentenceScorer::CreateSentenceScorer(type, refs, src_line))); +#ifdef DEBUG_SCORER +      cerr<<"doc_scorer["<<line<<"] = "<<scorers_.back()->verbose_desc()<<endl; +#endif +      ++line;      }    }    cerr << "Loaded reference translations for " << scorers_.size() << " sentences.\n"; | 
