major refactor, break bad circular deps

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@509 ec762483-ff6d-05da-a07a-a48fb63a330f
author: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-08-11 02:37:10 +0000
committer: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-08-11 02:37:10 +0000
commit: a53461650fbdcd3cfe7543d28af9647ac3e5e47e (patch)
tree: e812756c733b34f9c16894265204acfa9f9998a9 /vest/scorer.cc
parent: 19b59489bb600f438ad96f04ec5d5c5b6616c9c2 (diff)
1 files changed, 0 insertions, 708 deletions
diff --git a/vest/scorer.cc b/vest/scorer.cc
deleted file mode 100644
index 70fdef34..00000000
--- a/vest/scorer.cc
+++ /dev/null
@@ -1,708 +0,0 @@
-#include "scorer.h"
-
-#include <boost/lexical_cast.hpp>
-#include <map>
-#include <sstream>
-#include <iostream>
-#include <fstream>
-#include <cstdio>
-#include <valarray>
-#include <algorithm>
-
-#include <boost/shared_ptr.hpp>
-
-#include "filelib.h"
-#include "aligner.h"
-#include "viterbi_envelope.h"
-#include "error_surface.h"
-#include "ter.h"
-#include "aer_scorer.h"
-#include "comb_scorer.h"
-#include "tdict.h"
-#include "stringlib.h"
-#include "lattice.h"
-
-
-using boost::shared_ptr;
-using namespace std;
-
-const bool minimize_segments = true;    // if adjacent segments have equal scores, merge them
-
-void Score::TimesEquals(float scale) {
-  cerr<<"UNIMPLEMENTED except for BLEU (for MIRA): Score::TimesEquals"<<endl;abort();
-}
-
-ScoreType ScoreTypeFromString(const string& st) {
-  const string sl = LowercaseString(st);
-  if (sl == "ser")
-    return SER;
-  if (sl == "ter")
-    return TER;
-  if (sl == "aer")
-    return AER;
-  if (sl == "bleu" || sl == "ibm_bleu")
-    return IBM_BLEU;
-  if (sl == "ibm_bleu_3")
-    return IBM_BLEU_3;
-  if (sl == "nist_bleu")
-    return NIST_BLEU;
-  if (sl == "koehn_bleu")
-    return Koehn_BLEU;
-  if (sl == "combi")
-    return BLEU_minus_TER_over_2;
-  cerr << "Don't understand score type '" << st << "', defaulting to ibm_bleu.\n";
-  return IBM_BLEU;
-}
-
-static char const* score_names[]={
-  "IBM_BLEU", "NIST_BLEU", "Koehn_BLEU", "TER", "BLEU_minus_TER_over_2", "SER", "AER", "IBM_BLEU_3"
-};
-
-std::string StringFromScoreType(ScoreType st) {
-  assert(st>=0 && st<sizeof(score_names)/sizeof(score_names[0]));
-  return score_names[(int)st];
-}
-
-
-Score::~Score() {}
-SentenceScorer::~SentenceScorer() {}
-
-struct length_accum {
-  template <class S>
-  float operator()(float sum,S const& ref) const {
-    return sum+ref.size();
-  }
-};
-
-template <class S>
-float avg_reflength(vector<S> refs) {
-  unsigned n=refs.size();
-  return n?accumulate(refs.begin(),refs.end(),0.,length_accum())/n:0.;
-}
-
-
-float SentenceScorer::ComputeRefLength(const Sentence &hyp) const {
-  return hyp.size(); // reasonable default? :)
-}
-
-const std::string* SentenceScorer::GetSource() const { return NULL; }
-
-class SERScore : public ScoreBase<SERScore> {
-  friend class SERScorer;
- public:
-  SERScore() : correct(0), total(0) {}
-  float ComputePartialScore() const { return 0.0;}
-  float ComputeScore() const {
-    return static_cast<float>(correct) / static_cast<float>(total);
-  }
-  void ScoreDetails(string* details) const {
-    ostringstream os;
-    os << "SER= " << ComputeScore() << " (" << correct << '/' << total << ')';
-    *details = os.str();
-  }
-  void PlusPartialEquals(const Score& /* delta */, int /* oracle_e_cover */, int /* oracle_f_cover */, int /* src_len */){}
-
-  void PlusEquals(const Score& delta, const float scale) {
-    correct += scale*static_cast<const SERScore&>(delta).correct;
-    total += scale*static_cast<const SERScore&>(delta).total;
-  }
-  void PlusEquals(const Score& delta) {
-    correct += static_cast<const SERScore&>(delta).correct;
-    total += static_cast<const SERScore&>(delta).total;
-    }
-  ScoreP GetZero() const { return ScoreP(new SERScore); }
-  ScoreP GetOne() const { return ScoreP(new SERScore); }
-  void Subtract(const Score& rhs, Score* res) const {
-    SERScore* r = static_cast<SERScore*>(res);
-    r->correct = correct - static_cast<const SERScore&>(rhs).correct;
-    r->total = total - static_cast<const SERScore&>(rhs).total;
-  }
-  void Encode(string* out) const {
-    assert(!"not implemented");
-  }
-  bool IsAdditiveIdentity() const {
-    return (total == 0 && correct == 0);  // correct is always 0 <= n <= total
-  }
- private:
-  int correct, total;
-};
-
-std::string SentenceScorer::verbose_desc() const {
-  return desc+",ref0={ "+TD::GetString(refs[0])+" }";
-}
-
-class SERScorer : public SentenceScorer {
- public:
-  SERScorer(const vector<vector<WordID> >& references) : SentenceScorer("SERScorer",references),refs_(references) {}
-  ScoreP ScoreCCandidate(const vector<WordID>& /* hyp */) const {
-    return ScoreP();
-  }
-  ScoreP ScoreCandidate(const vector<WordID>& hyp) const {
-    SERScore* res = new SERScore;
-    res->total = 1;
-    for (int i = 0; i < refs_.size(); ++i)
-      if (refs_[i] == hyp) res->correct = 1;
-    return ScoreP(res);
-  }
-  static ScoreP ScoreFromString(const string& data) {
-    assert(!"Not implemented");
-  }
- private:
-  vector<vector<WordID> > refs_;
-};
-
-class BLEUScore : public ScoreBase<BLEUScore> {
-  friend class BLEUScorerBase;
- public:
-  BLEUScore(int n) : correct_ngram_hit_counts(float(0),n), hyp_ngram_counts(float(0),n) {
-    ref_len = 0;
-    hyp_len = 0; }
-  BLEUScore(int n, int k) :  correct_ngram_hit_counts(float(k),n), hyp_ngram_counts(float(k),n) {
-    ref_len = k;
-    hyp_len = k; }
-  float ComputeScore() const;
-  float ComputePartialScore() const;
-  void ScoreDetails(string* details) const;
-  void TimesEquals(float scale);
-  void PlusEquals(const Score& delta);
-  void PlusEquals(const Score& delta, const float scale);
-  void PlusPartialEquals(const Score& delta, int oracle_e_cover, int oracle_f_cover, int src_len);
-  ScoreP GetZero() const;
-  ScoreP GetOne() const;
-  void Subtract(const Score& rhs, Score* res) const;
-  void Encode(string* out) const;
-  bool IsAdditiveIdentity() const {
-    if (fabs(ref_len) > 0.1f || hyp_len != 0) return false;
-    for (int i = 0; i < correct_ngram_hit_counts.size(); ++i)
-      if (hyp_ngram_counts[i] != 0 ||
-        correct_ngram_hit_counts[i] != 0) return false;
-    return true;
-  }
- private:
-  int N() const {
-    return hyp_ngram_counts.size();
-  }
-  float ComputeScore(vector<float>* precs, float* bp) const;
-  float ComputePartialScore(vector<float>* prec, float* bp) const;
-  valarray<float> correct_ngram_hit_counts;
-  valarray<float> hyp_ngram_counts;
-  float ref_len;
-  float hyp_len;
-};
-
-class BLEUScorerBase : public SentenceScorer {
- public:
-  BLEUScorerBase(const vector<vector<WordID> >& references,
-                 int n
-             );
-  ScoreP ScoreCandidate(const vector<WordID>& hyp) const;
-  ScoreP ScoreCCandidate(const vector<WordID>& hyp) const;
-  static ScoreP ScoreFromString(const string& in);
-
-  virtual float ComputeRefLength(const vector<WordID>& hyp) const = 0;
- private:
-  struct NGramCompare {
-    int operator() (const vector<WordID>& a, const vector<WordID>& b) {
-      size_t as = a.size();
-      size_t bs = b.size();
-      const size_t s = (as < bs ? as : bs);
-      for (size_t i = 0; i < s; ++i) {
-         int d = a[i] - b[i];
-         if (d < 0) return true;
-	 if (d > 0) return false;
-      }
-      return as < bs;
-    }
-  };
-  typedef map<vector<WordID>, pair<int,int>, NGramCompare> NGramCountMap;
-  void CountRef(const vector<WordID>& ref) {
-    NGramCountMap tc;
-    vector<WordID> ngram(n_);
-    int s = ref.size();
-    for (int j=0; j<s; ++j) {
-      int remaining = s-j;
-      int k = (n_ < remaining ? n_ : remaining);
-      ngram.clear();
-      for (int i=1; i<=k; ++i) {
-	ngram.push_back(ref[j + i - 1]);
-        tc[ngram].first++;
-      }
-    }
-    for (NGramCountMap::iterator i = tc.begin(); i != tc.end(); ++i) {
-      pair<int,int>& p = ngrams_[i->first];
-      if (p.first < i->second.first)
-        p = i->second;
-    }
-  }
-
-  void ComputeNgramStats(const vector<WordID>& sent,
-			 valarray<float>* correct,
-			 valarray<float>* hyp,
-			 bool clip_counts)
-    const {
-    assert(correct->size() == n_);
-    assert(hyp->size() == n_);
-    vector<WordID> ngram(n_);
-    (*correct) *= 0;
-    (*hyp) *= 0;
-    int s = sent.size();
-    for (int j=0; j<s; ++j) {
-      int remaining = s-j;
-      int k = (n_ < remaining ? n_ : remaining);
-      ngram.clear();
-      for (int i=1; i<=k; ++i) {
-	ngram.push_back(sent[j + i - 1]);
-        pair<int,int>& p = ngrams_[ngram];
-	if(clip_counts){
-	  if (p.second < p.first) {
-	    ++p.second;
-	    (*correct)[i-1]++;
-	  }}
-	else {
-	  ++p.second;
-	  (*correct)[i-1]++;
-	}
-	// if the 1 gram isn't found, don't try to match don't need to match any 2- 3- .. grams:
-	if (!p.first) {
-	  for (; i<=k; ++i)
-	    (*hyp)[i-1]++;
-	} else {
-          (*hyp)[i-1]++;
-        }
-      }
-    }
-  }
-
-  mutable NGramCountMap ngrams_;
-  int n_;
-  vector<int> lengths_;
-};
-
-ScoreP BLEUScorerBase::ScoreFromString(const string& in) {
-  istringstream is(in);
-  int n;
-  is >> n;
-  BLEUScore* r = new BLEUScore(n);
-  is >> r->ref_len >> r->hyp_len;
-
-  for (int i = 0; i < n; ++i) {
-    is >> r->correct_ngram_hit_counts[i];
-    is >> r->hyp_ngram_counts[i];
-  }
-  return ScoreP(r);
-}
-
-class IBM_BLEUScorer : public BLEUScorerBase {
- public:
-    IBM_BLEUScorer(const vector<vector<WordID> >& references,
-		   int n=4) : BLEUScorerBase(references, n), lengths_(references.size()) {
-   for (int i=0; i < references.size(); ++i)
-     lengths_[i] = references[i].size();
- }
-  float ComputeRefLength(const vector<WordID>& hyp) const {
-    if (lengths_.size() == 1) return lengths_[0];
-    int bestd = 2000000;
-    int hl = hyp.size();
-    int bl = -1;
-    for (vector<int>::const_iterator ci = lengths_.begin(); ci != lengths_.end(); ++ci) {
-      int cl = *ci;
-      if (abs(cl - hl) < bestd) {
-        bestd = abs(cl - hl);
-        bl = cl;
-      }
-    }
-    return bl;
-  }
- private:
-  vector<int> lengths_;
-};
-
-class NIST_BLEUScorer : public BLEUScorerBase {
- public:
-    NIST_BLEUScorer(const vector<vector<WordID> >& references,
-                    int n=4) : BLEUScorerBase(references, n),
-		    shortest_(references[0].size()) {
-   for (int i=1; i < references.size(); ++i)
-     if (references[i].size() < shortest_)
-       shortest_ = references[i].size();
- }
-  float ComputeRefLength(const vector<WordID>& /* hyp */) const {
-    return shortest_;
-  }
- private:
-  float shortest_;
-};
-
-class Koehn_BLEUScorer : public BLEUScorerBase {
- public:
-    Koehn_BLEUScorer(const vector<vector<WordID> >& references,
-                     int n=4) : BLEUScorerBase(references, n),
-                     avg_(0) {
-   for (int i=0; i < references.size(); ++i)
-     avg_ += references[i].size();
-   avg_ /= references.size();
- }
-  float ComputeRefLength(const vector<WordID>& /* hyp */) const {
-    return avg_;
-  }
- private:
-  float avg_;
-};
-
-ScorerP SentenceScorer::CreateSentenceScorer(const ScoreType type,
-      const vector<vector<WordID> >& refs,
-      const string& src)
-{
-  SentenceScorer *r=0;
-  switch (type) {
-  case IBM_BLEU: r = new IBM_BLEUScorer(refs, 4);break;
-  case IBM_BLEU_3 : r = new IBM_BLEUScorer(refs,3);break;
-    case NIST_BLEU: r = new NIST_BLEUScorer(refs, 4);break;
-    case Koehn_BLEU: r = new Koehn_BLEUScorer(refs, 4);break;
-    case AER: r = new AERScorer(refs, src);break;
-    case TER: r = new TERScorer(refs);break;
-    case SER: r = new SERScorer(refs);break;
-    case BLEU_minus_TER_over_2: r = new BLEUTERCombinationScorer(refs);break;
-    default:
-      assert(!"Not implemented!");
-  }
-  return ScorerP(r);
-}
-
-ScoreP SentenceScorer::GetOne() const {
-  Sentence s;
-  return ScoreCCandidate(s)->GetOne();
-}
-
-ScoreP SentenceScorer::GetZero() const {
-  Sentence s;
-  return ScoreCCandidate(s)->GetZero();
-}
-
-ScoreP Score::GetOne(ScoreType type) {
-  std::vector<SentenceScorer::Sentence > refs;
-  return SentenceScorer::CreateSentenceScorer(type,refs)->GetOne();
-}
-
-ScoreP Score::GetZero(ScoreType type) {
-  std::vector<SentenceScorer::Sentence > refs;
-  return SentenceScorer::CreateSentenceScorer(type,refs)->GetZero();
-}
-
-
-ScoreP SentenceScorer::CreateScoreFromString(const ScoreType type, const string& in) {
-  switch (type) {
-    case IBM_BLEU:
-  case IBM_BLEU_3:
-    case NIST_BLEU:
-    case Koehn_BLEU:
-      return BLEUScorerBase::ScoreFromString(in);
-    case TER:
-      return TERScorer::ScoreFromString(in);
-    case AER:
-      return AERScorer::ScoreFromString(in);
-    case SER:
-      return SERScorer::ScoreFromString(in);
-    case BLEU_minus_TER_over_2:
-      return BLEUTERCombinationScorer::ScoreFromString(in);
-    default:
-      assert(!"Not implemented!");
-  }
-}
-
-void SentenceScorer::ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface* env, const ScoreType type, const Hypergraph& hg) const {
-  vector<WordID> prev_trans;
-  const vector<shared_ptr<Segment> >& ienv = ve.GetSortedSegs();
-  env->resize(ienv.size());
-  ScoreP prev_score;
-  int j = 0;
-  for (int i = 0; i < ienv.size(); ++i) {
-    const Segment& seg = *ienv[i];
-    vector<WordID> trans;
-    if (type == AER) {
-      vector<bool> edges(hg.edges_.size(), false);
-      seg.CollectEdgesUsed(&edges);  // get the set of edges in the viterbi
-                                     // alignment
-      ostringstream os;
-      const string* psrc = this->GetSource();
-      if (psrc == NULL) {
-        cerr << "AER scoring in VEST requires source, but it is missing!\n";
-        abort();
-      }
-      size_t pos = psrc->rfind(" ||| ");
-      if (pos == string::npos) {
-        cerr << "Malformed source for AER: expected |||\nINPUT: " << *psrc << endl;
-        abort();
-      }
-      Lattice src;
-      Lattice ref;
-      LatticeTools::ConvertTextOrPLF(psrc->substr(0, pos), &src);
-      LatticeTools::ConvertTextOrPLF(psrc->substr(pos + 5), &ref);
-      AlignerTools::WriteAlignment(src, ref, hg, &os, true, &edges);
-      string tstr = os.str();
-      TD::ConvertSentence(tstr.substr(tstr.rfind(" ||| ") + 5), &trans);
-    } else {
-      seg.ConstructTranslation(&trans);
-    }
-    // cerr << "Scoring: " << TD::GetString(trans) << endl;
-    if (trans == prev_trans) {
-      if (!minimize_segments) {
-        assert(prev_score); // if this fails, it means
-	                    // the decoder can generate null translations
-        ErrorSegment& out = (*env)[j];
-        out.delta = prev_score->GetZero();
-        out.x = seg.x;
-	++j;
-      }
-      // cerr << "Identical translation, skipping scoring\n";
-    } else {
-      ScoreP score = ScoreCandidate(trans);
-      // cerr << "score= " << score->ComputeScore() << "\n";
-      ScoreP cur_delta_p = score->GetZero();
-      Score* cur_delta = cur_delta_p.get();
-      // just record the score diffs
-      if (!prev_score)
-        prev_score = score->GetZero();
-
-      score->Subtract(*prev_score, cur_delta);
-      prev_trans.swap(trans);
-      prev_score = score;
-      if ((!minimize_segments) || (!cur_delta->IsAdditiveIdentity())) {
-        ErrorSegment& out = (*env)[j];
-        out.delta = cur_delta_p;
-        out.x = seg.x;
-        ++j;
-      }
-    }
-  }
-  // cerr << " In segments: " << ienv.size() << endl;
-  // cerr << "Out segments: " << j << endl;
-  assert(j > 0);
-  env->resize(j);
-}
-
-void BLEUScore::ScoreDetails(string* details) const {
-  char buf[2000];
-  vector<float> precs(max(N(),4));
-  float bp;
-  float bleu = ComputeScore(&precs, &bp);
-  for (int i=N();i<4;++i)
-    precs[i]=0.;
-  char *bufn;
-  bufn=buf+sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)",
-       bleu*100.0,
-       precs[0]*100.0,
-       precs[1]*100.0,
-       precs[2]*100.0,
-       precs[3]*100.0,
-       bp);
-  *details = buf;
-}
-
-float BLEUScore::ComputeScore(vector<float>* precs, float* bp) const {
-  float log_bleu = 0;
-  if (precs) precs->clear();
-  int count = 0;
-  for (int i = 0; i < N(); ++i) {
-    if (hyp_ngram_counts[i] > 0) {
-      float lprec = log(correct_ngram_hit_counts[i]) - log(hyp_ngram_counts[i]);
-      if (precs) precs->push_back(exp(lprec));
-      log_bleu += lprec;
-      ++count;
-    }
-  }
-  log_bleu /= static_cast<float>(count);
-  float lbp = 0.0;
-  if (hyp_len < ref_len)
-    lbp = (hyp_len - ref_len) / hyp_len;
-  log_bleu += lbp;
-  if (bp) *bp = exp(lbp);
-  return exp(log_bleu);
-}
-
-
-//comptue scaled score for oracle retrieval
-float BLEUScore::ComputePartialScore(vector<float>* precs, float* bp) const {
-  // cerr << "Then here " << endl;
-  float log_bleu = 0;
-  if (precs) precs->clear();
-  int count = 0;
-  for (int i = 0; i < N(); ++i) {
-    //  cerr << "In CPS " << hyp_ngram_counts[i] << " " << correct_ngram_hit_counts[i] << endl;
-    if (hyp_ngram_counts[i] > 0) {
-      float lprec = log(correct_ngram_hit_counts[i]) - log(hyp_ngram_counts[i]);
-      if (precs) precs->push_back(exp(lprec));
-      log_bleu += lprec;
-      ++count;
-    }
-  }
-  log_bleu /= static_cast<float>(count);
-  float lbp = 0.0;
-  if (hyp_len < ref_len)
-    lbp = (hyp_len - ref_len) / hyp_len;
-  log_bleu += lbp;
-  if (bp) *bp = exp(lbp);
-  return exp(log_bleu);
-}
-
-float BLEUScore::ComputePartialScore() const {
-  // cerr << "In here first " << endl;
-  return ComputePartialScore(NULL, NULL);
-}
-
-float BLEUScore::ComputeScore() const {
-  return ComputeScore(NULL, NULL);
-}
-
-void BLEUScore::Subtract(const Score& rhs, Score* res) const {
-  const BLEUScore& d = static_cast<const BLEUScore&>(rhs);
-  BLEUScore* o = static_cast<BLEUScore*>(res);
-  o->ref_len = ref_len - d.ref_len;
-  o->hyp_len = hyp_len - d.hyp_len;
-  o->correct_ngram_hit_counts = correct_ngram_hit_counts - d.correct_ngram_hit_counts;
-  o->hyp_ngram_counts = hyp_ngram_counts - d.hyp_ngram_counts;
-}
-
-void BLEUScore::PlusEquals(const Score& delta) {
-  const BLEUScore& d = static_cast<const BLEUScore&>(delta);
-  correct_ngram_hit_counts += d.correct_ngram_hit_counts;
-  hyp_ngram_counts += d.hyp_ngram_counts;
-  ref_len += d.ref_len;
-  hyp_len += d.hyp_len;
-}
-
-void BLEUScore::TimesEquals(float scale) {
-  correct_ngram_hit_counts *= scale;
-  hyp_ngram_counts *= scale;
-  ref_len *= scale;
-  hyp_len *= scale;
-}
-
-void BLEUScore::PlusEquals(const Score& delta, const float scale) {
-  const BLEUScore& d = static_cast<const BLEUScore&>(delta);
-  correct_ngram_hit_counts = correct_ngram_hit_counts + (d.correct_ngram_hit_counts * scale);
-  hyp_ngram_counts = hyp_ngram_counts + (d.hyp_ngram_counts * scale);
-  ref_len = ref_len + (d.ref_len * scale);
-  hyp_len = hyp_len + (d.hyp_len * scale);
-}
-
-void BLEUScore::PlusPartialEquals(const Score& delta, int oracle_e_cover, int oracle_f_cover, int src_len){
-  const BLEUScore& d = static_cast<const BLEUScore&>(delta);
-  correct_ngram_hit_counts += d.correct_ngram_hit_counts;
-  hyp_ngram_counts += d.hyp_ngram_counts;
-  //scale the reference length according to the size of the input sentence covered by this rule
-
-  ref_len *= (float)oracle_f_cover / src_len;
-  ref_len += d.ref_len;
-
-  hyp_len = oracle_e_cover;
-  hyp_len += d.hyp_len;
-}
-
-
-ScoreP BLEUScore::GetZero() const {
-  return ScoreP(new BLEUScore(N()));
-}
-
-ScoreP BLEUScore::GetOne() const {
-  return ScoreP(new BLEUScore(N(),1));
-}
-
-
-void BLEUScore::Encode(string* out) const {
-  ostringstream os;
-  const int n = correct_ngram_hit_counts.size();
-  os << n << ' ' << ref_len << ' ' << hyp_len;
-  for (int i = 0; i < n; ++i)
-    os << ' ' << correct_ngram_hit_counts[i] << ' ' << hyp_ngram_counts[i];
-  *out = os.str();
-}
-
-BLEUScorerBase::BLEUScorerBase(const vector<vector<WordID> >& references,
-                               int n) : SentenceScorer("BLEU"+boost::lexical_cast<string>(n),references),n_(n) {
-  for (vector<vector<WordID> >::const_iterator ci = references.begin();
-       ci != references.end(); ++ci) {
-    lengths_.push_back(ci->size());
-    CountRef(*ci);
-  }
-}
-
-ScoreP BLEUScorerBase::ScoreCandidate(const vector<WordID>& hyp) const {
-  BLEUScore* bs = new BLEUScore(n_);
-  for (NGramCountMap::iterator i=ngrams_.begin(); i != ngrams_.end(); ++i)
-    i->second.second = 0;
-  ComputeNgramStats(hyp, &bs->correct_ngram_hit_counts, &bs->hyp_ngram_counts, true);
-  bs->ref_len = ComputeRefLength(hyp);
-  bs->hyp_len = hyp.size();
-  return ScoreP(bs);
-}
-
-ScoreP BLEUScorerBase::ScoreCCandidate(const vector<WordID>& hyp) const {
-  BLEUScore* bs = new BLEUScore(n_);
-  for (NGramCountMap::iterator i=ngrams_.begin(); i != ngrams_.end(); ++i)
-    i->second.second = 0;
-  bool clip = false;
-  ComputeNgramStats(hyp, &bs->correct_ngram_hit_counts, &bs->hyp_ngram_counts,clip);
-  bs->ref_len = ComputeRefLength(hyp);
-  bs->hyp_len = hyp.size();
-  return ScoreP(bs);
-}
-
-
-DocScorer::~DocScorer() {
-}
-
-void DocScorer::Init(
-      const ScoreType type,
-      const vector<string>& ref_files,
-      const string& src_file, bool verbose) {
-  scorers_.clear();
-  // TODO stop using valarray, start using ReadFile
-  cerr << "Loading references (" << ref_files.size() << " files)\n";
-  ReadFile srcrf;
-  if (type == AER && src_file.size() > 0) {
-    cerr << "  (source=" << src_file << ")\n";
-    srcrf.Init(src_file);
-  }
-  std::vector<ReadFile> ifs(ref_files.begin(),ref_files.end());
-  for (int i=0; i < ref_files.size(); ++i) ifs[i].Init(ref_files[i]);
-  char buf[64000];
-  bool expect_eof = false;
-  int line=0;
-  while (ifs[0].get()) {
-    vector<vector<WordID> > refs(ref_files.size());
-    for (int i=0; i < ref_files.size(); ++i) {
-      istream &in=ifs[i].get();
-      if (in.eof()) break;
-      in.getline(buf, 64000);
-      refs[i].clear();
-      if (strlen(buf) == 0) {
-        if (in.eof()) {
-          if (!expect_eof) {
-            assert(i == 0);
-            expect_eof = true;
-          }
-          break;
-        }
-      } else {
-        TD::ConvertSentence(buf, &refs[i]);
-        assert(!refs[i].empty());
-      }
-      assert(!expect_eof);
-    }
-    if (!expect_eof) {
-      string src_line;
-      if (srcrf) {
-        getline(srcrf.get(), src_line);
-        map<string,string> dummy;
-        ProcessAndStripSGML(&src_line, &dummy);
-      }
-      scorers_.push_back(ScorerP(SentenceScorer::CreateSentenceScorer(type, refs, src_line)));
-      if (verbose)
-        cerr<<"doc_scorer["<<line<<"] = "<<scorers_.back()->verbose_desc()<<endl;
-      ++line;
-    }
-  }
-  cerr << "Loaded reference translations for " << scorers_.size() << " sentences.\n";
-}
-
author	redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-08-11 02:37:10 +0000
committer	redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-08-11 02:37:10 +0000
commit	a53461650fbdcd3cfe7543d28af9647ac3e5e47e (patch)
tree	e812756c733b34f9c16894265204acfa9f9998a9 /vest/scorer.cc
parent	19b59489bb600f438ad96f04ec5d5c5b6616c9c2 (diff)