diff options
| author | Patrick Simianer <p@simianer.de> | 2012-03-13 09:24:47 +0100 | 
|---|---|---|
| committer | Patrick Simianer <p@simianer.de> | 2012-03-13 09:24:47 +0100 | 
| commit | ef6085e558e26c8819f1735425761103021b6470 (patch) | |
| tree | 5cf70e4c48c64d838e1326b5a505c8c4061bff4a /mteval | |
| parent | 10a232656a0c882b3b955d2bcfac138ce11e8a2e (diff) | |
| parent | dfbc278c1057555fda9312291c8024049e00b7d8 (diff) | |
merge with upstream
Diffstat (limited to 'mteval')
| -rw-r--r-- | mteval/Makefile.am | 2 | ||||
| -rw-r--r-- | mteval/fast_score.cc | 40 | ||||
| -rw-r--r-- | mteval/mbr_kbest.cc | 24 | ||||
| -rw-r--r-- | mteval/ns.cc | 290 | ||||
| -rw-r--r-- | mteval/ns.h | 115 | ||||
| -rw-r--r-- | mteval/ns_comb.cc | 87 | ||||
| -rw-r--r-- | mteval/ns_comb.h | 19 | ||||
| -rw-r--r-- | mteval/ns_docscorer.cc | 60 | ||||
| -rw-r--r-- | mteval/ns_docscorer.h | 31 | ||||
| -rw-r--r-- | mteval/ns_ext.cc | 130 | ||||
| -rw-r--r-- | mteval/ns_ext.h | 21 | ||||
| -rw-r--r-- | mteval/ns_ter.cc | 492 | ||||
| -rw-r--r-- | mteval/ns_ter.h | 21 | ||||
| -rw-r--r-- | mteval/scorer_test.cc | 48 | 
14 files changed, 1352 insertions, 28 deletions
| diff --git a/mteval/Makefile.am b/mteval/Makefile.am index 95845090..e7126675 100644 --- a/mteval/Makefile.am +++ b/mteval/Makefile.am @@ -10,7 +10,7 @@ endif  noinst_LIBRARIES = libmteval.a -libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc +libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc  fast_score_SOURCES = fast_score.cc  fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz diff --git a/mteval/fast_score.cc b/mteval/fast_score.cc index 5ee264a6..a271ccc5 100644 --- a/mteval/fast_score.cc +++ b/mteval/fast_score.cc @@ -4,9 +4,11 @@  #include <boost/program_options.hpp>  #include <boost/program_options/variables_map.hpp> +#include "stringlib.h"  #include "filelib.h"  #include "tdict.h" -#include "scorer.h" +#include "ns.h" +#include "ns_docscorer.h"  using namespace std;  namespace po = boost::program_options; @@ -14,8 +16,8 @@ namespace po = boost::program_options;  void InitCommandLine(int argc, char** argv, po::variables_map* conf) {    po::options_description opts("Configuration options");    opts.add_options() -        ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation(s) (tokenized text file)") -        ("loss_function,l",po::value<string>()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)") +        ("reference,r",po::value<vector<string> >(), "[1 or more required] Reference translation(s) in tokenized text files") +        ("evaluation_metric,m",po::value<string>()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)")          ("in_file,i", po::value<string>()->default_value("-"), "Input file")          ("help,h", "Help");    po::options_description dcmdline_options; @@ -35,24 +37,29 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {  int main(int argc, char** argv) {    po::variables_map conf;    InitCommandLine(argc, argv, &conf); -  const string loss_function = conf["loss_function"].as<string>(); -  ScoreType type = ScoreTypeFromString(loss_function); -  DocScorer ds(type, conf["reference"].as<vector<string> >(), ""); +  string loss_function = UppercaseString(conf["evaluation_metric"].as<string>()); +  if (loss_function == "COMBI") { +    cerr << "WARNING: 'combi' metric is no longer supported, switching to 'COMB:TER=-0.5;IBM_BLEU=0.5'\n"; +    loss_function = "COMB:TER=-0.5;IBM_BLEU=0.5"; +  } else if (loss_function == "BLEU") { +    cerr << "WARNING: 'BLEU' is ambiguous, assuming 'IBM_BLEU'\n"; +    loss_function = "IBM_BLEU"; +  } +  EvaluationMetric* metric = EvaluationMetric::Instance(loss_function); +  DocumentScorer ds(metric, conf["reference"].as<vector<string> >());    cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl;    ReadFile rf(conf["in_file"].as<string>()); -  ScoreP acc; +  SufficientStats acc;    istream& in = *rf.stream();    int lc = 0; -  while(in) { -    string line; -    getline(in, line); -    if (line.empty() && !in) break; +  string line; +  while(getline(in, line)) {      vector<WordID> sent;      TD::ConvertSentence(line, &sent); -    ScoreP sentscore = ds[lc]->ScoreCandidate(sent); -    if (!acc) { acc = sentscore->GetZero(); } -    acc->PlusEquals(*sentscore); +    SufficientStats t; +    ds[lc]->Evaluate(sent, &t); +    acc += t;      ++lc;    }    assert(lc > 0); @@ -63,9 +70,8 @@ int main(int argc, char** argv) {    if (lc != ds.size())      cerr << "Fewer sentences in hyp (" << lc << ") than refs ("           << ds.size() << "): scoring partial set!\n"; -  float score = acc->ComputeScore(); -  string details; -  acc->ScoreDetails(&details); +  float score = metric->ComputeScore(acc); +  const string details = metric->DetailedScore(acc);    cerr << details << endl;    cout << score << endl;    return 0; diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc index 64a6a8bf..2bd31566 100644 --- a/mteval/mbr_kbest.cc +++ b/mteval/mbr_kbest.cc @@ -5,7 +5,7 @@  #include "prob.h"  #include "tdict.h" -#include "scorer.h" +#include "ns.h"  #include "filelib.h"  #include "stringlib.h" @@ -17,7 +17,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {    po::options_description opts("Configuration options");    opts.add_options()          ("scale,a",po::value<double>()->default_value(1.0), "Posterior scaling factor (alpha)") -        ("loss_function,l",po::value<string>()->default_value("bleu"), "Loss function") +        ("evaluation_metric,m",po::value<string>()->default_value("ibm_bleu"), "Evaluation metric")          ("input,i",po::value<string>()->default_value("-"), "File to read k-best lists from")          ("output_list,L", "Show reranked list as output")          ("help,h", "Help"); @@ -75,13 +75,15 @@ bool ReadKBestList(istream* in, string* sent_id, vector<pair<vector<WordID>, pro  int main(int argc, char** argv) {    po::variables_map conf;    InitCommandLine(argc, argv, &conf); -  const string metric = conf["loss_function"].as<string>(); +  const string smetric = conf["evaluation_metric"].as<string>(); +  EvaluationMetric* metric = EvaluationMetric::Instance(smetric); + +  const bool is_loss = (UppercaseString(smetric) == "TER");    const bool output_list = conf.count("output_list") > 0;    const string file = conf["input"].as<string>();    const double mbr_scale = conf["scale"].as<double>();    cerr << "Posterior scaling factor (alpha) = " << mbr_scale << endl; -  ScoreType type = ScoreTypeFromString(metric);    vector<pair<vector<WordID>, prob_t> > list;    ReadFile rf(file);    string sent_id; @@ -99,15 +101,17 @@ int main(int argc, char** argv) {      vector<double> mbr_scores(output_list ? list.size() : 0);      double mbr_loss = numeric_limits<double>::max();      for (int i = 0 ; i < list.size(); ++i) { -      vector<vector<WordID> > refs(1, list[i].first); -      //cerr << i << ": " << list[i].second <<"\t" << TD::GetString(list[i].first) << endl; -      ScorerP scorer = SentenceScorer::CreateSentenceScorer(type, refs); +      const vector<vector<WordID> > refs(1, list[i].first); +      boost::shared_ptr<SegmentEvaluator> segeval = metric-> +          CreateSegmentEvaluator(refs); +        double wl_acc = 0;        for (int j = 0; j < list.size(); ++j) {          if (i != j) { -          ScoreP s = scorer->ScoreCandidate(list[j].first); -          double loss = 1.0 - s->ComputeScore(); -          if (type == TER || type == AER) loss = 1.0 - loss; +          SufficientStats ss; +          segeval->Evaluate(list[j].first, &ss); +          double loss = 1.0 - metric->ComputeScore(ss); +          if (is_loss) loss = 1.0 - loss;            double weighted_loss = loss * (joints[j] / marginal).as_float();            wl_acc += weighted_loss;            if ((!output_list) && wl_acc > mbr_loss) break; diff --git a/mteval/ns.cc b/mteval/ns.cc new file mode 100644 index 00000000..788f809a --- /dev/null +++ b/mteval/ns.cc @@ -0,0 +1,290 @@ +#include "ns.h" +#include "ns_ter.h" +#include "ns_ext.h" +#include "ns_comb.h" + +#include <cstdio> +#include <cassert> +#include <cmath> +#include <cstdlib> +#include <iostream> +#include <sstream> + +#include "tdict.h" +#include "stringlib.h" + +using namespace std; +using boost::shared_ptr; + +map<string, EvaluationMetric*> EvaluationMetric::instances_; + +SegmentEvaluator::~SegmentEvaluator() {} +EvaluationMetric::~EvaluationMetric() {} + +bool EvaluationMetric::IsErrorMetric() const { +  return false; +} + +struct DefaultSegmentEvaluator : public SegmentEvaluator { +  DefaultSegmentEvaluator(const vector<vector<WordID> >& refs, const EvaluationMetric* em) : refs_(refs), em_(em) {} +  void Evaluate(const vector<WordID>& hyp, SufficientStats* out) const { +    em_->ComputeSufficientStatistics(hyp, refs_, out); +    out->id_ = em_->MetricId(); +  } +  const vector<vector<WordID> > refs_; +  const EvaluationMetric* em_; +}; + +shared_ptr<SegmentEvaluator> EvaluationMetric::CreateSegmentEvaluator(const vector<vector<WordID> >& refs) const { +  return shared_ptr<SegmentEvaluator>(new DefaultSegmentEvaluator(refs, this)); +} + +#define MAX_SS_VECTOR_SIZE 50 +unsigned EvaluationMetric::SufficientStatisticsVectorSize() const { +  return MAX_SS_VECTOR_SIZE; +} + +void EvaluationMetric::ComputeSufficientStatistics(const vector<WordID>&, +                                                   const vector<vector<WordID> >&, +                                                   SufficientStats*) const { +  cerr << "Base class ComputeSufficientStatistics should not be called.\n"; +  abort(); +} + +string EvaluationMetric::DetailedScore(const SufficientStats& stats) const { +  ostringstream os; +  os << MetricId() << "=" << ComputeScore(stats); +  return os.str(); +} + +enum BleuType { IBM, Koehn, NIST }; +template <unsigned int N = 4u, BleuType BrevityType = IBM> +struct BleuSegmentEvaluator : public SegmentEvaluator { +  BleuSegmentEvaluator(const vector<vector<WordID> >& refs, const EvaluationMetric* em) : evaluation_metric(em) { +    assert(refs.size() > 0); +    float tot = 0; +    int smallest = 9999999; +    for (vector<vector<WordID> >::const_iterator ci = refs.begin(); +         ci != refs.end(); ++ci) { +      lengths_.push_back(ci->size()); +      tot += lengths_.back(); +      if (lengths_.back() < smallest) smallest = lengths_.back(); +      CountRef(*ci); +    } +    if (BrevityType == Koehn) +      lengths_[0] = tot / refs.size(); +    if (BrevityType == NIST) +      lengths_[0] = smallest; +  } + +  void Evaluate(const vector<WordID>& hyp, SufficientStats* out) const { +    out->fields.resize(N + N + 2); +    out->id_ = evaluation_metric->MetricId(); +    for (unsigned i = 0; i < N+N+2; ++i) out->fields[i] = 0; + +    ComputeNgramStats(hyp, &out->fields[0], &out->fields[N], true); +    float& hyp_len = out->fields[2*N]; +    float& ref_len = out->fields[2*N + 1]; +    hyp_len = hyp.size(); +    ref_len = lengths_[0]; +    if (lengths_.size() > 1 && BrevityType == IBM) { +      float bestd = 2000000; +      float hl = hyp.size(); +      float bl = -1; +      for (vector<float>::const_iterator ci = lengths_.begin(); ci != lengths_.end(); ++ci) { +        if (fabs(*ci - hl) < bestd) { +          bestd = fabs(*ci - hl); +          bl = *ci; +        } +      } +      ref_len = bl; +    } +  } + +  struct NGramCompare { +    int operator() (const vector<WordID>& a, const vector<WordID>& b) { +      const size_t as = a.size(); +      const size_t bs = b.size(); +      const size_t s = (as < bs ? as : bs); +      for (size_t i = 0; i < s; ++i) { +         int d = a[i] - b[i]; +         if (d < 0) return true; +         if (d > 0) return false; +      } +      return as < bs; +    } +  }; +  typedef map<vector<WordID>, pair<int,int>, NGramCompare> NGramCountMap; + +  void CountRef(const vector<WordID>& ref) { +    NGramCountMap tc; +    vector<WordID> ngram(N); +    int s = ref.size(); +    for (int j=0; j<s; ++j) { +      int remaining = s-j; +      int k = (N < remaining ? N : remaining); +      ngram.clear(); +      for (int i=1; i<=k; ++i) { +        ngram.push_back(ref[j + i - 1]); +        tc[ngram].first++; +      } +    } +    for (typename NGramCountMap::iterator i = tc.begin(); i != tc.end(); ++i) { +      pair<int,int>& p = ngrams_[i->first]; +      if (p.first < i->second.first) +        p = i->second; +    } +  } + +  void ComputeNgramStats(const vector<WordID>& sent, +                         float* correct,  // N elements reserved +                         float* hyp,      // N elements reserved +                         bool clip_counts = true) const { +    // clear clipping stats +    for (typename NGramCountMap::iterator it = ngrams_.begin(); it != ngrams_.end(); ++it) +      it->second.second = 0; + +    vector<WordID> ngram(N); +    *correct *= 0; +    *hyp *= 0; +    int s = sent.size(); +    for (int j=0; j<s; ++j) { +      int remaining = s-j; +      int k = (N < remaining ? N : remaining); +      ngram.clear(); +      for (int i=1; i<=k; ++i) { +        ngram.push_back(sent[j + i - 1]); +        pair<int,int>& p = ngrams_[ngram]; +        if(clip_counts){ +          if (p.second < p.first) { +            ++p.second; +            correct[i-1]++; +          } +        } else { +          ++p.second; +          correct[i-1]++; +        } +        // if the 1 gram isn't found, don't try to match don't need to match any 2- 3- .. grams: +        if (!p.first) { +          for (; i<=k; ++i) +            hyp[i-1]++; +        } else { +          hyp[i-1]++; +        } +      } +    } +  } + +  const EvaluationMetric* evaluation_metric; +  vector<float> lengths_; +  mutable NGramCountMap ngrams_; +}; + +template <unsigned int N = 4u, BleuType BrevityType = IBM> +struct BleuMetric : public EvaluationMetric { +  BleuMetric() : EvaluationMetric(BrevityType == IBM ? "IBM_BLEU" : (BrevityType == Koehn ? "KOEHN_BLEU" : "NIST_BLEU")) {} +  unsigned SufficientStatisticsVectorSize() const { return N*2 + 2; } +  shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const vector<vector<WordID> >& refs) const { +    return shared_ptr<SegmentEvaluator>(new BleuSegmentEvaluator<N,BrevityType>(refs, this)); +  } +  float ComputeBreakdown(const SufficientStats& stats, float* bp, vector<float>* out) const { +    if (out) { out->clear(); } +    float log_bleu = 0; +    int count = 0; +    for (int i = 0; i < N; ++i) { +      if (stats.fields[i+N] > 0) { +        float cor_count = stats.fields[i];  // correct_ngram_hit_counts[i]; +        // smooth bleu +        if (!cor_count) { cor_count = 0.01; } +        float lprec = log(cor_count) - log(stats.fields[i+N]); // log(hyp_ngram_counts[i]); +        if (out) out->push_back(exp(lprec)); +        log_bleu += lprec; +        ++count; +      } +    } +    log_bleu /= count; +    float lbp = 0.0; +    const float& hyp_len = stats.fields[2*N]; +    const float& ref_len = stats.fields[2*N + 1]; +    if (hyp_len < ref_len) +      lbp = (hyp_len - ref_len) / hyp_len; +    log_bleu += lbp; +    if (bp) *bp = exp(lbp); +    return exp(log_bleu); +  } +  string DetailedScore(const SufficientStats& stats) const { +    char buf[2000]; +    vector<float> precs(N); +    float bp; +    float bleu = ComputeBreakdown(stats, &bp, &precs); +    sprintf(buf, "%s = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)", +       MetricId().c_str(), +       bleu*100.0, +       precs[0]*100.0, +       precs[1]*100.0, +       precs[2]*100.0, +       precs[3]*100.0, +       bp); +    return buf; +  } +  float ComputeScore(const SufficientStats& stats) const { +    return ComputeBreakdown(stats, NULL, NULL); +  } +}; + +EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) { +  static bool is_first = true; +  if (is_first) { +    instances_["NULL"] = NULL; +    is_first = false; +  } +  const string metric_id = UppercaseString(imetric_id); + +  map<string, EvaluationMetric*>::iterator it = instances_.find(metric_id); +  if (it == instances_.end()) { +    EvaluationMetric* m = NULL;  +    if        (metric_id == "IBM_BLEU") { +      m = new BleuMetric<4, IBM>; +    } else if (metric_id == "NIST_BLEU") { +      m = new BleuMetric<4, NIST>; +    } else if (metric_id == "KOEHN_BLEU") { +      m = new BleuMetric<4, Koehn>; +    } else if (metric_id == "TER") { +      m = new TERMetric; +    } else if (metric_id == "METEOR") { +      m = new ExternalMetric("METEOR", "java -Xmx1536m -jar /Users/cdyer/software/meteor/meteor-1.3.jar - - -mira -lower -t tune -l en"); +    } else if (metric_id.find("COMB:") == 0) { +      m = new CombinationMetric(metric_id); +    } else { +      cerr << "Implement please: " << metric_id << endl; +      abort(); +    } +    if (m->MetricId() != metric_id) { +      cerr << "Registry error: " << metric_id << " vs. " << m->MetricId() << endl; +      abort(); +    } +    return instances_[metric_id] = m; +  } else { +    return it->second; +  } +} + +SufficientStats::SufficientStats(const string& encoded) { +  istringstream is(encoded); +  is >> id_; +  float val; +  while(is >> val) +    fields.push_back(val); +} + +void SufficientStats::Encode(string* out) const { +  ostringstream os; +  if (id_.size() > 0) +    os << id_; +  else +    os << "NULL"; +  for (unsigned i = 0; i < fields.size(); ++i) +    os << ' ' << fields[i]; +  *out = os.str(); +} + diff --git a/mteval/ns.h b/mteval/ns.h new file mode 100644 index 00000000..4e4c6975 --- /dev/null +++ b/mteval/ns.h @@ -0,0 +1,115 @@ +#ifndef _NS_H_ +#define _NS_H_ + +#include <string> +#include <vector> +#include <map> +#include <boost/shared_ptr.hpp> +#include "wordid.h" +#include <iostream> + +class SufficientStats { + public: +  SufficientStats() : id_() {} +  explicit SufficientStats(const std::string& encoded); +  SufficientStats(const std::string& mid, const std::vector<float>& f) : +    id_(mid), fields(f) {} + +  SufficientStats& operator+=(const SufficientStats& delta) { +    if (id_.empty() && delta.id_.size()) id_ = delta.id_; +    if (fields.size() != delta.fields.size()) +      fields.resize(std::max(fields.size(), delta.fields.size())); +    for (unsigned i = 0; i < delta.fields.size(); ++i) +      fields[i] += delta.fields[i]; +    return *this; +  } +  SufficientStats& operator-=(const SufficientStats& delta) { +    if (id_.empty() && delta.id_.size()) id_ = delta.id_; +    if (fields.size() != delta.fields.size()) +      fields.resize(std::max(fields.size(), delta.fields.size())); +    for (unsigned i = 0; i < delta.fields.size(); ++i) +      fields[i] -= delta.fields[i]; +    return *this; +  } +  SufficientStats& operator*=(const double& scalar) { +    for (unsigned i = 0; i < fields.size(); ++i) +      fields[i] *= scalar; +    return *this; +  } +  SufficientStats& operator/=(const double& scalar) { +    for (unsigned i = 0; i < fields.size(); ++i) +      fields[i] /= scalar; +    return *this; +  } +  bool operator==(const SufficientStats& other) const { +    return other.fields == fields; +  } +  bool IsAdditiveIdentity() const { +    for (unsigned i = 0; i < fields.size(); ++i) +      if (fields[i]) return false; +    return true; +  } +  size_t size() const { return fields.size(); } +  float operator[](size_t i) const { +    if (i < fields.size()) return fields[i]; +    return 0; +  } +  void Encode(std::string* out) const; + +  std::string id_; +  std::vector<float> fields; +}; + +inline const SufficientStats operator+(const SufficientStats& a, const SufficientStats& b) { +  SufficientStats res(a); +  return res += b; +} + +inline const SufficientStats operator-(const SufficientStats& a, const SufficientStats& b) { +  SufficientStats res(a); +  return res -= b; +} + +struct SegmentEvaluator { +  virtual ~SegmentEvaluator(); +  virtual void Evaluate(const std::vector<WordID>& hyp, SufficientStats* out) const = 0; +}; + +// Instructions for implementing a new metric +//   To Instance(), add something that creates the metric +//   Implement ComputeScore(const SufficientStats& stats) const; +//   Implement ONE of the following: +//      1) void ComputeSufficientStatistics(const std::vector<std::vector<WordID> >& refs, SufficientStats* out) const; +//      2) a new SegmentEvaluator class AND CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const; +//    [The later (#2) is only used when it is necessary to precompute per-segment data from a set of refs] +//   OPTIONAL: Override SufficientStatisticsVectorSize() if it is easy to do so +class EvaluationMetric { + public: +  static EvaluationMetric* Instance(const std::string& metric_id = "IBM_BLEU"); + + protected: +  EvaluationMetric(const std::string& id) : name_(id) {} +  virtual ~EvaluationMetric(); + + public: +  const std::string& MetricId() const { return name_; } + +  // returns true for metrics like WER and TER where lower scores are better +  // false for metrics like BLEU and METEOR where higher scores are better +  virtual bool IsErrorMetric() const; + +  virtual unsigned SufficientStatisticsVectorSize() const; +  virtual float ComputeScore(const SufficientStats& stats) const = 0; +  virtual std::string DetailedScore(const SufficientStats& stats) const; +  virtual boost::shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const; +  virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp, +                                           const std::vector<std::vector<WordID> >& refs, +                                           SufficientStats* out) const; + + private: +  static std::map<std::string, EvaluationMetric*> instances_; +  const std::string name_; +}; + +#endif + diff --git a/mteval/ns_comb.cc b/mteval/ns_comb.cc new file mode 100644 index 00000000..41c634cd --- /dev/null +++ b/mteval/ns_comb.cc @@ -0,0 +1,87 @@ +#include "ns_comb.h" + +#include <iostream> + +#include "stringlib.h" + +using namespace std; + +// e.g. COMB:IBM_BLEU=0.5;TER=0.5 +CombinationMetric::CombinationMetric(const std::string& cmd) : +    EvaluationMetric(cmd), +    total_size() { +  if (cmd.find("COMB:") != 0 || cmd.size() < 9) { +    cerr << "Error in combination metric specifier: " << cmd << endl; +    exit(1); +  } +  string mix = cmd.substr(5); +  vector<string> comps; +  Tokenize(cmd.substr(5), ';', &comps); +  if(comps.size() < 2) { +    cerr << "Error in combination metric specifier: " << cmd << endl; +    exit(1); +  } +  vector<string> cwpairs; +  for (unsigned i = 0; i < comps.size(); ++i) { +    Tokenize(comps[i], '=', &cwpairs); +    if (cwpairs.size() != 2) { cerr << "Error in combination metric specifier: " << cmd << endl; exit(1); } +    metrics.push_back(EvaluationMetric::Instance(cwpairs[0])); +    coeffs.push_back(atof(cwpairs[1].c_str())); +    offsets.push_back(total_size); +    total_size += metrics.back()->SufficientStatisticsVectorSize(); +    cerr << (i > 0 ? " + " : "( ") << coeffs.back() << " * " << cwpairs[0]; +  } +  cerr << " )\n"; +} + +struct CombinationSegmentEvaluator : public SegmentEvaluator { +  CombinationSegmentEvaluator(const string& id, +                              const vector<vector<WordID> >& refs, +                              const vector<EvaluationMetric*>& metrics, +                              const vector<unsigned>& offsets, +                              const unsigned ts) : id_(id), offsets_(offsets), total_size_(ts), component_evaluators_(metrics.size()) { +    for (unsigned i = 0; i < metrics.size(); ++i) +      component_evaluators_[i] = metrics[i]->CreateSegmentEvaluator(refs); +  } +  virtual void Evaluate(const std::vector<WordID>& hyp, SufficientStats* out) const { +    out->id_ = id_; +    out->fields.resize(total_size_); +    for (unsigned i = 0; i < component_evaluators_.size(); ++i) { +      SufficientStats t; +      component_evaluators_[i]->Evaluate(hyp, &t); +      for (unsigned j = 0; j < t.fields.size(); ++j) { +        unsigned op = j + offsets_[i]; +        assert(op < out->fields.size()); +        out->fields[op] = t[j]; +      } +    } +  } +  const string& id_; +  const vector<unsigned>& offsets_; +  const unsigned total_size_; +  vector<boost::shared_ptr<SegmentEvaluator> > component_evaluators_; +}; + +boost::shared_ptr<SegmentEvaluator> CombinationMetric::CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const { +  boost::shared_ptr<SegmentEvaluator> res; +  res.reset(new CombinationSegmentEvaluator(MetricId(), refs, metrics, offsets, total_size)); +  return res; +} + +float CombinationMetric::ComputeScore(const SufficientStats& stats) const { +  float tot = 0; +  for (unsigned i = 0; i < metrics.size(); ++i) { +    SufficientStats t; +    unsigned next = total_size; +    if (i + 1 < offsets.size()) next = offsets[i+1]; +    for (unsigned j = offsets[i]; j < next; ++j) +      t.fields.push_back(stats[j]); +    tot += metrics[i]->ComputeScore(t) * coeffs[i]; +  } +  return tot; +} + +unsigned CombinationMetric::SufficientStatisticsVectorSize() const { +  return total_size; +} + diff --git a/mteval/ns_comb.h b/mteval/ns_comb.h new file mode 100644 index 00000000..140e7e6a --- /dev/null +++ b/mteval/ns_comb.h @@ -0,0 +1,19 @@ +#ifndef _NS_COMB_H_ +#define _NS_COMB_H_ + +#include "ns.h" + +class CombinationMetric : public EvaluationMetric { + public: +  CombinationMetric(const std::string& cmd); +  virtual boost::shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const; +  virtual float ComputeScore(const SufficientStats& stats) const; +  virtual unsigned SufficientStatisticsVectorSize() const; + private: +  std::vector<EvaluationMetric*> metrics; +  std::vector<float> coeffs; +  std::vector<unsigned> offsets; +  unsigned total_size; +}; + +#endif diff --git a/mteval/ns_docscorer.cc b/mteval/ns_docscorer.cc new file mode 100644 index 00000000..28a2fd09 --- /dev/null +++ b/mteval/ns_docscorer.cc @@ -0,0 +1,60 @@ +#include "ns_docscorer.h" + +#include <iostream> +#include <cstring> + +#include "tdict.h" +#include "filelib.h" +#include "ns.h" + +using namespace std; + +DocumentScorer::~DocumentScorer() {} + +void DocumentScorer::Init(const EvaluationMetric* metric, +            const vector<string>& ref_files, +            const string& src_file, +            bool verbose) { +  scorers_.clear(); +  cerr << "Loading references (" << ref_files.size() << " files)\n"; +  assert(src_file.empty()); +  std::vector<ReadFile> ifs(ref_files.begin(),ref_files.end()); +  for (int i=0; i < ref_files.size(); ++i) ifs[i].Init(ref_files[i]); +  char buf[64000]; +  bool expect_eof = false; +  int line=0; +  while (ifs[0].get()) { +    vector<vector<WordID> > refs(ref_files.size()); +    for (int i=0; i < ref_files.size(); ++i) { +      istream &in=ifs[i].get(); +      if (in.eof()) break; +      in.getline(buf, 64000); +      refs[i].clear(); +      if (strlen(buf) == 0) { +        if (in.eof()) { +          if (!expect_eof) { +            assert(i == 0); +            expect_eof = true; +          } +          break; +        } +      } else { +        TD::ConvertSentence(buf, &refs[i]); +        assert(!refs[i].empty()); +      } +      assert(!expect_eof); +    } +    if (!expect_eof) { +      string src_line; +      //if (srcrf) { +      //  getline(srcrf.get(), src_line); +      //  map<string,string> dummy; +      //  ProcessAndStripSGML(&src_line, &dummy); +      //} +      scorers_.push_back(metric->CreateSegmentEvaluator(refs)); +      ++line; +    } +  } +  cerr << "Loaded reference translations for " << scorers_.size() << " sentences.\n"; +} + diff --git a/mteval/ns_docscorer.h b/mteval/ns_docscorer.h new file mode 100644 index 00000000..170ac627 --- /dev/null +++ b/mteval/ns_docscorer.h @@ -0,0 +1,31 @@ +#ifndef _NS_DOC_SCORER_H_ +#define _NS_DOC_SCORER_H_ + +#include <vector> +#include <string> +#include <boost/shared_ptr.hpp> + +struct EvaluationMetric; +struct SegmentEvaluator; +class DocumentScorer { + public: +  ~DocumentScorer(); +  DocumentScorer() {  } +  DocumentScorer(const EvaluationMetric* metric, +                 const std::vector<std::string>& ref_files, +                 const std::string& src_file = "", +                 bool verbose=false) { +    Init(metric,ref_files,src_file,verbose); +  } +  void Init(const EvaluationMetric* metric, +            const std::vector<std::string>& ref_files, +            const std::string& src_file = "", +            bool verbose=false); + +  int size() const { return scorers_.size(); } +  const SegmentEvaluator* operator[](size_t i) const { return scorers_[i].get(); } + private: +  std::vector<boost::shared_ptr<SegmentEvaluator> > scorers_; +}; + +#endif diff --git a/mteval/ns_ext.cc b/mteval/ns_ext.cc new file mode 100644 index 00000000..956708af --- /dev/null +++ b/mteval/ns_ext.cc @@ -0,0 +1,130 @@ +#include "ns_ext.h" + +#include <cstdio> // popen +#include <cstdlib> +#include <cstring> +#include <unistd.h> +#include <sstream> +#include <iostream> +#include <cassert> + +#include "stringlib.h" +#include "tdict.h" + +using namespace std; + +struct NScoreServer { +  NScoreServer(const std::string& cmd); +  ~NScoreServer(); + +  float ComputeScore(const std::vector<float>& fields); +  void Evaluate(const std::vector<std::vector<WordID> >& refs, const std::vector<WordID>& hyp, std::vector<float>* fields); + + private: +  void RequestResponse(const std::string& request, std::string* response); +  int p2c[2]; +  int c2p[2]; +}; + +NScoreServer::NScoreServer(const string& cmd) { +  cerr << "Invoking " << cmd << " ..." << endl; +  if (pipe(p2c) < 0) { perror("pipe"); exit(1); } +  if (pipe(c2p) < 0) { perror("pipe"); exit(1); } +  pid_t cpid = fork(); +  if (cpid < 0) { perror("fork"); exit(1); } +  if (cpid == 0) {  // child +    close(p2c[1]); +    close(c2p[0]); +    dup2(p2c[0], 0); +    close(p2c[0]); +    dup2(c2p[1], 1); +    close(c2p[1]); +    cerr << "Exec'ing from child " << cmd << endl; +    vector<string> vargs; +    SplitOnWhitespace(cmd, &vargs); +    const char** cargv = static_cast<const char**>(malloc(sizeof(const char*) * vargs.size())); +    for (unsigned i = 1; i < vargs.size(); ++i) cargv[i-1] = vargs[i].c_str(); +    cargv[vargs.size() - 1] = NULL; +    execvp(vargs[0].c_str(), (char* const*)cargv); +  } else { // parent +    close(c2p[1]); +    close(p2c[0]); +  } +  string dummy; +  RequestResponse("SCORE ||| Reference initialization string . ||| Testing initialization string .", &dummy); +  assert(dummy.size() > 0); +  cerr << "Connection established.\n"; +} + +NScoreServer::~NScoreServer() { +  // TODO close stuff, join stuff +} + +float NScoreServer::ComputeScore(const vector<float>& fields) { +  ostringstream os; +  os << "EVAL |||"; +  for (unsigned i = 0; i < fields.size(); ++i) +    os << ' ' << fields[i]; +  string sres; +  RequestResponse(os.str(), &sres); +  return strtod(sres.c_str(), NULL); +} + +void NScoreServer::Evaluate(const vector<vector<WordID> >& refs, const vector<WordID>& hyp, vector<float>* fields) { +  ostringstream os; +  os << "SCORE"; +  for (unsigned i = 0; i < refs.size(); ++i) { +    os << " |||"; +    for (unsigned j = 0; j < refs[i].size(); ++j) { +      os << ' ' << TD::Convert(refs[i][j]); +    } +  } +  os << " |||"; +  for (unsigned i = 0; i < hyp.size(); ++i) { +    os << ' ' << TD::Convert(hyp[i]); +  } +  string sres; +  RequestResponse(os.str(), &sres); +  istringstream is(sres); +  float val; +  fields->clear(); +  while(is >> val) +    fields->push_back(val); +} + +#define MAX_BUF 16000 + +void NScoreServer::RequestResponse(const string& request, string* response) { +//  cerr << "@SERVER: " << request << endl; +  string x = request + "\n"; +  write(p2c[1], x.c_str(), x.size()); +  char buf[MAX_BUF]; +  size_t n = read(c2p[0], buf, MAX_BUF); +  while (n < MAX_BUF && buf[n-1] != '\n') +    n += read(c2p[0], &buf[n], MAX_BUF - n); + +  buf[n-1] = 0; +  if (n < 2) { +    cerr << "Malformed response: " << buf << endl; +  } +  *response = Trim(buf, " \t\n"); +//  cerr << "@RESPONSE: '" << *response << "'\n"; +} + +void ExternalMetric::ComputeSufficientStatistics(const std::vector<WordID>& hyp, +                                           const std::vector<std::vector<WordID> >& refs, +                                           SufficientStats* out) const { +  eval_server->Evaluate(refs, hyp, &out->fields); +} + +float ExternalMetric::ComputeScore(const SufficientStats& stats) const { +  eval_server->ComputeScore(stats.fields); +} + +ExternalMetric::ExternalMetric(const string& metric_name, const std::string& command) : +    EvaluationMetric(metric_name), +    eval_server(new NScoreServer(command)) {} + +ExternalMetric::~ExternalMetric() { +  delete eval_server; +} diff --git a/mteval/ns_ext.h b/mteval/ns_ext.h new file mode 100644 index 00000000..78badb2e --- /dev/null +++ b/mteval/ns_ext.h @@ -0,0 +1,21 @@ +#ifndef _NS_EXTERNAL_SCORER_H_ +#define _NS_EXTERNAL_SCORER_H_ + +#include "ns.h" + +struct NScoreServer; +class ExternalMetric : public EvaluationMetric { + public: +  ExternalMetric(const std::string& metricid, const std::string& command); +  ~ExternalMetric(); + +  virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp, +                                           const std::vector<std::vector<WordID> >& refs, +                                           SufficientStats* out) const; +  virtual float ComputeScore(const SufficientStats& stats) const; + + protected: +  NScoreServer* eval_server; +}; + +#endif diff --git a/mteval/ns_ter.cc b/mteval/ns_ter.cc new file mode 100644 index 00000000..0e1008db --- /dev/null +++ b/mteval/ns_ter.cc @@ -0,0 +1,492 @@ +#include "ns_ter.h" + +#include <cstdio> +#include <cassert> +#include <iostream> +#include <limits> +#include <tr1/unordered_map> +#include <set> +#include <boost/functional/hash.hpp> +#include "tdict.h" + +static const bool ter_use_average_ref_len = true; +static const int ter_short_circuit_long_sentences = -1; + +static const unsigned kINSERTIONS = 0; +static const unsigned kDELETIONS = 1; +static const unsigned kSUBSTITUTIONS = 2; +static const unsigned kSHIFTS = 3; +static const unsigned kREF_WORDCOUNT = 4; +static const unsigned kDUMMY_LAST_ENTRY = 5; + +using namespace std; +using namespace std::tr1; + +bool TERMetric::IsErrorMetric() const { +  return true; +} + +namespace NewScorer { + +struct COSTS { +  static const float substitution; +  static const float deletion; +  static const float insertion; +  static const float shift; +}; +const float COSTS::substitution = 1.0f; +const float COSTS::deletion = 1.0f; +const float COSTS::insertion = 1.0f; +const float COSTS::shift = 1.0f; + +static const int MAX_SHIFT_SIZE = 10; +static const int MAX_SHIFT_DIST = 50; + +struct Shift { +  unsigned int d_; +  Shift() : d_() {} +  Shift(int b, int e, int m) : d_() { +    begin(b); +    end(e); +    moveto(m); +  } +  inline int begin() const { +    return d_ & 0x3ff; +  } +  inline int end() const { +    return (d_ >> 10) & 0x3ff; +  } +  inline int moveto() const { +    int m = (d_ >> 20) & 0x7ff; +    if (m > 1024) { m -= 1024; m *= -1; } +    return m; +  } +  inline void begin(int b) { +    d_ &= 0xfffffc00u; +    d_ |= (b & 0x3ff); +  } +  inline void end(int e) { +    d_ &= 0xfff003ffu; +    d_ |= (e & 0x3ff) << 10; +  } +  inline void moveto(int m) { +    bool neg = (m < 0); +    if (neg) { m *= -1; m += 1024; } +    d_ &= 0xfffff; +    d_ |= (m & 0x7ff) << 20; +  } +}; + +class TERScorerImpl { + + public: +  enum TransType { MATCH, SUBSTITUTION, INSERTION, DELETION }; + +  explicit TERScorerImpl(const vector<WordID>& ref) : ref_(ref) { +    for (unsigned i = 0; i < ref.size(); ++i) +      rwexists_.insert(ref[i]); +  } + +  float Calculate(const vector<WordID>& hyp, int* subs, int* ins, int* dels, int* shifts) const { +    return CalculateAllShifts(hyp, subs, ins, dels, shifts); +  } + +  inline int GetRefLength() const { +    return ref_.size(); +  } + + private: +  const vector<WordID>& ref_; +  set<WordID> rwexists_; + +  typedef unordered_map<vector<WordID>, set<int>, boost::hash<vector<WordID> > > NgramToIntsMap; +  mutable NgramToIntsMap nmap_; + +  static float MinimumEditDistance( +      const vector<WordID>& hyp, +      const vector<WordID>& ref, +      vector<TransType>* path) { +    vector<vector<TransType> > bmat(hyp.size() + 1, vector<TransType>(ref.size() + 1, MATCH)); +    vector<vector<float> > cmat(hyp.size() + 1, vector<float>(ref.size() + 1, 0)); +    for (int i = 0; i <= hyp.size(); ++i) +      cmat[i][0] = i; +    for (int j = 0; j <= ref.size(); ++j) +      cmat[0][j] = j; +    for (int i = 1; i <= hyp.size(); ++i) { +      const WordID& hw = hyp[i-1]; +      for (int j = 1; j <= ref.size(); ++j) { +        const WordID& rw = ref[j-1]; +	float& cur_c = cmat[i][j]; +	TransType& cur_b = bmat[i][j]; + +        if (rw == hw) { +          cur_c = cmat[i-1][j-1]; +          cur_b = MATCH; +        } else { +          cur_c = cmat[i-1][j-1] + COSTS::substitution; +          cur_b = SUBSTITUTION; +        } +	float cwoi = cmat[i-1][j]; +        if (cur_c > cwoi + COSTS::insertion) { +          cur_c = cwoi + COSTS::insertion; +          cur_b = INSERTION; +        } +        float cwod = cmat[i][j-1]; +        if (cur_c > cwod + COSTS::deletion) { +          cur_c = cwod + COSTS::deletion; +          cur_b = DELETION; +        } +      } +    } + +    // trace back along the best path and record the transition types +    path->clear(); +    int i = hyp.size(); +    int j = ref.size(); +    while (i > 0 || j > 0) { +      if (j == 0) { +        --i; +        path->push_back(INSERTION); +      } else if (i == 0) { +        --j; +        path->push_back(DELETION); +      } else { +        TransType t = bmat[i][j]; +        path->push_back(t); +        switch (t) { +          case SUBSTITUTION: +          case MATCH: +            --i; --j; break; +          case INSERTION: +            --i; break; +          case DELETION: +            --j; break; +        } +      } +    } +    reverse(path->begin(), path->end()); +    return cmat[hyp.size()][ref.size()]; +  } + +  void BuildWordMatches(const vector<WordID>& hyp, NgramToIntsMap* nmap) const { +    nmap->clear(); +    set<WordID> exists_both; +    for (int i = 0; i < hyp.size(); ++i) +      if (rwexists_.find(hyp[i]) != rwexists_.end()) +        exists_both.insert(hyp[i]); +    for (int start=0; start<ref_.size(); ++start) { +      if (exists_both.find(ref_[start]) == exists_both.end()) continue; +      vector<WordID> cp; +      int mlen = min(MAX_SHIFT_SIZE, static_cast<int>(ref_.size() - start)); +      for (int len=0; len<mlen; ++len) { +        if (len && exists_both.find(ref_[start + len]) == exists_both.end()) break; +        cp.push_back(ref_[start + len]); +	(*nmap)[cp].insert(start); +      } +    } +  } + +  static void PerformShift(const vector<WordID>& in, +    int start, int end, int moveto, vector<WordID>* out) { +    // cerr << "ps: " << start << " " << end << " " << moveto << endl; +    out->clear(); +    if (moveto == -1) { +      for (int i = start; i <= end; ++i) +       out->push_back(in[i]); +      for (int i = 0; i < start; ++i) +       out->push_back(in[i]); +      for (int i = end+1; i < in.size(); ++i) +       out->push_back(in[i]); +    } else if (moveto < start) { +      for (int i = 0; i <= moveto; ++i) +       out->push_back(in[i]); +      for (int i = start; i <= end; ++i) +       out->push_back(in[i]); +      for (int i = moveto+1; i < start; ++i) +       out->push_back(in[i]); +      for (int i = end+1; i < in.size(); ++i) +       out->push_back(in[i]); +    } else if (moveto > end) { +      for (int i = 0; i < start; ++i) +       out->push_back(in[i]); +      for (int i = end+1; i <= moveto; ++i) +       out->push_back(in[i]); +      for (int i = start; i <= end; ++i) +       out->push_back(in[i]); +      for (int i = moveto+1; i < in.size(); ++i) +       out->push_back(in[i]); +    } else { +      for (int i = 0; i < start; ++i) +       out->push_back(in[i]); +      for (int i = end+1; (i < in.size()) && (i <= end + (moveto - start)); ++i) +       out->push_back(in[i]); +      for (int i = start; i <= end; ++i) +       out->push_back(in[i]); +      for (int i = (end + (moveto - start))+1; i < in.size(); ++i) +       out->push_back(in[i]); +    } +    if (out->size() != in.size()) { +      cerr << "ps: " << start << " " << end << " " << moveto << endl; +      cerr << "in=" << TD::GetString(in) << endl; +      cerr << "out=" << TD::GetString(*out) << endl; +    } +    assert(out->size() == in.size()); +    // cerr << "ps: " << TD::GetString(*out) << endl; +  } + +  void GetAllPossibleShifts(const vector<WordID>& hyp, +      const vector<int>& ralign, +      const vector<bool>& herr, +      const vector<bool>& rerr, +      const int min_size, +      vector<vector<Shift> >* shifts) const { +    for (int start = 0; start < hyp.size(); ++start) { +      vector<WordID> cp(1, hyp[start]); +      NgramToIntsMap::iterator niter = nmap_.find(cp); +      if (niter == nmap_.end()) continue; +      bool ok = false; +      int moveto; +      for (set<int>::iterator i = niter->second.begin(); i != niter->second.end(); ++i) { +        moveto = *i; +        int rm = ralign[moveto]; +        ok = (start != rm && +              (rm - start) < MAX_SHIFT_DIST && +              (start - rm - 1) < MAX_SHIFT_DIST); +        if (ok) break; +      } +      if (!ok) continue; +      cp.clear(); +      for (int end = start + min_size - 1; +           ok && end < hyp.size() && end < (start + MAX_SHIFT_SIZE); ++end) { +        cp.push_back(hyp[end]); +	vector<Shift>& sshifts = (*shifts)[end - start]; +        ok = false; +        NgramToIntsMap::iterator niter = nmap_.find(cp); +        if (niter == nmap_.end()) break; +        bool any_herr = false; +        for (int i = start; i <= end && !any_herr; ++i) +          any_herr = herr[i]; +        if (!any_herr) { +          ok = true; +          continue; +        } +        for (set<int>::iterator mi = niter->second.begin(); +             mi != niter->second.end(); ++mi) { +          int moveto = *mi; +	  int rm = ralign[moveto]; +	  if (! ((rm != start) && +	        ((rm < start) || (rm > end)) && +		(rm - start <= MAX_SHIFT_DIST) && +		((start - rm - 1) <= MAX_SHIFT_DIST))) continue; +          ok = true; +	  bool any_rerr = false; +	  for (int i = 0; (i <= end - start) && (!any_rerr); ++i) +            any_rerr = rerr[moveto+i]; +	  if (!any_rerr) continue; +	  for (int roff = 0; roff <= (end - start); ++roff) { +	    int rmr = ralign[moveto+roff]; +	    if ((start != rmr) && ((roff == 0) || (rmr != ralign[moveto]))) +	      sshifts.push_back(Shift(start, end, moveto + roff)); +	  } +        } +      } +    } +  } + +  bool CalculateBestShift(const vector<WordID>& cur, +                          const vector<WordID>& hyp, +                          float curerr, +                          const vector<TransType>& path, +                          vector<WordID>* new_hyp, +                          float* newerr, +                          vector<TransType>* new_path) const { +    vector<bool> herr, rerr; +    vector<int> ralign; +    int hpos = -1; +    for (int i = 0; i < path.size(); ++i) { +      switch (path[i]) { +        case MATCH: +	  ++hpos; +	  herr.push_back(false); +	  rerr.push_back(false); +	  ralign.push_back(hpos); +          break; +        case SUBSTITUTION: +	  ++hpos; +	  herr.push_back(true); +	  rerr.push_back(true); +	  ralign.push_back(hpos); +          break; +        case INSERTION: +	  ++hpos; +	  herr.push_back(true); +          break; +	case DELETION: +	  rerr.push_back(true); +	  ralign.push_back(hpos); +          break; +      } +    } +#if 0 +    cerr << "RALIGN: "; +    for (int i = 0; i < rerr.size(); ++i) +      cerr << ralign[i] << " "; +    cerr << endl; +    cerr << "RERR: "; +    for (int i = 0; i < rerr.size(); ++i) +      cerr << (bool)rerr[i] << " "; +    cerr << endl; +    cerr << "HERR: "; +    for (int i = 0; i < herr.size(); ++i) +      cerr << (bool)herr[i] << " "; +    cerr << endl; +#endif + +    vector<vector<Shift> > shifts(MAX_SHIFT_SIZE + 1); +    GetAllPossibleShifts(cur, ralign, herr, rerr, 1, &shifts); +    float cur_best_shift_cost = 0; +    *newerr = curerr; +    vector<TransType> cur_best_path; +    vector<WordID> cur_best_hyp; + +    bool res = false; +    for (int i = shifts.size() - 1; i >=0; --i) { +      float curfix = curerr - (cur_best_shift_cost + *newerr); +      float maxfix = 2.0f * (1 + i) - COSTS::shift; +      if ((curfix > maxfix) || ((cur_best_shift_cost == 0) && (curfix == maxfix))) break; +      for (int j = 0; j < shifts[i].size(); ++j) { +        const Shift& s = shifts[i][j]; +	curfix = curerr - (cur_best_shift_cost + *newerr); +	maxfix = 2.0f * (1 + i) - COSTS::shift;  // TODO remove? +        if ((curfix > maxfix) || ((cur_best_shift_cost == 0) && (curfix == maxfix))) continue; +	vector<WordID> shifted(cur.size()); +	PerformShift(cur, s.begin(), s.end(), ralign[s.moveto()], &shifted); +	vector<TransType> try_path; +	float try_cost = MinimumEditDistance(shifted, ref_, &try_path); +	float gain = (*newerr + cur_best_shift_cost) - (try_cost + COSTS::shift); +	if (gain > 0.0f || ((cur_best_shift_cost == 0.0f) && (gain == 0.0f))) { +	  *newerr = try_cost; +	  cur_best_shift_cost = COSTS::shift; +	  new_path->swap(try_path); +	  new_hyp->swap(shifted); +	  res = true; +	  // cerr << "Found better shift " << s.begin() << "..." << s.end() << " moveto " << s.moveto() << endl; +	} +      } +    } + +    return res; +  } + +  static void GetPathStats(const vector<TransType>& path, int* subs, int* ins, int* dels) { +    *subs = *ins = *dels = 0; +    for (int i = 0; i < path.size(); ++i) { +      switch (path[i]) { +        case SUBSTITUTION: +	  ++(*subs); +        case MATCH: +          break; +        case INSERTION: +          ++(*ins); break; +	case DELETION: +          ++(*dels); break; +      } +    } +  } + +  float CalculateAllShifts(const vector<WordID>& hyp, +      int* subs, int* ins, int* dels, int* shifts) const { +    BuildWordMatches(hyp, &nmap_); +    vector<TransType> path; +    float med_cost = MinimumEditDistance(hyp, ref_, &path); +    float edits = 0; +    vector<WordID> cur = hyp; +    *shifts = 0; +    if (ter_short_circuit_long_sentences < 0 || +        ref_.size() < ter_short_circuit_long_sentences) { +      while (true) { +        vector<WordID> new_hyp; +        vector<TransType> new_path; +        float new_med_cost; +        if (!CalculateBestShift(cur, hyp, med_cost, path, &new_hyp, &new_med_cost, &new_path)) +          break; +        edits += COSTS::shift; +        ++(*shifts); +        med_cost = new_med_cost; +        path.swap(new_path); +        cur.swap(new_hyp); +      } +    } +    GetPathStats(path, subs, ins, dels); +    return med_cost + edits; +  } +}; + +#if 0 +void TERScore::ScoreDetails(std::string* details) const { +  char buf[200]; +  sprintf(buf, "TER = %.2f, %3d|%3d|%3d|%3d (len=%d)", +     ComputeScore() * 100.0f, +     stats[kINSERTIONS], +     stats[kDELETIONS], +     stats[kSUBSTITUTIONS], +     stats[kSHIFTS], +     stats[kREF_WORDCOUNT]); +  *details = buf; +} +#endif + +} // namespace NewScorer + +void TERMetric::ComputeSufficientStatistics(const vector<WordID>& hyp, +                                            const vector<vector<WordID> >& refs, +                                            SufficientStats* out) const { +  out->fields.resize(kDUMMY_LAST_ENTRY); +  float best_score = numeric_limits<float>::max(); +  unsigned avg_len = 0; +  for (int i = 0; i < refs.size(); ++i) +    avg_len += refs[i].size(); +  avg_len /= refs.size(); + +  for (int i = 0; i < refs.size(); ++i) { +    int subs, ins, dels, shifts; +    NewScorer::TERScorerImpl ter(refs[i]); +    float score = ter.Calculate(hyp, &subs, &ins, &dels, &shifts); +    // cerr << "Component TER cost: " << score << endl; +    if (score < best_score) { +      out->fields[kINSERTIONS] = ins; +      out->fields[kDELETIONS] = dels; +      out->fields[kSUBSTITUTIONS] = subs; +      out->fields[kSHIFTS] = shifts; +      if (ter_use_average_ref_len) { +        out->fields[kREF_WORDCOUNT] = avg_len; +      } else { +        out->fields[kREF_WORDCOUNT] = refs[i].size(); +      } + +      best_score = score; +    } +  } +} + +unsigned TERMetric::SufficientStatisticsVectorSize() const { +  return kDUMMY_LAST_ENTRY; +} + +float TERMetric::ComputeScore(const SufficientStats& stats) const { +  float edits = static_cast<float>(stats[kINSERTIONS] + stats[kDELETIONS] + stats[kSUBSTITUTIONS] + stats[kSHIFTS]); +  return edits / static_cast<float>(stats[kREF_WORDCOUNT]); +} + +string TERMetric::DetailedScore(const SufficientStats& stats) const { +  char buf[200]; +  sprintf(buf, "TER = %.2f, %3.f|%3.f|%3.f|%3.f (len=%3.f)", +     ComputeScore(stats) * 100.0f, +     stats[kINSERTIONS], +     stats[kDELETIONS], +     stats[kSUBSTITUTIONS], +     stats[kSHIFTS], +     stats[kREF_WORDCOUNT]); +  return buf; +} + diff --git a/mteval/ns_ter.h b/mteval/ns_ter.h new file mode 100644 index 00000000..c5c25413 --- /dev/null +++ b/mteval/ns_ter.h @@ -0,0 +1,21 @@ +#ifndef _NS_TER_H_ +#define _NS_TER_H_ + +#include "ns.h" + +class TERMetric : public EvaluationMetric { +  friend class EvaluationMetric; + protected: +  TERMetric() : EvaluationMetric("TER") {} + + public: +  virtual bool IsErrorMetric() const; +  virtual unsigned SufficientStatisticsVectorSize() const; +  virtual std::string DetailedScore(const SufficientStats& stats) const; +  virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp, +                                           const std::vector<std::vector<WordID> >& refs, +                                           SufficientStats* out) const; +  virtual float ComputeScore(const SufficientStats& stats) const; +}; + +#endif diff --git a/mteval/scorer_test.cc b/mteval/scorer_test.cc index a07a8c4b..73159557 100644 --- a/mteval/scorer_test.cc +++ b/mteval/scorer_test.cc @@ -3,9 +3,11 @@  #include <valarray>  #include <gtest/gtest.h> +#include "ns.h"  #include "tdict.h"  #include "scorer.h"  #include "aer_scorer.h" +#include "kernel_string_subseq.h"  using namespace std; @@ -175,6 +177,52 @@ TEST_F(ScorerTest, AERTest) {    EXPECT_EQ(d2, details);  } +TEST_F(ScorerTest, Kernel) { +  for (int i = 1; i < 10; ++i) { +    const float l = (i / 10.0); +    float f = ssk<4>(refs0[0], hyp1, l) + +              ssk<4>(refs0[1], hyp1, l) + +              ssk<4>(refs0[2], hyp1, l) + +              ssk<4>(refs0[3], hyp1, l); +    float f2= ssk<4>(refs1[0], hyp2, l) + +              ssk<4>(refs1[1], hyp2, l) + +              ssk<4>(refs1[2], hyp2, l) + +              ssk<4>(refs1[3], hyp2, l); +    f /= 4; +    f2 /= 4; +    float f3= ssk<4>(refs0[0], hyp2, l) + +              ssk<4>(refs0[1], hyp2, l) + +              ssk<4>(refs0[2], hyp2, l) + +              ssk<4>(refs0[3], hyp2, l); +    float f4= ssk<4>(refs1[0], hyp1, l) + +              ssk<4>(refs1[1], hyp1, l) + +              ssk<4>(refs1[2], hyp1, l) + +              ssk<4>(refs1[3], hyp1, l); +    f3 += f4; +    f3 /= 8; +    cerr << "LAMBDA=" << l << "\t" << f << " " << f2 << "\tf=" << ((f + f2)/2 - f3) << " (bad=" << f3 << ")" << endl; +  } +} + +TEST_F(ScorerTest, NewScoreAPI) { +  //EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU"); +  //EvaluationMetric* metric = EvaluationMetric::Instance("METEOR"); +  EvaluationMetric* metric = EvaluationMetric::Instance("COMB:IBM_BLEU=0.5;TER=-0.5"); +  boost::shared_ptr<SegmentEvaluator> e1 = metric->CreateSegmentEvaluator(refs0); +  boost::shared_ptr<SegmentEvaluator> e2 = metric->CreateSegmentEvaluator(refs1); +  SufficientStats stats1; +  e1->Evaluate(hyp1, &stats1); +  SufficientStats stats2; +  e2->Evaluate(hyp2, &stats2); +  stats1 += stats2; +  string ss; +  stats1.Encode(&ss); +  cerr << "SS: " << ss << endl; +  cerr << metric->ComputeScore(stats1) << endl; +  //SufficientStats statse("IBM_BLEU 53 32 18 11 65 63 61 59 65 72"); +  //cerr << metric->ComputeScore(statse) << endl; +} +  int main(int argc, char **argv) {    testing::InitGoogleTest(&argc, argv);    return RUN_ALL_TESTS(); | 
