From 0da1f6de1b33bbff5cb99b1938bb07d050479f10 Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Wed, 14 Dec 2011 21:02:50 -0800
Subject: random incomplete metric stuff, including string subsequence kernel impl

---
 mteval/ns_ter.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 mteval/ns_ter.h

(limited to 'mteval/ns_ter.h')

diff --git a/mteval/ns_ter.h b/mteval/ns_ter.h
new file mode 100644
index 00000000..bb90f95e
--- /dev/null
+++ b/mteval/ns_ter.h
@@ -0,0 +1,18 @@
+#ifndef _NS_TER_H_
+#define _NS_TER_H_
+
+#include "ns.h"
+
+class TERMetric : public EvaluationMetric {
+  friend class EvaluationMetric;
+ protected:
+  TERMetric() : EvaluationMetric("TER") {}
+
+ public:
+  virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
+                                           const std::vector<std::vector<WordID> >& refs,
+                                           SufficientStats* out) const;
+  virtual float ComputeScore(const SufficientStats& stats) const;
+};
+
+#endif
--
cgit v1.2.3


From 2eb3bb96c6f780c477585b33273fc0c0d56c80e4 Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Tue, 20 Dec 2011 15:51:11 -0500
Subject: new scorer interface is implemented, but not used

---
 mteval/Makefile.am    |   2 +-
 mteval/ns.cc          |  67 ++++++++++++++++++++------
 mteval/ns.h           |  23 +++++----
 mteval/ns_comb.cc     |  87 +++++++++++++++++++++++++++++++++
 mteval/ns_comb.h      |  19 ++++++++
 mteval/ns_ext.cc      | 130 ++++++++++++++++++++++++++++++++++++++++++++++++++
 mteval/ns_ext.h       |  21 ++++++++
 mteval/ns_ter.cc      | 126 ++++++++++--------------------------------------
 mteval/ns_ter.h       |   1 +
 mteval/scorer_test.cc |  12 +++--
 utils/stringlib.h     |   7 +++
 11 files changed, 362 insertions(+), 133 deletions(-)
 create mode 100644 mteval/ns_comb.cc
 create mode 100644 mteval/ns_comb.h
 create mode 100644 mteval/ns_ext.cc
 create mode 100644 mteval/ns_ext.h

(limited to 'mteval/ns_ter.h')

diff --git a/mteval/Makefile.am b/mteval/Makefile.am
index 95845090..6679d949 100644
--- a/mteval/Makefile.am
+++ b/mteval/Makefile.am
@@ -10,7 +10,7 @@ endif
 
 noinst_LIBRARIES = libmteval.a
 
-libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc
+libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc
 
 fast_score_SOURCES = fast_score.cc
 fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz
diff --git a/mteval/ns.cc b/mteval/ns.cc
index 1045a51f..6139757d 100644
--- a/mteval/ns.cc
+++ b/mteval/ns.cc
@@ -1,5 +1,7 @@
 #include "ns.h"
 #include "ns_ter.h"
+#include "ns_ext.h"
+#include "ns_comb.h"
 
 #include <cstdio>
 #include <map>
@@ -7,6 +9,9 @@
 #include <sstream>
 #include <cmath>
 
+#include "tdict.h"
+#include "stringlib.h"
+
 using namespace std;
 using boost::shared_ptr;
 
@@ -19,6 +24,7 @@ struct DefaultSegmentEvaluator : public SegmentEvaluator {
   DefaultSegmentEvaluator(const vector<vector<WordID> >& refs, const EvaluationMetric* em) : refs_(refs), em_(em) {}
   void Evaluate(const vector<WordID>& hyp, SufficientStats* out) const {
     em_->ComputeSufficientStatistics(hyp, refs_, out);
+    out->id_ = em_->MetricId();
   }
   const vector<vector<WordID> > refs_;
   const EvaluationMetric* em_;
@@ -28,6 +34,11 @@ shared_ptr<SegmentEvaluator> EvaluationMetric::CreateSegmentEvaluator(const vect
   return shared_ptr<SegmentEvaluator>(new DefaultSegmentEvaluator(refs, this));
 }
 
+#define MAX_SS_VECTOR_SIZE 50
+unsigned EvaluationMetric::SufficientStatisticsVectorSize() const {
+  return MAX_SS_VECTOR_SIZE;
+}
+
 void EvaluationMetric::ComputeSufficientStatistics(const vector<WordID>&,
                                                    const vector<vector<WordID> >&,
                                                    SufficientStats*) const {
@@ -35,6 +46,12 @@ void EvaluationMetric::ComputeSufficientStatistics(const vector<WordID>&,
   abort();
 }
 
+string EvaluationMetric::DetailedScore(const SufficientStats& stats) const {
+  ostringstream os;
+  os << MetricId() << "=" << ComputeScore(stats);
+  return os.str();
+}
+
 enum BleuType { IBM, Koehn, NIST };
 template <unsigned N, BleuType BrevityType>
 struct BleuSegmentEvaluator : public SegmentEvaluator {
@@ -57,7 +74,7 @@ struct BleuSegmentEvaluator : public SegmentEvaluator {
 
   void Evaluate(const vector<WordID>& hyp, SufficientStats* out) const {
     out->fields.resize(N + N + 2);
-    out->evaluation_metric = evaluation_metric;
+    out->id_ = evaluation_metric->MetricId();
     for (unsigned i = 0; i < N+N+2; ++i) out->fields[i] = 0;
     ComputeNgramStats(hyp, &out->fields[0], &out->fields[N], true);
 
@@ -157,7 +174,12 @@ struct BleuSegmentEvaluator : public SegmentEvaluator {
 template <unsigned N, BleuType BrevityType>
 struct BleuMetric : public EvaluationMetric {
   BleuMetric() : EvaluationMetric("IBM_BLEU") {}
-  float ComputeScore(const SufficientStats& stats) const {
+  unsigned SufficientStatisticsVectorSize() const { return N*2 + 2; }
+  shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const vector<vector<WordID> >& refs) const {
+    return shared_ptr<SegmentEvaluator>(new BleuSegmentEvaluator<N,BrevityType>(refs, this));
+  }
+  float ComputeBreakdown(const SufficientStats& stats, float* bp, vector<float>* out) const {
+    if (out) { out->clear(); }
     float log_bleu = 0;
     int count = 0;
     for (int i = 0; i < N; ++i) {
@@ -166,7 +188,7 @@ struct BleuMetric : public EvaluationMetric {
         // smooth bleu
         if (!cor_count) { cor_count = 0.01; }
         float lprec = log(cor_count) - log(stats.fields[i+N]);  // log(hyp_ngram_counts[i]);
-        // if (precs) precs->push_back(exp(lprec));
+        if (out) out->push_back(exp(lprec));
         log_bleu += lprec;
         ++count;
       }
@@ -178,32 +200,51 @@ struct BleuMetric : public EvaluationMetric {
     if (hyp_len < ref_len)
       lbp = (hyp_len - ref_len) / hyp_len;
     log_bleu += lbp;
-    //if (bp) *bp = exp(lbp);
+    if (bp) *bp = exp(lbp);
     return exp(log_bleu);
   }
-  shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const vector<vector<WordID> >& refs) const {
-    return shared_ptr<SegmentEvaluator>(new BleuSegmentEvaluator<N,BrevityType>(refs, this));
+  string DetailedScore(const SufficientStats& stats) const {
+    char buf[2000];
+    vector<float> precs(N);
+    float bp;
+    float bleu = ComputeBreakdown(stats, &bp, &precs);
+    sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)",
+                 bleu*100.0,
+                 precs[0]*100.0,
+                 precs[1]*100.0,
+                 precs[2]*100.0,
+                 precs[3]*100.0,
+                 bp);
+    return buf;
+  }
+  float ComputeScore(const SufficientStats& stats) const {
+    return ComputeBreakdown(stats, NULL, NULL);
   }
 };
 
-EvaluationMetric* EvaluationMetric::Instance(const string& metric_id) {
+EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) {
   static bool is_first = true;
   if (is_first) {
     instances_["NULL"] = NULL;
     is_first = false;
   }
+  const string metric_id = UppercaseString(imetric_id);
   map<string, EvaluationMetric*>::iterator it = instances_.find(metric_id);
   if (it == instances_.end()) {
     EvaluationMetric* m = NULL;
-    if (metric_id == "IBM_BLEU") {
+    if (metric_id == "IBM_BLEU") {
       m = new BleuMetric<4, IBM>;
     } else if (metric_id == "NIST_BLEU") {
       m = new BleuMetric<4, NIST>;
-    } else if (metric_id == "Koehn_BLEU") {
+    } else if (metric_id == "KOEHN_BLEU") {
       m = new BleuMetric<4, Koehn>;
     } else if (metric_id == "TER") {
       m = new TERMetric;
+    } else if (metric_id == "METEOR") {
+      m = new ExternalMetric("METEOR", "java -Xmx1536m -jar /Users/cdyer/software/meteor/meteor-1.3.jar - - -mira -lower -t tune -l en");
+    } else if (metric_id.find("COMB:") == 0) {
+      m = new CombinationMetric(metric_id);
     } else {
       cerr << "Implement please: " << metric_id << endl;
       abort();
@@ -220,9 +261,7 @@ EvaluationMetric* EvaluationMetric::Instance(const string& metric_id) {
 
 SufficientStats::SufficientStats(const string& encoded) {
   istringstream is(encoded);
-  string type;
-  is >> type;
-  evaluation_metric = EvaluationMetric::Instance(type);
+  is >> id_;
   float val;
   while(is >> val)
     fields.push_back(val);
@@ -230,8 +269,8 @@ SufficientStats::SufficientStats(const string& encoded) {
 
 void SufficientStats::Encode(string* out) const {
   ostringstream os;
-  if (evaluation_metric)
-    os << evaluation_metric->MetricId();
+  if (id_.size() > 0)
+    os << id_;
   else
     os << "NULL";
   for (unsigned i = 0; i < fields.size(); ++i)
diff --git a/mteval/ns.h b/mteval/ns.h
index f19b7509..622265db 100644
--- a/mteval/ns.h
+++ b/mteval/ns.h
@@ -7,18 +7,15 @@
 #include <boost/shared_ptr.hpp>
 #include "wordid.h"
 
-class EvaluationMetric;
-
 class SufficientStats {
  public:
-  SufficientStats() : evaluation_metric() {}
+  SufficientStats() : id_() {}
   explicit SufficientStats(const std::string& encoded);
-  explicit SufficientStats(const EvaluationMetric* s) : evaluation_metric(s) {}
-  SufficientStats(const EvaluationMetric* s, const std::vector<float>& f) :
-    evaluation_metric(s), fields(f) {}
+  SufficientStats(const std::string& mid, const std::vector<float>& f) :
+    id_(mid), fields(f) {}
   SufficientStats& operator+=(const SufficientStats& delta) {
-    if (delta.evaluation_metric) evaluation_metric = delta.evaluation_metric;
+    if (id_.empty() && delta.id_.size()) id_ = delta.id_;
     if (fields.size() != delta.fields.size())
       fields.resize(std::max(fields.size(), delta.fields.size()));
     for (unsigned i = 0; i < delta.fields.size(); ++i)
@@ -26,7 +23,7 @@ class SufficientStats {
     return *this;
   }
   SufficientStats& operator-=(const SufficientStats& delta) {
-    if (delta.evaluation_metric) evaluation_metric = delta.evaluation_metric;
+    if (id_.empty() && delta.id_.size()) id_ = delta.id_;
     if (fields.size() != delta.fields.size())
       fields.resize(std::max(fields.size(), delta.fields.size()));
     for (unsigned i = 0; i < delta.fields.size(); ++i)
@@ -53,7 +50,7 @@ class SufficientStats {
   }
   void Encode(std::string* out) const;
 
-  const EvaluationMetric* evaluation_metric;
+  std::string id_;
   std::vector<float> fields;
 };
 
@@ -73,13 +70,13 @@ struct SegmentEvaluator {
 };
 
 // Instructions for implementing a new metric
-//   Override MetricId() and give the metric a unique string name (no spaces)
 //   To Instance(), add something that creates the metric
+//   Implement ComputeScore(const SufficientStats& stats) const;
 //   Implement ONE of the following:
 //     1) void ComputeSufficientStatistics(const std::vector<std::vector<WordID> >& refs, SufficientStats* out) const;
 //     2) a new SegmentEvaluator class AND CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const;
-//   The later (#2) is only used when it is necessary to precompute per-segment data from a set of refs
-//   Implement ComputeScore(const SufficientStats& stats) const;
+//   [The later (#2) is only used when it is necessary to precompute per-segment data from a set of refs]
+//   OPTIONAL: Override SufficientStatisticsVectorSize() if it is easy to do so
 class EvaluationMetric {
  public:
   static EvaluationMetric* Instance(const std::string& metric_id = "IBM_BLEU");
@@ -91,7 +88,9 @@ class EvaluationMetric {
  public:
   const std::string& MetricId() const { return name_; }
 
+  virtual unsigned SufficientStatisticsVectorSize() const;
   virtual float ComputeScore(const SufficientStats& stats) const = 0;
+  virtual std::string DetailedScore(const SufficientStats& stats) const;
   virtual boost::shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const;
   virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
                                            const std::vector<std::vector<WordID> >& refs,
diff --git a/mteval/ns_comb.cc b/mteval/ns_comb.cc
new file mode 100644
index
00000000..41c634cd --- /dev/null +++ b/mteval/ns_comb.cc @@ -0,0 +1,87 @@ +#include "ns_comb.h" + +#include + +#include "stringlib.h" + +using namespace std; + +// e.g. COMB:IBM_BLEU=0.5;TER=0.5 +CombinationMetric::CombinationMetric(const std::string& cmd) : + EvaluationMetric(cmd), + total_size() { + if (cmd.find("COMB:") != 0 || cmd.size() < 9) { + cerr << "Error in combination metric specifier: " << cmd << endl; + exit(1); + } + string mix = cmd.substr(5); + vector comps; + Tokenize(cmd.substr(5), ';', &comps); + if(comps.size() < 2) { + cerr << "Error in combination metric specifier: " << cmd << endl; + exit(1); + } + vector cwpairs; + for (unsigned i = 0; i < comps.size(); ++i) { + Tokenize(comps[i], '=', &cwpairs); + if (cwpairs.size() != 2) { cerr << "Error in combination metric specifier: " << cmd << endl; exit(1); } + metrics.push_back(EvaluationMetric::Instance(cwpairs[0])); + coeffs.push_back(atof(cwpairs[1].c_str())); + offsets.push_back(total_size); + total_size += metrics.back()->SufficientStatisticsVectorSize(); + cerr << (i > 0 ? " + " : "( ") << coeffs.back() << " * " << cwpairs[0]; + } + cerr << " )\n"; +} + +struct CombinationSegmentEvaluator : public SegmentEvaluator { + CombinationSegmentEvaluator(const string& id, + const vector >& refs, + const vector& metrics, + const vector& offsets, + const unsigned ts) : id_(id), offsets_(offsets), total_size_(ts), component_evaluators_(metrics.size()) { + for (unsigned i = 0; i < metrics.size(); ++i) + component_evaluators_[i] = metrics[i]->CreateSegmentEvaluator(refs); + } + virtual void Evaluate(const std::vector& hyp, SufficientStats* out) const { + out->id_ = id_; + out->fields.resize(total_size_); + for (unsigned i = 0; i < component_evaluators_.size(); ++i) { + SufficientStats t; + component_evaluators_[i]->Evaluate(hyp, &t); + for (unsigned j = 0; j < t.fields.size(); ++j) { + unsigned op = j + offsets_[i]; + assert(op < out->fields.size()); + out->fields[op] = t[j]; + } + } + } + const string& id_; + const vector& offsets_; + const unsigned total_size_; + vector > component_evaluators_; +}; + +boost::shared_ptr CombinationMetric::CreateSegmentEvaluator(const std::vector >& refs) const { + boost::shared_ptr res; + res.reset(new CombinationSegmentEvaluator(MetricId(), refs, metrics, offsets, total_size)); + return res; +} + +float CombinationMetric::ComputeScore(const SufficientStats& stats) const { + float tot = 0; + for (unsigned i = 0; i < metrics.size(); ++i) { + SufficientStats t; + unsigned next = total_size; + if (i + 1 < offsets.size()) next = offsets[i+1]; + for (unsigned j = offsets[i]; j < next; ++j) + t.fields.push_back(stats[j]); + tot += metrics[i]->ComputeScore(t) * coeffs[i]; + } + return tot; +} + +unsigned CombinationMetric::SufficientStatisticsVectorSize() const { + return total_size; +} + diff --git a/mteval/ns_comb.h b/mteval/ns_comb.h new file mode 100644 index 00000000..140e7e6a --- /dev/null +++ b/mteval/ns_comb.h @@ -0,0 +1,19 @@ +#ifndef _NS_COMB_H_ +#define _NS_COMB_H_ + +#include "ns.h" + +class CombinationMetric : public EvaluationMetric { + public: + CombinationMetric(const std::string& cmd); + virtual boost::shared_ptr CreateSegmentEvaluator(const std::vector >& refs) const; + virtual float ComputeScore(const SufficientStats& stats) const; + virtual unsigned SufficientStatisticsVectorSize() const; + private: + std::vector metrics; + std::vector coeffs; + std::vector offsets; + unsigned total_size; +}; + +#endif diff --git a/mteval/ns_ext.cc b/mteval/ns_ext.cc new file mode 100644 index 
00000000..956708af --- /dev/null +++ b/mteval/ns_ext.cc @@ -0,0 +1,130 @@ +#include "ns_ext.h" + +#include // popen +#include +#include +#include +#include +#include +#include + +#include "stringlib.h" +#include "tdict.h" + +using namespace std; + +struct NScoreServer { + NScoreServer(const std::string& cmd); + ~NScoreServer(); + + float ComputeScore(const std::vector& fields); + void Evaluate(const std::vector >& refs, const std::vector& hyp, std::vector* fields); + + private: + void RequestResponse(const std::string& request, std::string* response); + int p2c[2]; + int c2p[2]; +}; + +NScoreServer::NScoreServer(const string& cmd) { + cerr << "Invoking " << cmd << " ..." << endl; + if (pipe(p2c) < 0) { perror("pipe"); exit(1); } + if (pipe(c2p) < 0) { perror("pipe"); exit(1); } + pid_t cpid = fork(); + if (cpid < 0) { perror("fork"); exit(1); } + if (cpid == 0) { // child + close(p2c[1]); + close(c2p[0]); + dup2(p2c[0], 0); + close(p2c[0]); + dup2(c2p[1], 1); + close(c2p[1]); + cerr << "Exec'ing from child " << cmd << endl; + vector vargs; + SplitOnWhitespace(cmd, &vargs); + const char** cargv = static_cast(malloc(sizeof(const char*) * vargs.size())); + for (unsigned i = 1; i < vargs.size(); ++i) cargv[i-1] = vargs[i].c_str(); + cargv[vargs.size() - 1] = NULL; + execvp(vargs[0].c_str(), (char* const*)cargv); + } else { // parent + close(c2p[1]); + close(p2c[0]); + } + string dummy; + RequestResponse("SCORE ||| Reference initialization string . ||| Testing initialization string .", &dummy); + assert(dummy.size() > 0); + cerr << "Connection established.\n"; +} + +NScoreServer::~NScoreServer() { + // TODO close stuff, join stuff +} + +float NScoreServer::ComputeScore(const vector& fields) { + ostringstream os; + os << "EVAL |||"; + for (unsigned i = 0; i < fields.size(); ++i) + os << ' ' << fields[i]; + string sres; + RequestResponse(os.str(), &sres); + return strtod(sres.c_str(), NULL); +} + +void NScoreServer::Evaluate(const vector >& refs, const vector& hyp, vector* fields) { + ostringstream os; + os << "SCORE"; + for (unsigned i = 0; i < refs.size(); ++i) { + os << " |||"; + for (unsigned j = 0; j < refs[i].size(); ++j) { + os << ' ' << TD::Convert(refs[i][j]); + } + } + os << " |||"; + for (unsigned i = 0; i < hyp.size(); ++i) { + os << ' ' << TD::Convert(hyp[i]); + } + string sres; + RequestResponse(os.str(), &sres); + istringstream is(sres); + float val; + fields->clear(); + while(is >> val) + fields->push_back(val); +} + +#define MAX_BUF 16000 + +void NScoreServer::RequestResponse(const string& request, string* response) { +// cerr << "@SERVER: " << request << endl; + string x = request + "\n"; + write(p2c[1], x.c_str(), x.size()); + char buf[MAX_BUF]; + size_t n = read(c2p[0], buf, MAX_BUF); + while (n < MAX_BUF && buf[n-1] != '\n') + n += read(c2p[0], &buf[n], MAX_BUF - n); + + buf[n-1] = 0; + if (n < 2) { + cerr << "Malformed response: " << buf << endl; + } + *response = Trim(buf, " \t\n"); +// cerr << "@RESPONSE: '" << *response << "'\n"; +} + +void ExternalMetric::ComputeSufficientStatistics(const std::vector& hyp, + const std::vector >& refs, + SufficientStats* out) const { + eval_server->Evaluate(refs, hyp, &out->fields); +} + +float ExternalMetric::ComputeScore(const SufficientStats& stats) const { + eval_server->ComputeScore(stats.fields); +} + +ExternalMetric::ExternalMetric(const string& metric_name, const std::string& command) : + EvaluationMetric(metric_name), + eval_server(new NScoreServer(command)) {} + +ExternalMetric::~ExternalMetric() { + delete eval_server; +} diff 
--git a/mteval/ns_ext.h b/mteval/ns_ext.h new file mode 100644 index 00000000..78badb2e --- /dev/null +++ b/mteval/ns_ext.h @@ -0,0 +1,21 @@ +#ifndef _NS_EXTERNAL_SCORER_H_ +#define _NS_EXTERNAL_SCORER_H_ + +#include "ns.h" + +struct NScoreServer; +class ExternalMetric : public EvaluationMetric { + public: + ExternalMetric(const std::string& metricid, const std::string& command); + ~ExternalMetric(); + + virtual void ComputeSufficientStatistics(const std::vector& hyp, + const std::vector >& refs, + SufficientStats* out) const; + virtual float ComputeScore(const SufficientStats& stats) const; + + protected: + NScoreServer* eval_server; +}; + +#endif diff --git a/mteval/ns_ter.cc b/mteval/ns_ter.cc index 14dc6e49..8c969e58 100644 --- a/mteval/ns_ter.cc +++ b/mteval/ns_ter.cc @@ -1,15 +1,11 @@ #include "ns_ter.h" -#include #include #include #include -#include #include #include -#include #include -#include #include "tdict.h" static const bool ter_use_average_ref_len = true; @@ -25,7 +21,7 @@ static const unsigned kDUMMY_LAST_ENTRY = 5; using namespace std; using namespace std::tr1; -#if 0 +namespace NewScorer { struct COSTS { static const float substitution; @@ -82,7 +78,7 @@ class TERScorerImpl { enum TransType { MATCH, SUBSTITUTION, INSERTION, DELETION }; explicit TERScorerImpl(const vector& ref) : ref_(ref) { - for (int i = 0; i < ref.size(); ++i) + for (unsigned i = 0; i < ref.size(); ++i) rwexists_.insert(ref[i]); } @@ -95,7 +91,7 @@ class TERScorerImpl { } private: - vector ref_; + const vector& ref_; set rwexists_; typedef unordered_map, set, boost::hash > > NgramToIntsMap; @@ -421,68 +417,7 @@ class TERScorerImpl { } }; -class TERScore : public ScoreBase { - friend class TERScorer; - - public: - - TERScore() : stats(0,kDUMMY_LAST_ENTRY) {} - float ComputePartialScore() const { return 0.0;} - float ComputeScore() const { - float edits = static_cast(stats[kINSERTIONS] + stats[kDELETIONS] + stats[kSUBSTITUTIONS] + stats[kSHIFTS]); - return edits / static_cast(stats[kREF_WORDCOUNT]); - } - void ScoreDetails(string* details) const; - void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){} - void PlusEquals(const Score& delta, const float scale) { - if (scale==1) - stats += static_cast(delta).stats; - if (scale==-1) - stats -= static_cast(delta).stats; - throw std::runtime_error("TERScore::PlusEquals with scale != +-1"); - } - void PlusEquals(const Score& delta) { - stats += static_cast(delta).stats; - } - - ScoreP GetZero() const { - return ScoreP(new TERScore); - } - ScoreP GetOne() const { - return ScoreP(new TERScore); - } - void Subtract(const Score& rhs, Score* res) const { - static_cast(res)->stats = stats - static_cast(rhs).stats; - } - void Encode(std::string* out) const { - ostringstream os; - os << stats[kINSERTIONS] << ' ' - << stats[kDELETIONS] << ' ' - << stats[kSUBSTITUTIONS] << ' ' - << stats[kSHIFTS] << ' ' - << stats[kREF_WORDCOUNT]; - *out = os.str(); - } - bool IsAdditiveIdentity() const { - for (int i = 0; i < kDUMMY_LAST_ENTRY; ++i) - if (stats[i] != 0) return false; - return true; - } - private: - valarray stats; -}; - -ScoreP TERScorer::ScoreFromString(const std::string& data) { - istringstream is(data); - TERScore* r = new TERScore; - is >> r->stats[TERScore::kINSERTIONS] - >> r->stats[TERScore::kDELETIONS] - >> r->stats[TERScore::kSUBSTITUTIONS] - >> r->stats[TERScore::kSHIFTS] - >> r->stats[TERScore::kREF_WORDCOUNT]; - return ScoreP(r); -} - +#if 0 void TERScore::ScoreDetails(std::string* details) const { char buf[200]; 
sprintf(buf, "TER = %.2f, %3d|%3d|%3d|%3d (len=%d)", @@ -494,54 +429,43 @@ void TERScore::ScoreDetails(std::string* details) const { stats[kREF_WORDCOUNT]); *details = buf; } +#endif -TERScorer::~TERScorer() { - for (vector::iterator i = impl_.begin(); i != impl_.end(); ++i) - delete *i; -} +} // namespace NewScorer -TERScorer::TERScorer(const vector >& refs) : impl_(refs.size()) { +void TERMetric::ComputeSufficientStatistics(const vector& hyp, + const vector >& refs, + SufficientStats* out) const { + out->fields.resize(kDUMMY_LAST_ENTRY); + float best_score = numeric_limits::max(); + unsigned avg_len = 0; for (int i = 0; i < refs.size(); ++i) - impl_[i] = new TERScorerImpl(refs[i]); -} + avg_len += refs[i].size(); + avg_len /= refs.size(); -ScoreP TERScorer::ScoreCCandidate(const vector& hyp) const { - return ScoreP(); -} - -ScoreP TERScorer::ScoreCandidate(const std::vector& hyp) const { - float best_score = numeric_limits::max(); - TERScore* res = new TERScore; - int avg_len = 0; - for (int i = 0; i < impl_.size(); ++i) - avg_len += impl_[i]->GetRefLength(); - avg_len /= impl_.size(); - for (int i = 0; i < impl_.size(); ++i) { + for (int i = 0; i < refs.size(); ++i) { int subs, ins, dels, shifts; - float score = impl_[i]->Calculate(hyp, &subs, &ins, &dels, &shifts); + NewScorer::TERScorerImpl ter(refs[i]); + float score = ter.Calculate(hyp, &subs, &ins, &dels, &shifts); // cerr << "Component TER cost: " << score << endl; if (score < best_score) { - res->stats[TERScore::kINSERTIONS] = ins; - res->stats[TERScore::kDELETIONS] = dels; - res->stats[TERScore::kSUBSTITUTIONS] = subs; - res->stats[TERScore::kSHIFTS] = shifts; + out->fields[kINSERTIONS] = ins; + out->fields[kDELETIONS] = dels; + out->fields[kSUBSTITUTIONS] = subs; + out->fields[kSHIFTS] = shifts; if (ter_use_average_ref_len) { - res->stats[TERScore::kREF_WORDCOUNT] = avg_len; + out->fields[kREF_WORDCOUNT] = avg_len; } else { - res->stats[TERScore::kREF_WORDCOUNT] = impl_[i]->GetRefLength(); + out->fields[kREF_WORDCOUNT] = refs[i].size(); } best_score = score; } } - return ScoreP(res); } -#endif -void TERMetric::ComputeSufficientStatistics(const vector& hyp, - const vector >& refs, - SufficientStats* out) const { - out->fields.resize(kDUMMY_LAST_ENTRY); +unsigned TERMetric::SufficientStatisticsVectorSize() const { + return kDUMMY_LAST_ENTRY; } float TERMetric::ComputeScore(const SufficientStats& stats) const { diff --git a/mteval/ns_ter.h b/mteval/ns_ter.h index bb90f95e..6c020cfa 100644 --- a/mteval/ns_ter.h +++ b/mteval/ns_ter.h @@ -9,6 +9,7 @@ class TERMetric : public EvaluationMetric { TERMetric() : EvaluationMetric("TER") {} public: + virtual unsigned SufficientStatisticsVectorSize() const; virtual void ComputeSufficientStatistics(const std::vector& hyp, const std::vector >& refs, SufficientStats* out) const; diff --git a/mteval/scorer_test.cc b/mteval/scorer_test.cc index 09da250c..73159557 100644 --- a/mteval/scorer_test.cc +++ b/mteval/scorer_test.cc @@ -205,20 +205,22 @@ TEST_F(ScorerTest, Kernel) { } TEST_F(ScorerTest, NewScoreAPI) { - EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU"); + //EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU"); + //EvaluationMetric* metric = EvaluationMetric::Instance("METEOR"); + EvaluationMetric* metric = EvaluationMetric::Instance("COMB:IBM_BLEU=0.5;TER=-0.5"); boost::shared_ptr e1 = metric->CreateSegmentEvaluator(refs0); boost::shared_ptr e2 = metric->CreateSegmentEvaluator(refs1); SufficientStats stats1; - e1->Evaluate(hyp2, &stats1); + e1->Evaluate(hyp1, 
&stats1); SufficientStats stats2; - e2->Evaluate(hyp1, &stats2); + e2->Evaluate(hyp2, &stats2); stats1 += stats2; string ss; stats1.Encode(&ss); cerr << "SS: " << ss << endl; cerr << metric->ComputeScore(stats1) << endl; - SufficientStats statse("IBM_BLEU 53 32 18 11 65 63 61 59 65 72"); - cerr << metric->ComputeScore(statse) << endl; + //SufficientStats statse("IBM_BLEU 53 32 18 11 65 63 61 59 65 72"); + //cerr << metric->ComputeScore(statse) << endl; } int main(int argc, char **argv) { diff --git a/utils/stringlib.h b/utils/stringlib.h index cafbdac3..f457e1e4 100644 --- a/utils/stringlib.h +++ b/utils/stringlib.h @@ -125,6 +125,13 @@ inline std::string LowercaseString(const std::string& in) { return res; } +inline std::string UppercaseString(const std::string& in) { + std::string res(in.size(),' '); + for (int i = 0; i < in.size(); ++i) + res[i] = toupper(in[i]); + return res; +} + inline int CountSubstrings(const std::string& str, const std::string& sub) { size_t p = 0; int res = 0; -- cgit v1.2.3 From e4c5e87db2139aa0f8655b063da7d8b5199cb46d Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 20 Dec 2011 18:34:14 -0500 Subject: migrate fast_score to the new API --- mteval/Makefile.am | 2 +- mteval/fast_score.cc | 40 +++++++++++++++++++++++----------------- mteval/ns.cc | 5 +++-- mteval/ns_ter.cc | 12 ++++++++++++ mteval/ns_ter.h | 1 + pro-train/dist-pro.pl | 2 +- vest/dist-vest.pl | 2 +- 7 files changed, 42 insertions(+), 22 deletions(-) (limited to 'mteval/ns_ter.h') diff --git a/mteval/Makefile.am b/mteval/Makefile.am index 6679d949..e7126675 100644 --- a/mteval/Makefile.am +++ b/mteval/Makefile.am @@ -10,7 +10,7 @@ endif noinst_LIBRARIES = libmteval.a -libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc +libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc fast_score_SOURCES = fast_score.cc fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz diff --git a/mteval/fast_score.cc b/mteval/fast_score.cc index 5ee264a6..a271ccc5 100644 --- a/mteval/fast_score.cc +++ b/mteval/fast_score.cc @@ -4,9 +4,11 @@ #include #include +#include "stringlib.h" #include "filelib.h" #include "tdict.h" -#include "scorer.h" +#include "ns.h" +#include "ns_docscorer.h" using namespace std; namespace po = boost::program_options; @@ -14,8 +16,8 @@ namespace po = boost::program_options; void InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() - ("reference,r",po::value >(), "[REQD] Reference translation(s) (tokenized text file)") - ("loss_function,l",po::value()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)") + ("reference,r",po::value >(), "[1 or more required] Reference translation(s) in tokenized text files") + ("evaluation_metric,m",po::value()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)") ("in_file,i", po::value()->default_value("-"), "Input file") ("help,h", "Help"); po::options_description dcmdline_options; @@ -35,24 +37,29 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { int main(int argc, char** argv) { po::variables_map conf; InitCommandLine(argc, argv, &conf); - const string loss_function = conf["loss_function"].as(); - ScoreType type = ScoreTypeFromString(loss_function); - DocScorer ds(type, conf["reference"].as >(), 
""); + string loss_function = UppercaseString(conf["evaluation_metric"].as()); + if (loss_function == "COMBI") { + cerr << "WARNING: 'combi' metric is no longer supported, switching to 'COMB:TER=-0.5;IBM_BLEU=0.5'\n"; + loss_function = "COMB:TER=-0.5;IBM_BLEU=0.5"; + } else if (loss_function == "BLEU") { + cerr << "WARNING: 'BLEU' is ambiguous, assuming 'IBM_BLEU'\n"; + loss_function = "IBM_BLEU"; + } + EvaluationMetric* metric = EvaluationMetric::Instance(loss_function); + DocumentScorer ds(metric, conf["reference"].as >()); cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl; ReadFile rf(conf["in_file"].as()); - ScoreP acc; + SufficientStats acc; istream& in = *rf.stream(); int lc = 0; - while(in) { - string line; - getline(in, line); - if (line.empty() && !in) break; + string line; + while(getline(in, line)) { vector sent; TD::ConvertSentence(line, &sent); - ScoreP sentscore = ds[lc]->ScoreCandidate(sent); - if (!acc) { acc = sentscore->GetZero(); } - acc->PlusEquals(*sentscore); + SufficientStats t; + ds[lc]->Evaluate(sent, &t); + acc += t; ++lc; } assert(lc > 0); @@ -63,9 +70,8 @@ int main(int argc, char** argv) { if (lc != ds.size()) cerr << "Fewer sentences in hyp (" << lc << ") than refs (" << ds.size() << "): scoring partial set!\n"; - float score = acc->ComputeScore(); - string details; - acc->ScoreDetails(&details); + float score = metric->ComputeScore(acc); + const string details = metric->DetailedScore(acc); cerr << details << endl; cout << score << endl; return 0; diff --git a/mteval/ns.cc b/mteval/ns.cc index 6139757d..1018319d 100644 --- a/mteval/ns.cc +++ b/mteval/ns.cc @@ -173,7 +173,7 @@ struct BleuSegmentEvaluator : public SegmentEvaluator { template struct BleuMetric : public EvaluationMetric { - BleuMetric() : EvaluationMetric("IBM_BLEU") {} + BleuMetric() : EvaluationMetric(BrevityType == IBM ? "IBM_BLEU" : (BrevityType == Koehn ? 
"KOEHN_BLEU" : "NIST_BLEU")) {} unsigned SufficientStatisticsVectorSize() const { return N*2 + 2; } shared_ptr CreateSegmentEvaluator(const vector >& refs) const { return shared_ptr(new BleuSegmentEvaluator(refs, this)); @@ -208,7 +208,8 @@ struct BleuMetric : public EvaluationMetric { vector precs(N); float bp; float bleu = ComputeBreakdown(stats, &bp, &precs); - sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)", + sprintf(buf, "%s = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)", + MetricId().c_str(), bleu*100.0, precs[0]*100.0, precs[1]*100.0, diff --git a/mteval/ns_ter.cc b/mteval/ns_ter.cc index 8c969e58..f75acf1d 100644 --- a/mteval/ns_ter.cc +++ b/mteval/ns_ter.cc @@ -473,3 +473,15 @@ float TERMetric::ComputeScore(const SufficientStats& stats) const { return edits / static_cast(stats[kREF_WORDCOUNT]); } +string TERMetric::DetailedScore(const SufficientStats& stats) const { + char buf[200]; + sprintf(buf, "TER = %.2f, %3.f|%3.f|%3.f|%3.f (len=%3.f)", + ComputeScore(stats) * 100.0f, + stats[kINSERTIONS], + stats[kDELETIONS], + stats[kSUBSTITUTIONS], + stats[kSHIFTS], + stats[kREF_WORDCOUNT]); + return buf; +} + diff --git a/mteval/ns_ter.h b/mteval/ns_ter.h index 6c020cfa..3190fc1b 100644 --- a/mteval/ns_ter.h +++ b/mteval/ns_ter.h @@ -10,6 +10,7 @@ class TERMetric : public EvaluationMetric { public: virtual unsigned SufficientStatisticsVectorSize() const; + virtual std::string DetailedScore(const SufficientStats& stats) const; virtual void ComputeSufficientStatistics(const std::vector& hyp, const std::vector >& refs, SufficientStats* out) const; diff --git a/pro-train/dist-pro.pl b/pro-train/dist-pro.pl index 5db053de..ba9cdc06 100755 --- a/pro-train/dist-pro.pl +++ b/pro-train/dist-pro.pl @@ -288,7 +288,7 @@ while (1){ $retries++; } die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest); - my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -l $metric"); + my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric"); chomp $dec_score; print STDERR "DECODER SCORE: $dec_score\n"; diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 11e791c1..c382a972 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -308,7 +308,7 @@ while (1){ $retries++; } die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? 
Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest); - my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -l $metric"); + my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric"); chomp $dec_score; print STDERR "DECODER SCORE: $dec_score\n"; -- cgit v1.2.3 From dbf367e0fc9d3faf906340d1f51f2dbda1892081 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 3 Feb 2012 17:19:16 -0500 Subject: make pro use new interface --- .gitignore | 77 ++++++++++++++++++++++++++++++++++++++++--------- mteval/ns.cc | 4 +++ mteval/ns.h | 4 +++ mteval/ns_ter.h | 1 + pro-train/dist-pro.pl | 4 +-- pro-train/mr_pro_map.cc | 37 +++++++++++++++--------- 6 files changed, 98 insertions(+), 29 deletions(-) (limited to 'mteval/ns_ter.h') diff --git a/.gitignore b/.gitignore index 5efe37b0..ab8bf2c7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,46 @@ +mira/kbest_mira +sa-extract/calignment.c +sa-extract/calignment.so +sa-extract/cdat.c +sa-extract/cdat.so +sa-extract/cfloatlist.c +sa-extract/cfloatlist.so +sa-extract/cintlist.c +sa-extract/cintlist.so +sa-extract/clex.c +sa-extract/clex.so +sa-extract/cn.pyc +sa-extract/context_model.pyc +sa-extract/cstrmap.c +sa-extract/cstrmap.so +sa-extract/csuf.c +sa-extract/csuf.so +sa-extract/cveb.c +sa-extract/cveb.so +sa-extract/lcp.c +sa-extract/lcp.so +sa-extract/log.pyc +sa-extract/manager.pyc +sa-extract/model.pyc +sa-extract/monitor.pyc +sa-extract/precomputation.c +sa-extract/precomputation.so +sa-extract/rule.c +sa-extract/rule.so +sa-extract/rulefactory.c +sa-extract/rulefactory.so +sa-extract/sgml.pyc +sa-extract/sym.c +sa-extract/sym.so +training/mpi_flex_optimize +training/test_ngram +utils/dict_test +utils/logval_test +utils/mfcr_test +utils/phmt +utils/small_vector_test +utils/ts +utils/weights_test pro-train/.deps pro-train/mr_pro_map pro-train/mr_pro_reduce @@ -38,8 +81,8 @@ utils/.deps/ utils/libutils.a *swp *.o -vest/sentserver -vest/sentclient +dpmert/sentserver +dpmert/sentclient gi/pyp-topics/src/contexts_lexer.cc config.guess config.sub @@ -61,12 +104,12 @@ training/mr_em_map_adapter training/mr_reduce_to_weights training/optimize_test training/plftools -vest/fast_score -vest/lo_test -vest/mr_vest_map -vest/mr_vest_reduce -vest/scorer_test -vest/union_forests +dpmert/fast_score +dpmert/lo_test +dpmert/mr_dpmert_map +dpmert/mr_dpmert_reduce +dpmert/scorer_test +dpmert/union_forests Makefile Makefile.in aclocal.m4 @@ -99,11 +142,11 @@ training/Makefile.in training/*.o training/grammar_convert training/model1 -vest/.deps/ -vest/Makefile -vest/Makefile.in -vest/mr_vest_generate_mapper_input -vest/*.o +dpmert/.deps/ +dpmert/Makefile +dpmert/Makefile.in +dpmert/mr_dpmert_generate_mapper_input +dpmert/*.o decoder/logval_test extools/build_lexical_translation extools/filter_grammar @@ -124,7 +167,6 @@ m4/ltoptions.m4 m4/ltsugar.m4 m4/ltversion.m4 m4/lt~obsolete.m4 -vest/mbr_kbest extools/featurize_grammar extools/filter_score_grammar gi/posterior-regularisation/prjava/build/ @@ -143,3 +185,10 @@ gi/posterior-regularisation/prjava/lib/prjava-20100715.jar *.ps *.toc *~ +gi/pf/align-lexonly +gi/pf/align-lexonly-pyp +gi/pf/condnaive +mteval/scorer_test +phrasinator/gibbs_train_plm +phrasinator/gibbs_train_plm_notables +.* diff --git a/mteval/ns.cc b/mteval/ns.cc index da678b84..788f809a 100644 --- a/mteval/ns.cc +++ b/mteval/ns.cc @@ -21,6 +21,10 @@ map EvaluationMetric::instances_; SegmentEvaluator::~SegmentEvaluator() {} EvaluationMetric::~EvaluationMetric() {} +bool 
EvaluationMetric::IsErrorMetric() const { + return false; +} + struct DefaultSegmentEvaluator : public SegmentEvaluator { DefaultSegmentEvaluator(const vector >& refs, const EvaluationMetric* em) : refs_(refs), em_(em) {} void Evaluate(const vector& hyp, SufficientStats* out) const { diff --git a/mteval/ns.h b/mteval/ns.h index d88c263b..4e4c6975 100644 --- a/mteval/ns.h +++ b/mteval/ns.h @@ -94,6 +94,10 @@ class EvaluationMetric { public: const std::string& MetricId() const { return name_; } + // returns true for metrics like WER and TER where lower scores are better + // false for metrics like BLEU and METEOR where higher scores are better + virtual bool IsErrorMetric() const; + virtual unsigned SufficientStatisticsVectorSize() const; virtual float ComputeScore(const SufficientStats& stats) const = 0; virtual std::string DetailedScore(const SufficientStats& stats) const; diff --git a/mteval/ns_ter.h b/mteval/ns_ter.h index 3190fc1b..c5c25413 100644 --- a/mteval/ns_ter.h +++ b/mteval/ns_ter.h @@ -9,6 +9,7 @@ class TERMetric : public EvaluationMetric { TERMetric() : EvaluationMetric("TER") {} public: + virtual bool IsErrorMetric() const; virtual unsigned SufficientStatisticsVectorSize() const; virtual std::string DetailedScore(const SufficientStats& stats) const; virtual void ComputeSufficientStatistics(const std::vector& hyp, diff --git a/pro-train/dist-pro.pl b/pro-train/dist-pro.pl index ba9cdc06..31258fa6 100755 --- a/pro-train/dist-pro.pl +++ b/pro-train/dist-pro.pl @@ -12,7 +12,7 @@ use POSIX ":sys_wait_h"; my $QSUB_CMD = qsub_args(mert_memory()); my $default_jobs = env_default_jobs(); -my $VEST_DIR="$SCRIPT_DIR/../vest"; +my $VEST_DIR="$SCRIPT_DIR/../dpmert"; require "$VEST_DIR/libcall.pl"; # Default settings @@ -338,7 +338,7 @@ while (1){ $mapoutput =~ s/mapinput/mapoutput/; push @mapoutputs, "$dir/splag.$im1/$mapoutput"; $o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard"; - my $script = "$MAPPER -s $srcFile -l $metric $refs_comma_sep -w $inweights -K $dir/kbest < $dir/splag.$im1/$shard > $dir/splag.$im1/$mapoutput"; + my $script = "$MAPPER -s $srcFile -m $metric $refs_comma_sep -w $inweights -K $dir/kbest < $dir/splag.$im1/$shard > $dir/splag.$im1/$mapoutput"; if ($use_make) { my $script_file = "$dir/scripts/map.$shard"; open F, ">$script_file" or die "Can't write $script_file: $!"; diff --git a/pro-train/mr_pro_map.cc b/pro-train/mr_pro_map.cc index 0a9b75d7..52b67f32 100644 --- a/pro-train/mr_pro_map.cc +++ b/pro-train/mr_pro_map.cc @@ -13,11 +13,12 @@ #include "filelib.h" #include "stringlib.h" #include "weights.h" -#include "scorer.h" #include "inside_outside.h" #include "hg_io.h" #include "kbest.h" #include "viterbi.h" +#include "ns.h" +#include "ns_docscorer.h" // This is Figure 4 (Algorithm Sampler) from Hopkins&May (2011) @@ -80,7 +81,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { ("kbest_repository,K",po::value()->default_value("./kbest"),"K-best list repository (directory)") ("input,i",po::value()->default_value("-"), "Input file to map (- is STDIN)") ("source,s",po::value()->default_value(""), "Source file (ignored, except for AER)") - ("loss_function,l",po::value()->default_value("ibm_bleu"), "Loss function being optimized") + ("evaluation_metric,m",po::value()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)") ("kbest_size,k",po::value()->default_value(1500u), "Top k-hypotheses to extract") ("candidate_pairs,G", po::value()->default_value(5000u), "Number of pairs to sample per 
hypothesis (Gamma)") ("best_pairs,X", po::value()->default_value(50u), "Number of pairs, ranked by magnitude of objective delta, to retain (Xi)") @@ -109,9 +110,12 @@ struct HypInfo { HypInfo(const vector& h, const SparseVector& feats) : hyp(h), g_(-100.0f), x(feats) {} // lazy evaluation - double g(const SentenceScorer& scorer) const { - if (g_ == -100.0f) - g_ = scorer.ScoreCandidate(hyp)->ComputeScore(); + double g(const SegmentEvaluator& scorer, const EvaluationMetric* metric) const { + if (g_ == -100.0f) { + SufficientStats ss; + scorer.Evaluate(hyp, &ss); + g_ = metric->ComputeScore(ss); + } return g_; } vector hyp; @@ -233,15 +237,21 @@ struct DiffOrder { } }; -void Sample(const unsigned gamma, const unsigned xi, const vector& J_i, const SentenceScorer& scorer, const bool invert_score, vector* pv) { +void Sample(const unsigned gamma, + const unsigned xi, + const vector& J_i, + const SegmentEvaluator& scorer, + const EvaluationMetric* metric, + vector* pv) { + const bool invert_score = metric->IsErrorMetric(); vector v1, v2; float avg_diff = 0; for (unsigned i = 0; i < gamma; ++i) { const size_t a = rng->inclusive(0, J_i.size() - 1)(); const size_t b = rng->inclusive(0, J_i.size() - 1)(); if (a == b) continue; - float ga = J_i[a].g(scorer); - float gb = J_i[b].g(scorer); + float ga = J_i[a].g(scorer, metric); + float gb = J_i[b].g(scorer, metric); bool positive = gb < ga; if (invert_score) positive = !positive; const float gdiff = fabs(ga - gb); @@ -288,11 +298,12 @@ int main(int argc, char** argv) { rng.reset(new MT19937(conf["random_seed"].as())); else rng.reset(new MT19937); - const string loss_function = conf["loss_function"].as(); + const string evaluation_metric = conf["evaluation_metric"].as(); + + EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric); + DocumentScorer ds(metric, conf["reference"].as >()); + cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl; - ScoreType type = ScoreTypeFromString(loss_function); - DocScorer ds(type, conf["reference"].as >(), conf["source"].as()); - cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl; Hypergraph hg; string last_file; ReadFile in_read(conf["input"].as()); @@ -335,7 +346,7 @@ int main(int argc, char** argv) { Dedup(&J_i); WriteKBest(kbest_file, J_i); - Sample(gamma, xi, J_i, *ds[sent_id], (type == TER), &v); + Sample(gamma, xi, J_i, *ds[sent_id], metric, &v); for (unsigned i = 0; i < v.size(); ++i) { const TrainingInstance& vi = v[i]; cout << vi.y << "\t" << vi.x << endl; -- cgit v1.2.3
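
Usage note: the patches above replace the old Score/SentenceScorer classes with EvaluationMetric, SegmentEvaluator, and SufficientStats. The sketch below shows the intended calling pattern end to end, mirroring fast_score.cc and scorer_test.cc: build a per-segment evaluator from the references, collect per-sentence sufficient statistics, sum them, and only then turn the sum into a score. It is illustrative only: it assumes the cdec mteval/utils headers and libmteval are available, uses a single reference per segment, and the file names ref.txt and hyp.txt are placeholders.

#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include "ns.h"      // EvaluationMetric, SegmentEvaluator, SufficientStats
#include "tdict.h"   // TD::ConvertSentence

int main() {
  EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU");
  std::ifstream ref_in("ref.txt");   // placeholder file names
  std::ifstream hyp_in("hyp.txt");
  std::string ref_line, hyp_line;
  SufficientStats corpus_stats;      // statistics are additive across segments
  while (std::getline(ref_in, ref_line) && std::getline(hyp_in, hyp_line)) {
    std::vector<WordID> ref, hyp;
    TD::ConvertSentence(ref_line, &ref);
    TD::ConvertSentence(hyp_line, &hyp);
    std::vector<std::vector<WordID> > refs(1, ref);   // single reference here
    boost::shared_ptr<SegmentEvaluator> seg = metric->CreateSegmentEvaluator(refs);
    SufficientStats sent_stats;
    seg->Evaluate(hyp, &sent_stats);   // per-segment sufficient statistics
    corpus_stats += sent_stats;
  }
  std::string encoded;
  corpus_stats.Encode(&encoded);       // e.g. for shipping stats through a map/reduce step
  std::cerr << "SS: " << encoded << std::endl;
  std::cerr << metric->DetailedScore(corpus_stats) << std::endl;
  std::cout << metric->ComputeScore(corpus_stats) << std::endl;
  return 0;
}

The encoded string can later be reconstructed with SufficientStats(encoded) and added into another accumulator, which is what the scorer_test.cc snippet with "IBM_BLEU 53 32 18 11 ..." exercises.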
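
Note on extending the interface: the comment block in ns.h spells out the recipe for a new metric — subclass EvaluationMetric, implement ComputeScore() plus either ComputeSufficientStatistics() or a custom SegmentEvaluator, and add a branch in EvaluationMetric::Instance(). A minimal hypothetical example (a hypothesis/reference length-ratio "metric" named LENRATIO, which is not part of cdec and is here only to illustrate the shape of the code) might look like this:

#include <vector>
#include "ns.h"

class LenRatioMetric : public EvaluationMetric {
  friend class EvaluationMetric;   // Instance() constructs registered metrics
 protected:
  LenRatioMetric() : EvaluationMetric("LENRATIO") {}

 public:
  virtual unsigned SufficientStatisticsVectorSize() const { return 2; }
  virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
                                           const std::vector<std::vector<WordID> >& refs,
                                           SufficientStats* out) const {
    out->fields.resize(2);
    out->fields[0] = static_cast<float>(hyp.size());                            // hypothesis length
    out->fields[1] = refs.empty() ? 0.0f : static_cast<float>(refs[0].size());  // first-reference length
  }
  virtual float ComputeScore(const SufficientStats& stats) const {
    return stats[1] > 0 ? stats[0] / stats[1] : 0.0f;
  }
};

// ...and in EvaluationMetric::Instance() in ns.cc, one more branch:
//   } else if (metric_id == "LENRATIO") {
//     m = new LenRatioMetric;

Because the statistics are plain additive floats with a known vector size, such a metric accumulates, encodes, and combines (via CombinationMetric) exactly like the built-in ones.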
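
Note on combined metrics: ns_comb.cc wires several metrics into one by concatenating their sufficient-statistics vectors (each component's SufficientStatisticsVectorSize() supplies the offset) and summing the component scores weighted by the coefficients in the specifier, e.g. COMB:IBM_BLEU=0.5;TER=-0.5, where TER takes a negative weight because it is an error metric. A small sketch, again assuming the cdec headers and using made-up sentences, that checks the combined score against its parts:

#include <iostream>
#include <vector>

#include "ns.h"
#include "tdict.h"

// Score one hypothesis against one reference set under a given metric.
static float ScoreOne(EvaluationMetric* m,
                      const std::vector<std::vector<WordID> >& refs,
                      const std::vector<WordID>& hyp) {
  SufficientStats ss;
  m->CreateSegmentEvaluator(refs)->Evaluate(hyp, &ss);
  return m->ComputeScore(ss);
}

int main() {
  std::vector<WordID> ref, hyp;
  TD::ConvertSentence("the cat sat on the mat", &ref);   // toy sentences
  TD::ConvertSentence("a cat sat on the mat", &hyp);
  std::vector<std::vector<WordID> > refs(1, ref);

  EvaluationMetric* comb = EvaluationMetric::Instance("COMB:IBM_BLEU=0.5;TER=-0.5");
  EvaluationMetric* bleu = EvaluationMetric::Instance("IBM_BLEU");
  EvaluationMetric* ter  = EvaluationMetric::Instance("TER");

  const float b = ScoreOne(bleu, refs, hyp);
  const float t = ScoreOne(ter, refs, hyp);
  const float c = ScoreOne(comb, refs, hyp);
  // The combined score is the weighted sum of the component scores computed
  // from the corresponding slices of the concatenated statistics vector.
  std::cout << "IBM_BLEU=" << b << " TER=" << t << " COMB=" << c
            << " (expect ~" << 0.5 * b - 0.5 * t << ")" << std::endl;
  return 0;
}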
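
Note on the external scorer protocol: ns_ext.cc talks to a child process (e.g. the METEOR jar invoked in ns.cc) over a pair of pipes, one request line per call. A "SCORE ||| <ref 1> ||| ... ||| <ref n> ||| <hyp>" request must be answered with one line of whitespace-separated sufficient statistics, and an "EVAL ||| <stat 1> <stat 2> ..." request with a single score. The toy scorer below exists only to illustrate that handshake — it reports hypothesis/reference token counts and their ratio, which no real metric would do — and is a hypothetical stand-in, not part of cdec; a real deployment points ExternalMetric at an actual scorer command.

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Split a request line on the " ||| " delimiter used by NScoreServer.
static std::vector<std::string> SplitFields(const std::string& line) {
  std::vector<std::string> fields;
  std::string::size_type start = 0, pos;
  while ((pos = line.find(" ||| ", start)) != std::string::npos) {
    fields.push_back(line.substr(start, pos - start));
    start = pos + 5;
  }
  fields.push_back(line.substr(start));
  return fields;
}

int main() {
  std::string line;
  while (std::getline(std::cin, line)) {
    const std::vector<std::string> f = SplitFields(line);
    if (f[0] == "SCORE" && f.size() >= 3) {
      // Last field is the hypothesis; fields 1..n-2 are references
      // (only the first reference is counted in this toy).
      std::istringstream hyp(f.back()), ref(f[1]);
      std::string tok;
      float hyp_len = 0, ref_len = 0;
      while (hyp >> tok) ++hyp_len;
      while (ref >> tok) ++ref_len;
      std::cout << hyp_len << ' ' << ref_len << std::endl;   // "sufficient statistics"
    } else if (f[0] == "EVAL" && f.size() >= 2) {
      std::istringstream ss(f[1]);
      float hyp_len = 0, ref_len = 1;
      ss >> hyp_len >> ref_len;
      std::cout << (ref_len > 0 ? hyp_len / ref_len : 0.0f) << std::endl;   // the "score"
    } else {
      std::cout << 0 << std::endl;   // keep the pipe protocol alive on malformed input
    }
    // std::endl flushes, which matters: the parent blocks on read() until it
    // sees the newline-terminated response.
  }
  return 0;
}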