From 0da1f6de1b33bbff5cb99b1938bb07d050479f10 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Wed, 14 Dec 2011 21:02:50 -0800
Subject: random incomplete metric stuff, including string subsequence kernel
 impl

---
 mteval/ns.cc          | 241 ++++++++++++++++++++++
 mteval/ns.h           | 106 ++++++++++
 mteval/ns_ter.cc      | 551 ++++++++++++++++++++++++++++++++++++++++++++++++++
 mteval/ns_ter.h       |  18 ++
 mteval/scorer_test.cc |  46 +++++
 5 files changed, 962 insertions(+)
 create mode 100644 mteval/ns.cc
 create mode 100644 mteval/ns.h
 create mode 100644 mteval/ns_ter.cc
 create mode 100644 mteval/ns_ter.h

(limited to 'mteval')
diff --git a/mteval/ns.cc b/mteval/ns.cc
new file mode 100644
index 00000000..1045a51f
--- /dev/null
+++ b/mteval/ns.cc
@@ -0,0 +1,241 @@
+#include "ns.h"
+#include "ns_ter.h"
+
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+
+using namespace std;
+using boost::shared_ptr;
+
+map<string, EvaluationMetric*> EvaluationMetric::instances_;
+
+SegmentEvaluator::~SegmentEvaluator() {}
+EvaluationMetric::~EvaluationMetric() {}
+
+struct DefaultSegmentEvaluator : public SegmentEvaluator {  // forwards every Evaluate() call to the metric itself
+  DefaultSegmentEvaluator(const vector<vector<WordID> >& refs, const EvaluationMetric* em) : refs_(refs), em_(em) {}
+  void Evaluate(const vector<WordID>& hyp, SufficientStats* out) const {
+    em_->ComputeSufficientStatistics(hyp, refs_, out);
+  }
+  const vector<vector<WordID> > refs_;  // copy of the references handed to the constructor
+  const EvaluationMetric* em_;  // metric whose ComputeSufficientStatistics() is invoked
+};
+
+shared_ptr<SegmentEvaluator> EvaluationMetric::CreateSegmentEvaluator(const vector<vector<WordID> >& refs) const {
+  return shared_ptr<SegmentEvaluator>(new DefaultSegmentEvaluator(refs, this));  // default: evaluator that defers to ComputeSufficientStatistics()
+}
+
+void EvaluationMetric::ComputeSufficientStatistics(const vector<WordID>&,  // hyp (unused)
+                                                   const vector<vector<WordID> >&,  // refs (unused)
+                                                   SufficientStats*) const {  // out (unused)
+  cerr << "Base class ComputeSufficientStatistics should not be called.\n";
+  abort();  // reaching the base implementation is treated as fatal
+}
+
+enum BleuType { IBM, Koehn, NIST };
+template <unsigned int N = 4u, BleuType BrevityType = IBM>
+struct BleuSegmentEvaluator : public SegmentEvaluator {
+  BleuSegmentEvaluator(const vector<vector<WordID> >& refs, const EvaluationMetric* em) : evaluation_metric(em) {
+    assert(refs.size() > 0);  // lengths_[0] is assigned below for Koehn/NIST, so one reference is the minimum
+    float tot = 0;  // sum of all reference lengths
+    int smallest = 9999999;  // shortest reference length seen so far
+    for (vector<vector<WordID> >::const_iterator ci = refs.begin();
+         ci != refs.end(); ++ci) {
+      lengths_.push_back(ci->size());
+      tot += lengths_.back();
+      if (lengths_.back() < smallest) smallest = lengths_.back();
+      CountRef(*ci);  // fold this reference's n-gram counts into ngrams_
+    }
+    if (BrevityType == Koehn)
+      lengths_[0] = tot / refs.size();  // Koehn: slot 0 holds the mean reference length
+    if (BrevityType == NIST)
+      lengths_[0] = smallest;  // NIST: slot 0 holds the shortest reference length
+  }
+
+  void Evaluate(const vector<WordID>& hyp, SufficientStats* out) const {
+    out->fields.resize(N + N + 2);  // layout: N match counts, N hypothesis counts, hyp length, ref length
+    out->evaluation_metric = evaluation_metric;
+    for (unsigned i = 0; i < N+N+2; ++i) out->fields[i] = 0;
+
+    ComputeNgramStats(hyp, &out->fields[0], &out->fields[N], true);
+    float& hyp_len = out->fields[2*N];
+    float& ref_len = out->fields[2*N + 1];
+    hyp_len = hyp.size();
+    ref_len = lengths_[0];  // first reference's length, or the Koehn/NIST value the constructor stored in slot 0
+    if (lengths_.size() > 1 && BrevityType == IBM) {  // IBM: pick the reference length closest to the hypothesis
+      float bestd = 2000000;  // smallest |reference length - hypothesis length| found so far
+      float hl = hyp.size();
+      float bl = -1;  // reference length that achieved bestd
+      for (vector<float>::const_iterator ci = lengths_.begin(); ci != lengths_.end(); ++ci) {
+        if (fabs(*ci - hl) < bestd) {  // strict '<': the earliest reference wins ties
+          bestd = fabs(*ci - hl);
+          bl = *ci;
+        }
+      }
+      ref_len = bl;
+    }
+  }
+
+  // Map comparator for n-grams: lexicographic by word id, a proper prefix ordering first.
+  struct NGramCompare {
+    bool operator() (const vector<WordID>& a, const vector<WordID>& b) const {
+      const size_t as = a.size();
+      const size_t bs = b.size();
+      const size_t s = (as < bs ? as : bs);
+      for (size_t i = 0; i < s; ++i) {
+        if (a[i] < b[i]) return true;   // compared directly rather than via a[i] - b[i]
+        if (b[i] < a[i]) return false;
+      }
+      return as < bs;
+    }
+  };
+  typedef map<vector<WordID>, pair<int,int>, NGramCompare> NGramCountMap;
+
+  void CountRef(const vector<WordID>& ref) {  // merges one reference's n-gram counts into ngrams_
+    NGramCountMap tc;  // n-gram counts within this reference only
+    vector<WordID> ngram(N);
+    int s = ref.size();
+    for (int j=0; j<s; ++j) {
+      int remaining = s-j;
+      int k = (N < remaining ? N : remaining);  // longest n-gram that can start at position j
+      ngram.clear();
+      for (int i=1; i<=k; ++i) {
+        ngram.push_back(ref[j + i - 1]);
+        tc[ngram].first++;
+      }
+    }
+    for (typename NGramCountMap::iterator i = tc.begin(); i != tc.end(); ++i) {
+      pair<int,int>& p = ngrams_[i->first];
+      if (p.first < i->second.first)  // keep the largest count seen in any single reference
+        p = i->second;
+    }
+  }
+
+  // Counts, per n-gram order, the hypothesis n-grams seen (hyp[n-1]) and those credited as
+  // matches (correct[n-1]); with clip_counts, credit is capped at the stored reference count.
+  void ComputeNgramStats(const vector<WordID>& sent,
+                         float* correct,  // N elements reserved
+                         float* hyp,      // N elements reserved
+                         bool clip_counts = true) const {
+    // Reset the per-hypothesis usage counters: ngrams_ persists across calls, so counts left
+    // by an earlier hypothesis would otherwise eat into this one's clipping budget.
+    for (typename NGramCountMap::iterator it = ngrams_.begin(); it != ngrams_.end(); ++it)
+      it->second.second = 0;
+    for (unsigned n = 0; n < N; ++n) correct[n] = hyp[n] = 0;
+    vector<WordID> ngram(N);
+    const int s = sent.size();
+    for (int j=0; j<s; ++j) {
+      const int remaining = s-j;
+      const int k = (static_cast<int>(N) < remaining ? static_cast<int>(N) : remaining);
+      ngram.clear();
+      for (int i=1; i<=k; ++i) {
+        ngram.push_back(sent[j + i - 1]);
+        pair<int,int>& p = ngrams_[ngram];
+        if (!clip_counts || p.second < p.first) {
+          ++p.second;
+          correct[i-1]++;
+        }
+        // an n-gram with no reference count cannot start a longer match: tally the rest and stop
+        if (!p.first) {
+          for (; i<=k; ++i)
+            hyp[i-1]++;
+        } else {
+          hyp[i-1]++;
+        }
+      }
+    }
+  }
+
+  const EvaluationMetric* evaluation_metric;
+  vector<float> lengths_;
+  mutable NGramCountMap ngrams_;
+};
+
+// BLEU over n-gram orders 1..N. Expected stats layout (2N+2 floats): [0,N) matched n-gram
+// counts, [N,2N) hypothesis n-gram counts, [2N] hypothesis length, [2N+1] reference length.
+template <unsigned int N = 4u, BleuType BrevityType = IBM>
+struct BleuMetric : public EvaluationMetric {
+  // The id must equal the key Instance() creates this variant under, or Instance() aborts.
+  BleuMetric() : EvaluationMetric(BrevityType == IBM ? "IBM_BLEU" : (BrevityType == NIST ? "NIST_BLEU" : "Koehn_BLEU")) {}
+  // Returns exp(mean log n-gram precision + log brevity penalty); 0 if no n-gram order was observed.
+  float ComputeScore(const SufficientStats& stats) const {
+    float log_bleu = 0;
+    int count = 0;
+    for (unsigned i = 0; i < N; ++i) {
+      const float hyp_count = stats[i + N];  // operator[] is bounds-checked: missing fields read as 0
+      if (hyp_count > 0) {
+        float cor_count = stats[i];
+        if (!cor_count) { cor_count = 0.01; }  // smooth bleu: avoid log(0)
+        log_bleu += log(cor_count) - log(hyp_count);
+        ++count;
+      }
+    }
+    if (count == 0) return 0;  // nothing to average; avoids 0/0 -> NaN
+    log_bleu /= count;
+    const float hyp_len = stats[2*N];
+    const float ref_len = stats[2*N + 1];
+    // log brevity penalty (1 - ref_len/hyp_len), applied only when the hypothesis is shorter
+    if (hyp_len < ref_len) log_bleu += (hyp_len - ref_len) / hyp_len;
+    return exp(log_bleu);
+  }
+  shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const vector<vector<WordID> >& refs) const {
+    return shared_ptr<SegmentEvaluator>(new BleuSegmentEvaluator<N,BrevityType>(refs, this));
+  }
+};
+
+// Returns the cached metric for metric_id, creating it on first request; aborts on an unknown or mismatched id.
+EvaluationMetric* EvaluationMetric::Instance(const string& metric_id) {
+  static bool needs_init = true;
+  if (needs_init) {
+    needs_init = false;
+    instances_["NULL"] = NULL;
+  }
+
+  const map<string, EvaluationMetric*>::iterator found = instances_.find(metric_id);
+  if (found != instances_.end())
+    return found->second;
+
+  EvaluationMetric* created = NULL;
+  if (metric_id == "IBM_BLEU") {
+    created = new BleuMetric<4, IBM>;
+  } else if (metric_id == "NIST_BLEU") {
+    created = new BleuMetric<4, NIST>;
+  } else if (metric_id == "Koehn_BLEU") {
+    created = new BleuMetric<4, Koehn>;
+  } else if (metric_id == "TER") {
+    created = new TERMetric;
+  } else {
+    cerr << "Implement please: " << metric_id << endl;
+    abort();
+  }
+  if (created->MetricId() != metric_id) {
+    cerr << "Registry error: " << metric_id << " vs. " << created->MetricId() << endl;
+    abort();
+  }
+  return instances_[metric_id] = created;
+}
+
+// Parses "<metric id> f0 f1 ...": the first token selects the metric, the floats become fields.
+SufficientStats::SufficientStats(const string& encoded) {
+  istringstream input(encoded);
+  string metric_name;
+  input >> metric_name;
+  evaluation_metric = EvaluationMetric::Instance(metric_name);
+  for (float value; input >> value; )
+    fields.push_back(value);
+}
+
+// Writes the stats to *out as "<metric id> f0 f1 ...", single-space separated;
+// the id is written as "NULL" when no metric is attached.
+void SufficientStats::Encode(string* out) const {
+  ostringstream buffer;
+  buffer << (evaluation_metric ? evaluation_metric->MetricId() : string("NULL"));
+  const size_t num_fields = fields.size();
+  for (size_t k = 0; k < num_fields; ++k)
+    buffer << ' ' << fields[k];
+  *out = buffer.str();
+}
+
diff --git a/mteval/ns.h b/mteval/ns.h
new file mode 100644
index 00000000..f19b7509
--- /dev/null
+++ b/mteval/ns.h
@@ -0,0 +1,106 @@
+#ifndef _NS_H_
+#define _NS_H_
+
+#include <string>
+#include <vector>
+#include <map>
+#include <boost/shared_ptr.hpp>
+#include "wordid.h"
+
+class EvaluationMetric;
+
+// A vector of additive float statistics plus a pointer to the metric that interprets them.
+class SufficientStats {
+ public:
+  SufficientStats() : evaluation_metric() {}
+  explicit SufficientStats(const std::string& encoded);
+  explicit SufficientStats(const EvaluationMetric* s) : evaluation_metric(s) {}
+  SufficientStats(const EvaluationMetric* s, const std::vector<float>& f) :
+    evaluation_metric(s), fields(f) {}
+
+  // Element-wise add; a shorter left side is zero-extended and delta's metric, if set, is adopted.
+  SufficientStats& operator+=(const SufficientStats& delta) {
+    if (delta.evaluation_metric) evaluation_metric = delta.evaluation_metric;
+    if (fields.size() < delta.fields.size()) fields.resize(delta.fields.size());
+    for (unsigned i = 0; i < delta.fields.size(); ++i)
+      fields[i] += delta.fields[i];
+    return *this;
+  }
+  SufficientStats& operator-=(const SufficientStats& delta) {  // same rules as operator+=
+    if (delta.evaluation_metric) evaluation_metric = delta.evaluation_metric;
+    if (fields.size() < delta.fields.size()) fields.resize(delta.fields.size());
+    for (unsigned i = 0; i < delta.fields.size(); ++i)
+      fields[i] -= delta.fields[i];
+    return *this;
+  }
+  SufficientStats& operator*=(const double& scalar) {
+    for (unsigned i = 0; i < fields.size(); ++i)
+      fields[i] *= scalar;
+    return *this;
+  }
+  SufficientStats& operator/=(const double& scalar) {
+    for (unsigned i = 0; i < fields.size(); ++i)
+      fields[i] /= scalar;
+    return *this;
+  }
+  bool operator==(const SufficientStats& other) const {  // compares fields only; the metric is ignored
+    return other.fields == fields;
+  }
+  size_t size() const { return fields.size(); }
+  float operator[](size_t i) const {  // out-of-range indices read as 0
+    if (i < fields.size()) return fields[i];
+    return 0;
+  }
+  void Encode(std::string* out) const;
+
+  const EvaluationMetric* evaluation_metric;
+  std::vector<float> fields;
+};
+
+inline SufficientStats operator+(const SufficientStats& a, const SufficientStats& b) {
+  SufficientStats res(a);  // returned by value: a reference to this local would dangle
+  return res += b;
+}
+
+inline SufficientStats operator-(const SufficientStats& a, const SufficientStats& b) {
+  SufficientStats res(a);  // returned by value: a reference to this local would dangle
+  return res -= b;
+}
+
+struct SegmentEvaluator {  // interface: turns one hypothesis into sufficient statistics
+  virtual ~SegmentEvaluator();
+  virtual void Evaluate(const std::vector<WordID>& hyp, SufficientStats* out) const = 0;  // results are written to *out
+};
+
+// Instructions for implementing a new metric
+//   Pass a unique string name (no spaces) to the EvaluationMetric constructor; MetricId() returns it
+//   To Instance(), add something that creates the metric
+//   Implement ONE of the following:
+//      1) void ComputeSufficientStatistics(const std::vector<WordID>& hyp, const std::vector<std::vector<WordID> >& refs, SufficientStats* out) const;
+//      2) a new SegmentEvaluator class AND CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const;
+//   The latter (#2) is only needed when per-segment data must be precomputed from a set of refs
+//   Implement ComputeScore(const SufficientStats& stats) const;
+class EvaluationMetric {
+ public:
+  static EvaluationMetric* Instance(const std::string& metric_id = "IBM_BLEU");  // lookup/creation by id
+
+ protected:
+  EvaluationMetric(const std::string& id) : name_(id) {}
+  virtual ~EvaluationMetric();
+
+ public:
+  const std::string& MetricId() const { return name_; }  // the id passed to the constructor
+
+  virtual float ComputeScore(const SufficientStats& stats) const = 0;
+  virtual boost::shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const;
+  virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
+                                           const std::vector<std::vector<WordID> >& refs,
+                                           SufficientStats* out) const;
+
+ private:
+  static std::map<std::string, EvaluationMetric*> instances_;  // metric id -> instance
+  const std::string name_;
+};
+
+#endif
+
diff --git a/mteval/ns_ter.cc b/mteval/ns_ter.cc
new file mode 100644
index 00000000..14dc6e49
--- /dev/null
+++ b/mteval/ns_ter.cc
@@ -0,0 +1,551 @@
+#include "ns_ter.h"
+
+#include <cstdio>
+#include <cassert>
+#include <iostream>
+#include <limits>
+#include <sstream>
+#include <tr1/unordered_map>
+#include <set>
+#include <valarray>
+#include <boost/functional/hash.hpp>
+#include <stdexcept>
+#include "tdict.h"
+
+static const bool ter_use_average_ref_len = true;
+static const int ter_short_circuit_long_sentences = -1;
+
+static const unsigned kINSERTIONS = 0;
+static const unsigned kDELETIONS = 1;
+static const unsigned kSUBSTITUTIONS = 2;
+static const unsigned kSHIFTS = 3;
+static const unsigned kREF_WORDCOUNT = 4;
+static const unsigned kDUMMY_LAST_ENTRY = 5;
+
+using namespace std;
+using namespace std::tr1;
+
+#if 0
+
+struct COSTS {
+  static const float substitution;
+  static const float deletion;
+  static const float insertion;
+  static const float shift;
+};
+const float COSTS::substitution = 1.0f;
+const float COSTS::deletion = 1.0f;
+const float COSTS::insertion = 1.0f;
+const float COSTS::shift = 1.0f;
+
+static const int MAX_SHIFT_SIZE = 10;
+static const int MAX_SHIFT_DIST = 50;
+
+struct Shift {
+  unsigned int d_;
+  Shift() : d_() {}
+  Shift(int b, int e, int m) : d_() {
+    begin(b);
+    end(e);
+    moveto(m);
+  }
+  inline int begin() const {
+    return d_ & 0x3ff;
+  }
+  inline int end() const {
+    return (d_ >> 10) & 0x3ff;
+  }
+  inline int moveto() const {
+    int m = (d_ >> 20) & 0x7ff;
+    if (m > 1024) { m -= 1024; m *= -1; }
+    return m;
+  }
+  inline void begin(int b) {
+    d_ &= 0xfffffc00u;
+    d_ |= (b & 0x3ff);
+  }
+  inline void end(int e) {
+    d_ &= 0xfff003ffu;
+    d_ |= (e & 0x3ff) << 10;
+  }
+  inline void moveto(int m) {
+    bool neg = (m < 0);
+    if (neg) { m *= -1; m += 1024; }
+    d_ &= 0xfffff;
+    d_ |= (m & 0x7ff) << 20;
+  }
+};
+
+class TERScorerImpl {
+
+ public:
+  enum TransType { MATCH, SUBSTITUTION, INSERTION, DELETION };
+
+  explicit TERScorerImpl(const vector<WordID>& ref) : ref_(ref) {
+    for (int i = 0; i < ref.size(); ++i)
+      rwexists_.insert(ref[i]);
+  }
+
+  float Calculate(const vector<WordID>& hyp, int* subs, int* ins, int* dels, int* shifts) const {
+    return CalculateAllShifts(hyp, subs, ins, dels, shifts);
+  }
+
+  inline int GetRefLength() const {
+    return ref_.size();
+  }
+
+ private:
+  vector<WordID> ref_;
+  set<WordID> rwexists_;
+
+  typedef unordered_map<vector<WordID>, set<int>, boost::hash<vector<WordID> > > NgramToIntsMap;
+  mutable NgramToIntsMap nmap_;
+
+  static float MinimumEditDistance(
+      const vector<WordID>& hyp,
+      const vector<WordID>& ref,
+      vector<TransType>* path) {
+    vector<vector<TransType> > bmat(hyp.size() + 1, vector<TransType>(ref.size() + 1, MATCH));
+    vector<vector<float> > cmat(hyp.size() + 1, vector<float>(ref.size() + 1, 0));
+    for (int i = 0; i <= hyp.size(); ++i)
+      cmat[i][0] = i;
+    for (int j = 0; j <= ref.size(); ++j)
+      cmat[0][j] = j;
+    for (int i = 1; i <= hyp.size(); ++i) {
+      const WordID& hw = hyp[i-1];
+      for (int j = 1; j <= ref.size(); ++j) {
+        const WordID& rw = ref[j-1];
+	float& cur_c = cmat[i][j];
+	TransType& cur_b = bmat[i][j];
+
+        if (rw == hw) {
+          cur_c = cmat[i-1][j-1];
+          cur_b = MATCH;
+        } else {
+          cur_c = cmat[i-1][j-1] + COSTS::substitution;
+          cur_b = SUBSTITUTION;
+        }
+	float cwoi = cmat[i-1][j];
+        if (cur_c > cwoi + COSTS::insertion) {
+          cur_c = cwoi + COSTS::insertion;
+          cur_b = INSERTION;
+        }
+        float cwod = cmat[i][j-1];
+        if (cur_c > cwod + COSTS::deletion) {
+          cur_c = cwod + COSTS::deletion;
+          cur_b = DELETION;
+        }
+      }
+    }
+
+    // trace back along the best path and record the transition types
+    path->clear();
+    int i = hyp.size();
+    int j = ref.size();
+    while (i > 0 || j > 0) {
+      if (j == 0) {
+        --i;
+        path->push_back(INSERTION);
+      } else if (i == 0) {
+        --j;
+        path->push_back(DELETION);
+      } else {
+        TransType t = bmat[i][j];
+        path->push_back(t);
+        switch (t) {
+          case SUBSTITUTION:
+          case MATCH:
+            --i; --j; break;
+          case INSERTION:
+            --i; break;
+          case DELETION:
+            --j; break;
+        }
+      }
+    }
+    reverse(path->begin(), path->end());
+    return cmat[hyp.size()][ref.size()];
+  }
+
+  void BuildWordMatches(const vector<WordID>& hyp, NgramToIntsMap* nmap) const {
+    nmap->clear();
+    set<WordID> exists_both;
+    for (int i = 0; i < hyp.size(); ++i)
+      if (rwexists_.find(hyp[i]) != rwexists_.end())
+        exists_both.insert(hyp[i]);
+    for (int start=0; start<ref_.size(); ++start) {
+      if (exists_both.find(ref_[start]) == exists_both.end()) continue;
+      vector<WordID> cp;
+      int mlen = min(MAX_SHIFT_SIZE, static_cast<int>(ref_.size() - start));
+      for (int len=0; len<mlen; ++len) {
+        if (len && exists_both.find(ref_[start + len]) == exists_both.end()) break;
+        cp.push_back(ref_[start + len]);
+	(*nmap)[cp].insert(start);
+      }
+    }
+  }
+
+  static void PerformShift(const vector<WordID>& in,
+    int start, int end, int moveto, vector<WordID>* out) {
+    // cerr << "ps: " << start << " " << end << " " << moveto << endl;
+    out->clear();
+    if (moveto == -1) {
+      for (int i = start; i <= end; ++i)
+       out->push_back(in[i]);
+      for (int i = 0; i < start; ++i)
+       out->push_back(in[i]);
+      for (int i = end+1; i < in.size(); ++i)
+       out->push_back(in[i]);
+    } else if (moveto < start) {
+      for (int i = 0; i <= moveto; ++i)
+       out->push_back(in[i]);
+      for (int i = start; i <= end; ++i)
+       out->push_back(in[i]);
+      for (int i = moveto+1; i < start; ++i)
+       out->push_back(in[i]);
+      for (int i = end+1; i < in.size(); ++i)
+       out->push_back(in[i]);
+    } else if (moveto > end) {
+      for (int i = 0; i < start; ++i)
+       out->push_back(in[i]);
+      for (int i = end+1; i <= moveto; ++i)
+       out->push_back(in[i]);
+      for (int i = start; i <= end; ++i)
+       out->push_back(in[i]);
+      for (int i = moveto+1; i < in.size(); ++i)
+       out->push_back(in[i]);
+    } else {
+      for (int i = 0; i < start; ++i)
+       out->push_back(in[i]);
+      for (int i = end+1; (i < in.size()) && (i <= end + (moveto - start)); ++i)
+       out->push_back(in[i]);
+      for (int i = start; i <= end; ++i)
+       out->push_back(in[i]);
+      for (int i = (end + (moveto - start))+1; i < in.size(); ++i)
+       out->push_back(in[i]);
+    }
+    if (out->size() != in.size()) {
+      cerr << "ps: " << start << " " << end << " " << moveto << endl;
+      cerr << "in=" << TD::GetString(in) << endl;
+      cerr << "out=" << TD::GetString(*out) << endl;
+    }
+    assert(out->size() == in.size());
+    // cerr << "ps: " << TD::GetString(*out) << endl;
+  }
+
+  void GetAllPossibleShifts(const vector<WordID>& hyp,
+      const vector<int>& ralign,
+      const vector<bool>& herr,
+      const vector<bool>& rerr,
+      const int min_size,
+      vector<vector<Shift> >* shifts) const {
+    for (int start = 0; start < hyp.size(); ++start) {
+      vector<WordID> cp(1, hyp[start]);
+      NgramToIntsMap::iterator niter = nmap_.find(cp);
+      if (niter == nmap_.end()) continue;
+      bool ok = false;
+      int moveto;
+      for (set<int>::iterator i = niter->second.begin(); i != niter->second.end(); ++i) {
+        moveto = *i;
+        int rm = ralign[moveto];
+        ok = (start != rm &&
+              (rm - start) < MAX_SHIFT_DIST &&
+              (start - rm - 1) < MAX_SHIFT_DIST);
+        if (ok) break;
+      }
+      if (!ok) continue;
+      cp.clear();
+      for (int end = start + min_size - 1;
+           ok && end < hyp.size() && end < (start + MAX_SHIFT_SIZE); ++end) {
+        cp.push_back(hyp[end]);
+	vector<Shift>& sshifts = (*shifts)[end - start];
+        ok = false;
+        NgramToIntsMap::iterator niter = nmap_.find(cp);
+        if (niter == nmap_.end()) break;
+        bool any_herr = false;
+        for (int i = start; i <= end && !any_herr; ++i)
+          any_herr = herr[i];
+        if (!any_herr) {
+          ok = true;
+          continue;
+        }
+        for (set<int>::iterator mi = niter->second.begin();
+             mi != niter->second.end(); ++mi) {
+          int moveto = *mi;
+	  int rm = ralign[moveto];
+	  if (! ((rm != start) &&
+	        ((rm < start) || (rm > end)) &&
+		(rm - start <= MAX_SHIFT_DIST) &&
+		((start - rm - 1) <= MAX_SHIFT_DIST))) continue;
+          ok = true;
+	  bool any_rerr = false;
+	  for (int i = 0; (i <= end - start) && (!any_rerr); ++i)
+            any_rerr = rerr[moveto+i];
+	  if (!any_rerr) continue;
+	  for (int roff = 0; roff <= (end - start); ++roff) {
+	    int rmr = ralign[moveto+roff];
+	    if ((start != rmr) && ((roff == 0) || (rmr != ralign[moveto])))
+	      sshifts.push_back(Shift(start, end, moveto + roff));
+	  }
+        }
+      }
+    }
+  }
+
+  bool CalculateBestShift(const vector<WordID>& cur,
+                          const vector<WordID>& hyp,
+                          float curerr,
+                          const vector<TransType>& path,
+                          vector<WordID>* new_hyp,
+                          float* newerr,
+                          vector<TransType>* new_path) const {
+    vector<bool> herr, rerr;
+    vector<int> ralign;
+    int hpos = -1;
+    for (int i = 0; i < path.size(); ++i) {
+      switch (path[i]) {
+        case MATCH:
+	  ++hpos;
+	  herr.push_back(false);
+	  rerr.push_back(false);
+	  ralign.push_back(hpos);
+          break;
+        case SUBSTITUTION:
+	  ++hpos;
+	  herr.push_back(true);
+	  rerr.push_back(true);
+	  ralign.push_back(hpos);
+          break;
+        case INSERTION:
+	  ++hpos;
+	  herr.push_back(true);
+          break;
+	case DELETION:
+	  rerr.push_back(true);
+	  ralign.push_back(hpos);
+          break;
+      }
+    }
+#if 0
+    cerr << "RALIGN: ";
+    for (int i = 0; i < rerr.size(); ++i)
+      cerr << ralign[i] << " ";
+    cerr << endl;
+    cerr << "RERR: ";
+    for (int i = 0; i < rerr.size(); ++i)
+      cerr << (bool)rerr[i] << " ";
+    cerr << endl;
+    cerr << "HERR: ";
+    for (int i = 0; i < herr.size(); ++i)
+      cerr << (bool)herr[i] << " ";
+    cerr << endl;
+#endif
+
+    vector<vector<Shift> > shifts(MAX_SHIFT_SIZE + 1);
+    GetAllPossibleShifts(cur, ralign, herr, rerr, 1, &shifts);
+    float cur_best_shift_cost = 0;
+    *newerr = curerr;
+    vector<TransType> cur_best_path;
+    vector<WordID> cur_best_hyp;
+
+    bool res = false;
+    for (int i = shifts.size() - 1; i >=0; --i) {
+      float curfix = curerr - (cur_best_shift_cost + *newerr);
+      float maxfix = 2.0f * (1 + i) - COSTS::shift;
+      if ((curfix > maxfix) || ((cur_best_shift_cost == 0) && (curfix == maxfix))) break;
+      for (int j = 0; j < shifts[i].size(); ++j) {
+        const Shift& s = shifts[i][j];
+	curfix = curerr - (cur_best_shift_cost + *newerr);
+	maxfix = 2.0f * (1 + i) - COSTS::shift;  // TODO remove?
+        if ((curfix > maxfix) || ((cur_best_shift_cost == 0) && (curfix == maxfix))) continue;
+	vector<WordID> shifted(cur.size());
+	PerformShift(cur, s.begin(), s.end(), ralign[s.moveto()], &shifted);
+	vector<TransType> try_path;
+	float try_cost = MinimumEditDistance(shifted, ref_, &try_path);
+	float gain = (*newerr + cur_best_shift_cost) - (try_cost + COSTS::shift);
+	if (gain > 0.0f || ((cur_best_shift_cost == 0.0f) && (gain == 0.0f))) {
+	  *newerr = try_cost;
+	  cur_best_shift_cost = COSTS::shift;
+	  new_path->swap(try_path);
+	  new_hyp->swap(shifted);
+	  res = true;
+	  // cerr << "Found better shift " << s.begin() << "..." << s.end() << " moveto " << s.moveto() << endl;
+	}
+      }
+    }
+
+    return res;
+  }
+
+  static void GetPathStats(const vector<TransType>& path, int* subs, int* ins, int* dels) {
+    *subs = *ins = *dels = 0;
+    for (int i = 0; i < path.size(); ++i) {
+      switch (path[i]) {
+        case SUBSTITUTION:
+	  ++(*subs);
+        case MATCH:
+          break;
+        case INSERTION:
+          ++(*ins); break;
+	case DELETION:
+          ++(*dels); break;
+      }
+    }
+  }
+
+  float CalculateAllShifts(const vector<WordID>& hyp,
+      int* subs, int* ins, int* dels, int* shifts) const {
+    BuildWordMatches(hyp, &nmap_);
+    vector<TransType> path;
+    float med_cost = MinimumEditDistance(hyp, ref_, &path);
+    float edits = 0;
+    vector<WordID> cur = hyp;
+    *shifts = 0;
+    if (ter_short_circuit_long_sentences < 0 ||
+        ref_.size() < ter_short_circuit_long_sentences) {
+      while (true) {
+        vector<WordID> new_hyp;
+        vector<TransType> new_path;
+        float new_med_cost;
+        if (!CalculateBestShift(cur, hyp, med_cost, path, &new_hyp, &new_med_cost, &new_path))
+          break;
+        edits += COSTS::shift;
+        ++(*shifts);
+        med_cost = new_med_cost;
+        path.swap(new_path);
+        cur.swap(new_hyp);
+      }
+    }
+    GetPathStats(path, subs, ins, dels);
+    return med_cost + edits;
+  }
+};
+
+class TERScore : public ScoreBase<TERScore> {
+  friend class TERScorer;
+
+ public:
+
+ TERScore() : stats(0,kDUMMY_LAST_ENTRY) {}
+  float ComputePartialScore() const { return 0.0;}
+  float ComputeScore() const {
+    float edits = static_cast<float>(stats[kINSERTIONS] + stats[kDELETIONS] + stats[kSUBSTITUTIONS] + stats[kSHIFTS]);
+    return edits / static_cast<float>(stats[kREF_WORDCOUNT]);
+  }
+  void ScoreDetails(string* details) const;
+  void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){}
+  void PlusEquals(const Score& delta, const float scale) {
+    if (scale==1)
+      stats += static_cast<const TERScore&>(delta).stats;
+    else if (scale==-1)
+      stats -= static_cast<const TERScore&>(delta).stats;
+    else throw std::runtime_error("TERScore::PlusEquals with scale != +-1");  // only +1 / -1 are supported
+ }
+  void PlusEquals(const Score& delta) {
+    stats += static_cast<const TERScore&>(delta).stats;
+  }
+
+  ScoreP GetZero() const {
+    return ScoreP(new TERScore);
+  }
+  ScoreP GetOne() const {
+    return ScoreP(new TERScore);
+  }
+  void Subtract(const Score& rhs, Score* res) const {
+    static_cast<TERScore*>(res)->stats = stats - static_cast<const TERScore&>(rhs).stats;
+  }
+  void Encode(std::string* out) const {
+    ostringstream os;
+    os << stats[kINSERTIONS] << ' '
+       << stats[kDELETIONS] << ' '
+       << stats[kSUBSTITUTIONS] << ' '
+       << stats[kSHIFTS] << ' '
+       << stats[kREF_WORDCOUNT];
+    *out = os.str();
+  }
+  bool IsAdditiveIdentity() const {
+    for (int i = 0; i < kDUMMY_LAST_ENTRY; ++i)
+      if (stats[i] != 0) return false;
+    return true;
+  }
+ private:
+  valarray<int> stats;
+};
+
+ScoreP TERScorer::ScoreFromString(const std::string& data) {
+  istringstream is(data);
+  TERScore* r = new TERScore;
+  is >> r->stats[TERScore::kINSERTIONS]
+     >> r->stats[TERScore::kDELETIONS]
+     >> r->stats[TERScore::kSUBSTITUTIONS]
+     >> r->stats[TERScore::kSHIFTS]
+     >> r->stats[TERScore::kREF_WORDCOUNT];
+  return ScoreP(r);
+}
+
+void TERScore::ScoreDetails(std::string* details) const {
+  char buf[200];
+  sprintf(buf, "TER = %.2f, %3d|%3d|%3d|%3d (len=%d)",
+     ComputeScore() * 100.0f,
+     stats[kINSERTIONS],
+     stats[kDELETIONS],
+     stats[kSUBSTITUTIONS],
+     stats[kSHIFTS],
+     stats[kREF_WORDCOUNT]);
+  *details = buf;
+}
+
+TERScorer::~TERScorer() {
+  for (vector<TERScorerImpl*>::iterator i = impl_.begin(); i != impl_.end(); ++i)
+    delete *i;
+}
+
+TERScorer::TERScorer(const vector<vector<WordID> >& refs) : impl_(refs.size()) {
+  for (int i = 0; i < refs.size(); ++i)
+    impl_[i] = new TERScorerImpl(refs[i]);
+}
+
+ScoreP TERScorer::ScoreCCandidate(const vector<WordID>& hyp) const {
+  return ScoreP();
+}
+
+ScoreP TERScorer::ScoreCandidate(const std::vector<WordID>& hyp) const {
+  float best_score = numeric_limits<float>::max();
+  TERScore* res = new TERScore;
+  int avg_len = 0;
+  for (int i = 0; i < impl_.size(); ++i)
+    avg_len += impl_[i]->GetRefLength();
+  avg_len /= impl_.size();
+  for (int i = 0; i < impl_.size(); ++i) {
+    int subs, ins, dels, shifts;
+    float score = impl_[i]->Calculate(hyp, &subs, &ins, &dels, &shifts);
+    // cerr << "Component TER cost: " << score << endl;
+    if (score < best_score) {
+      res->stats[TERScore::kINSERTIONS] = ins;
+      res->stats[TERScore::kDELETIONS] = dels;
+      res->stats[TERScore::kSUBSTITUTIONS] = subs;
+      res->stats[TERScore::kSHIFTS] = shifts;
+      if (ter_use_average_ref_len) {
+        res->stats[TERScore::kREF_WORDCOUNT] = avg_len;
+      } else {
+        res->stats[TERScore::kREF_WORDCOUNT] = impl_[i]->GetRefLength();
+      }
+
+      best_score = score;
+    }
+  }
+  return ScoreP(res);
+}
+#endif
+
+void TERMetric::ComputeSufficientStatistics(const vector<WordID>& /*hyp*/, const vector<vector<WordID> >& /*refs*/,
+                                            SufficientStats* out) const {
+  out->evaluation_metric = this;  // tag the stats with their metric, as the BLEU evaluator does
+  out->fields.assign(kDUMMY_LAST_ENTRY, 0.0f);  // placeholder: no TER edits are counted yet, so every field is 0
+}
+
+float TERMetric::ComputeScore(const SufficientStats& stats) const {
+  float edits = static_cast<float>(stats[kINSERTIONS] + stats[kDELETIONS] + stats[kSUBSTITUTIONS] + stats[kSHIFTS]);  // total edit operations
+  return edits / static_cast<float>(stats[kREF_WORDCOUNT]);  // edits per reference word; NOTE(review): no guard for a zero word count
+}
+
diff --git a/mteval/ns_ter.h b/mteval/ns_ter.h
new file mode 100644
index 00000000..bb90f95e
--- /dev/null
+++ b/mteval/ns_ter.h
@@ -0,0 +1,18 @@
+#ifndef _NS_TER_H_
+#define _NS_TER_H_
+
+#include "ns.h"
+
+class TERMetric : public EvaluationMetric {
+  friend class EvaluationMetric;  // gives EvaluationMetric access to the protected constructor
+ protected:
+  TERMetric() : EvaluationMetric("TER") {}  // metric id is "TER"
+
+ public:
+  virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
+                                           const std::vector<std::vector<WordID> >& refs,
+                                           SufficientStats* out) const;
+  virtual float ComputeScore(const SufficientStats& stats) const;  // total edits divided by the reference word count
+};
+
+#endif
diff --git a/mteval/scorer_test.cc b/mteval/scorer_test.cc
index a07a8c4b..09da250c 100644
--- a/mteval/scorer_test.cc
+++ b/mteval/scorer_test.cc
@@ -3,9 +3,11 @@
 #include <valarray>
 #include <gtest/gtest.h>
 
+#include "ns.h"
 #include "tdict.h"
 #include "scorer.h"
 #include "aer_scorer.h"
+#include "kernel_string_subseq.h"
 
 using namespace std;
 
@@ -175,6 +177,50 @@ TEST_F(ScorerTest, AERTest) {
   EXPECT_EQ(d2, details);
 }
 
+TEST_F(ScorerTest, Kernel) {
+  for (int i = 1; i < 10; ++i) {  // sweep the third ssk argument over 0.1 .. 0.9; this test only prints, it asserts nothing
+    const float l = (i / 10.0);  // printed below as LAMBDA
+    float f = ssk<4>(refs0[0], hyp1, l) +  // f: ssk of hyp1 against each of the four refs0 references
+              ssk<4>(refs0[1], hyp1, l) +
+              ssk<4>(refs0[2], hyp1, l) +
+              ssk<4>(refs0[3], hyp1, l);
+    float f2= ssk<4>(refs1[0], hyp2, l) +  // f2: ssk of hyp2 against each of the four refs1 references
+              ssk<4>(refs1[1], hyp2, l) +
+              ssk<4>(refs1[2], hyp2, l) +
+              ssk<4>(refs1[3], hyp2, l);
+    f /= 4;  // mean over the four references
+    f2 /= 4;
+    float f3= ssk<4>(refs0[0], hyp2, l) +  // f3/f4: the crossed pairings (refs0 with hyp2, refs1 with hyp1)
+              ssk<4>(refs0[1], hyp2, l) +
+              ssk<4>(refs0[2], hyp2, l) +
+              ssk<4>(refs0[3], hyp2, l);
+    float f4= ssk<4>(refs1[0], hyp1, l) +
+              ssk<4>(refs1[1], hyp1, l) +
+              ssk<4>(refs1[2], hyp1, l) +
+              ssk<4>(refs1[3], hyp1, l);
+    f3 += f4;
+    f3 /= 8;  // mean over all eight crossed pairings, reported as "bad"
+    cerr << "LAMBDA=" << l << "\t" << f << " " << f2 << "\tf=" << ((f + f2)/2 - f3) << " (bad=" << f3 << ")" << endl;  // "f=" is the margin between the two groups of pairings
+  }
+}
+
+TEST_F(ScorerTest, NewScoreAPI) {
+  EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU");
+  boost::shared_ptr<SegmentEvaluator> e1 = metric->CreateSegmentEvaluator(refs0);
+  boost::shared_ptr<SegmentEvaluator> e2 = metric->CreateSegmentEvaluator(refs1);
+  SufficientStats stats1;
+  e1->Evaluate(hyp2, &stats1);
+  SufficientStats stats2;
+  e2->Evaluate(hyp1, &stats2);
+  stats1 += stats2;
+  string ss;
+  stats1.Encode(&ss);
+  cerr << "SS: " << ss << endl;
+  cerr << metric->ComputeScore(stats1) << endl;
+  SufficientStats statse("IBM_BLEU 53 32 18 11 65 63 61 59 65 72");
+  cerr << metric->ComputeScore(statse) << endl;
+}
+
 int main(int argc, char **argv) {
   testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
-- 
cgit v1.2.3


From 2eb3bb96c6f780c477585b33273fc0c0d56c80e4 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Tue, 20 Dec 2011 15:51:11 -0500
Subject: new scorer interface is implemented, but not used

---
 mteval/Makefile.am    |   2 +-
 mteval/ns.cc          |  67 ++++++++++++++++++++------
 mteval/ns.h           |  23 +++++----
 mteval/ns_comb.cc     |  87 +++++++++++++++++++++++++++++++++
 mteval/ns_comb.h      |  19 ++++++++
 mteval/ns_ext.cc      | 130 ++++++++++++++++++++++++++++++++++++++++++++++++++
 mteval/ns_ext.h       |  21 ++++++++
 mteval/ns_ter.cc      | 126 ++++++++++--------------------------------------
 mteval/ns_ter.h       |   1 +
 mteval/scorer_test.cc |  12 +++--
 utils/stringlib.h     |   7 +++
 11 files changed, 362 insertions(+), 133 deletions(-)
 create mode 100644 mteval/ns_comb.cc
 create mode 100644 mteval/ns_comb.h
 create mode 100644 mteval/ns_ext.cc
 create mode 100644 mteval/ns_ext.h

(limited to 'mteval')

diff --git a/mteval/Makefile.am b/mteval/Makefile.am
index 95845090..6679d949 100644
--- a/mteval/Makefile.am
+++ b/mteval/Makefile.am
@@ -10,7 +10,7 @@ endif
 
 noinst_LIBRARIES = libmteval.a
 
-libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc
+libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc
 
 fast_score_SOURCES = fast_score.cc
 fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz
diff --git a/mteval/ns.cc b/mteval/ns.cc
index 1045a51f..6139757d 100644
--- a/mteval/ns.cc
+++ b/mteval/ns.cc
@@ -1,5 +1,7 @@
 #include "ns.h"
 #include "ns_ter.h"
+#include "ns_ext.h"
+#include "ns_comb.h"
 
 #include <cassert>
 #include <cmath>
@@ -7,6 +9,9 @@
 #include <iostream>
 #include <sstream>
 
+#include "tdict.h"
+#include "stringlib.h"
+
 using namespace std;
 using boost::shared_ptr;
 
@@ -19,6 +24,7 @@ struct DefaultSegmentEvaluator : public SegmentEvaluator {
   DefaultSegmentEvaluator(const vector<vector<WordID> >& refs, const EvaluationMetric* em) : refs_(refs), em_(em) {}
   void Evaluate(const vector<WordID>& hyp, SufficientStats* out) const {
     em_->ComputeSufficientStatistics(hyp, refs_, out);
+    out->id_ = em_->MetricId();
   }
   const vector<vector<WordID> > refs_;
   const EvaluationMetric* em_;
@@ -28,6 +34,11 @@ shared_ptr<SegmentEvaluator> EvaluationMetric::CreateSegmentEvaluator(const vect
   return shared_ptr<SegmentEvaluator>(new DefaultSegmentEvaluator(refs, this));
 }
 
+#define MAX_SS_VECTOR_SIZE 50
+unsigned EvaluationMetric::SufficientStatisticsVectorSize() const {
+  return MAX_SS_VECTOR_SIZE;
+}
+
 void EvaluationMetric::ComputeSufficientStatistics(const vector<WordID>&,
                                                    const vector<vector<WordID> >&,
                                                    SufficientStats*) const {
@@ -35,6 +46,12 @@ void EvaluationMetric::ComputeSufficientStatistics(const vector<WordID>&,
   abort();
 }
 
+string EvaluationMetric::DetailedScore(const SufficientStats& stats) const {
+  ostringstream os;
+  os << MetricId() << "=" << ComputeScore(stats);
+  return os.str();
+}
+
 enum BleuType { IBM, Koehn, NIST };
 template <unsigned int N = 4u, BleuType BrevityType = IBM>
 struct BleuSegmentEvaluator : public SegmentEvaluator {
@@ -57,7 +74,7 @@ struct BleuSegmentEvaluator : public SegmentEvaluator {
 
   void Evaluate(const vector<WordID>& hyp, SufficientStats* out) const {
     out->fields.resize(N + N + 2);
-    out->evaluation_metric = evaluation_metric;
+    out->id_ = evaluation_metric->MetricId();
     for (unsigned i = 0; i < N+N+2; ++i) out->fields[i] = 0;
 
     ComputeNgramStats(hyp, &out->fields[0], &out->fields[N], true);
@@ -157,7 +174,12 @@ struct BleuSegmentEvaluator : public SegmentEvaluator {
 template <unsigned int N = 4u, BleuType BrevityType = IBM>
 struct BleuMetric : public EvaluationMetric {
   BleuMetric() : EvaluationMetric("IBM_BLEU") {}
-  float ComputeScore(const SufficientStats& stats) const {
+  unsigned SufficientStatisticsVectorSize() const { return N*2 + 2; }
+  shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const vector<vector<WordID> >& refs) const {
+    return shared_ptr<SegmentEvaluator>(new BleuSegmentEvaluator<N,BrevityType>(refs, this));
+  }
+  float ComputeBreakdown(const SufficientStats& stats, float* bp, vector<float>* out) const {
+    if (out) { out->clear(); }
     float log_bleu = 0;
     int count = 0;
     for (int i = 0; i < N; ++i) {
@@ -166,7 +188,7 @@ struct BleuMetric : public EvaluationMetric {
         // smooth bleu
         if (!cor_count) { cor_count = 0.01; }
         float lprec = log(cor_count) - log(stats.fields[i+N]); // log(hyp_ngram_counts[i]);
-        // if (precs) precs->push_back(exp(lprec));
+        if (out) out->push_back(exp(lprec));
         log_bleu += lprec;
         ++count;
       }
@@ -178,32 +200,51 @@ struct BleuMetric : public EvaluationMetric {
     if (hyp_len < ref_len)
       lbp = (hyp_len - ref_len) / hyp_len;
     log_bleu += lbp;
-    //if (bp) *bp = exp(lbp);
+    if (bp) *bp = exp(lbp);
     return exp(log_bleu);
   }
-  shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const vector<vector<WordID> >& refs) const {
-    return shared_ptr<SegmentEvaluator>(new BleuSegmentEvaluator<N,BrevityType>(refs, this));
+  string DetailedScore(const SufficientStats& stats) const {
+    char buf[2000];
+    vector<float> precs(N);
+    float bp;
+    float bleu = ComputeBreakdown(stats, &bp, &precs);
+    sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)",
+       bleu*100.0,
+       precs[0]*100.0,
+       precs[1]*100.0,
+       precs[2]*100.0,
+       precs[3]*100.0,
+       bp);
+    return buf;
+  }
+  float ComputeScore(const SufficientStats& stats) const {
+    return ComputeBreakdown(stats, NULL, NULL);
   }
 };
 
-EvaluationMetric* EvaluationMetric::Instance(const string& metric_id) {
+EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) {
   static bool is_first = true;
   if (is_first) {
     instances_["NULL"] = NULL;
     is_first = false;
   }
+  const string metric_id = UppercaseString(imetric_id);
 
   map<string, EvaluationMetric*>::iterator it = instances_.find(metric_id);
   if (it == instances_.end()) {
     EvaluationMetric* m = NULL; 
-    if (metric_id == "IBM_BLEU") {
+    if        (metric_id == "IBM_BLEU") {
       m = new BleuMetric<4, IBM>;
     } else if (metric_id == "NIST_BLEU") {
       m = new BleuMetric<4, NIST>;
-    } else if (metric_id == "Koehn_BLEU") {
+    } else if (metric_id == "KOEHN_BLEU") {
       m = new BleuMetric<4, Koehn>;
     } else if (metric_id == "TER") {
       m = new TERMetric;
+    } else if (metric_id == "METEOR") {
+      m = new ExternalMetric("METEOR", "java -Xmx1536m -jar /Users/cdyer/software/meteor/meteor-1.3.jar - - -mira -lower -t tune -l en");
+    } else if (metric_id.find("COMB:") == 0) {
+      m = new CombinationMetric(metric_id);
     } else {
       cerr << "Implement please: " << metric_id << endl;
       abort();
@@ -220,9 +261,7 @@ EvaluationMetric* EvaluationMetric::Instance(const string& metric_id) {
 
 SufficientStats::SufficientStats(const string& encoded) {
   istringstream is(encoded);
-  string type;
-  is >> type;
-  evaluation_metric = EvaluationMetric::Instance(type);
+  is >> id_;
   float val;
   while(is >> val)
     fields.push_back(val);
@@ -230,8 +269,8 @@ SufficientStats::SufficientStats(const string& encoded) {
 
 void SufficientStats::Encode(string* out) const {
   ostringstream os;
-  if (evaluation_metric)
-    os << evaluation_metric->MetricId();
+  if (id_.size() > 0)
+    os << id_;
   else
     os << "NULL";
   for (unsigned i = 0; i < fields.size(); ++i)
diff --git a/mteval/ns.h b/mteval/ns.h
index f19b7509..622265db 100644
--- a/mteval/ns.h
+++ b/mteval/ns.h
@@ -7,18 +7,15 @@
 #include <boost/shared_ptr.hpp>
 #include "wordid.h"
 
-class EvaluationMetric;
-
 class SufficientStats {
  public:
-  SufficientStats() : evaluation_metric() {}
+  SufficientStats() : id_() {}
   explicit SufficientStats(const std::string& encoded);
-  explicit SufficientStats(const EvaluationMetric* s) : evaluation_metric(s) {}
-  SufficientStats(const EvaluationMetric* s, const std::vector<float>& f) :
-    evaluation_metric(s), fields(f) {}
+  SufficientStats(const std::string& mid, const std::vector<float>& f) :
+    id_(mid), fields(f) {}
 
   SufficientStats& operator+=(const SufficientStats& delta) {
-    if (delta.evaluation_metric) evaluation_metric = delta.evaluation_metric;
+    if (id_.empty() && delta.id_.size()) id_ = delta.id_;
     if (fields.size() != delta.fields.size())
       fields.resize(std::max(fields.size(), delta.fields.size()));
     for (unsigned i = 0; i < delta.fields.size(); ++i)
@@ -26,7 +23,7 @@ class SufficientStats {
     return *this;
   }
   SufficientStats& operator-=(const SufficientStats& delta) {
-    if (delta.evaluation_metric) evaluation_metric = delta.evaluation_metric;
+    if (id_.empty() && delta.id_.size()) id_ = delta.id_;
     if (fields.size() != delta.fields.size())
       fields.resize(std::max(fields.size(), delta.fields.size()));
     for (unsigned i = 0; i < delta.fields.size(); ++i)
@@ -53,7 +50,7 @@ class SufficientStats {
   }
   void Encode(std::string* out) const;
 
-  const EvaluationMetric* evaluation_metric;
+  std::string id_;
   std::vector<float> fields;
 };
 
@@ -73,13 +70,13 @@ struct SegmentEvaluator {
 };
 
 // Instructions for implementing a new metric
-//   Override MetricId() and give the metric a unique string name (no spaces)
 //   To Instance(), add something that creates the metric
+//   Implement ComputeScore(const SufficientStats& stats) const;
 //   Implement ONE of the following:
 //      1) void ComputeSufficientStatistics(const std::vector<std::vector<WordID> >& refs, SufficientStats* out) const;
 //      2) a new SegmentEvaluator class AND CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const;
-//   The later (#2) is only used when it is necessary to precompute per-segment data from a set of refs
-//   Implement ComputeScore(const SufficientStats& stats) const;
+//    [The latter (#2) is only used when it is necessary to precompute per-segment data from a set of refs]
+//   OPTIONAL: Override SufficientStatisticsVectorSize() if it is easy to do so
 class EvaluationMetric {
  public:
   static EvaluationMetric* Instance(const std::string& metric_id = "IBM_BLEU");
@@ -91,7 +88,9 @@ class EvaluationMetric {
  public:
   const std::string& MetricId() const { return name_; }
 
+  virtual unsigned SufficientStatisticsVectorSize() const;
   virtual float ComputeScore(const SufficientStats& stats) const = 0;
+  virtual std::string DetailedScore(const SufficientStats& stats) const;
   virtual boost::shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const;
   virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
                                            const std::vector<std::vector<WordID> >& refs,
diff --git a/mteval/ns_comb.cc b/mteval/ns_comb.cc
new file mode 100644
index 00000000..41c634cd
--- /dev/null
+++ b/mteval/ns_comb.cc
@@ -0,0 +1,87 @@
+#include "ns_comb.h"
+
+#include <iostream>
+
+#include "stringlib.h"
+
+using namespace std;
+
+// e.g. COMB:IBM_BLEU=0.5;TER=0.5
+CombinationMetric::CombinationMetric(const std::string& cmd) :  // parses COMB:<metric>=<weight>;<metric>=<weight>[;...]
+    EvaluationMetric(cmd),
+    total_size() {
+  if (cmd.find("COMB:") != 0 || cmd.size() < 9) {
+    cerr << "Error in combination metric specifier: " << cmd << endl;
+    exit(1);
+  }
+  const string mix = cmd.substr(5);  // everything after the "COMB:" prefix
+  vector<string> comps;
+  Tokenize(mix, ';', &comps);
+  if (comps.size() < 2) { cerr << "Error in combination metric specifier: " << cmd << endl; exit(1); }  // need at least two components
+  vector<string> cwpairs;
+  for (unsigned i = 0; i < comps.size(); ++i) {
+    Tokenize(comps[i], '=', &cwpairs);
+    if (cwpairs.size() != 2) { cerr << "Error in combination metric specifier: " << cmd << endl; exit(1); }
+    EvaluationMetric* const component = EvaluationMetric::Instance(cwpairs[0]);
+    // Instance() registers the id "NULL" with a null pointer (see ns.cc); reject it rather than dereference it below.
+    if (!component) { cerr << "Error in combination metric specifier: " << cmd << endl; exit(1); }
+    metrics.push_back(component);
+    coeffs.push_back(atof(cwpairs[1].c_str()));  // weight; atof yields 0 for non-numeric text
+    offsets.push_back(total_size);  // index where this component's statistics start in the combined vector
+    total_size += component->SufficientStatisticsVectorSize();
+    cerr << (i > 0 ? " + " : "( ") << coeffs.back() << " * " << cwpairs[0];
+  }
+  cerr << " )\n";
+}
+
+struct CombinationSegmentEvaluator : public SegmentEvaluator {  // runs one evaluator per component metric and packs their statistics into a single vector
+  CombinationSegmentEvaluator(const string& id,
+                              const vector<vector<WordID> >& refs,
+                              const vector<EvaluationMetric*>& metrics,
+                              const vector<unsigned>& offsets,
+                              const unsigned ts) : id_(id), offsets_(offsets), total_size_(ts), component_evaluators_(metrics.size()) {
+    for (unsigned i = 0; i < metrics.size(); ++i)
+      component_evaluators_[i] = metrics[i]->CreateSegmentEvaluator(refs);
+  }
+  virtual void Evaluate(const std::vector<WordID>& hyp, SufficientStats* out) const {
+    out->id_ = id_;
+    out->fields.assign(total_size_, 0.0f);  // zero-fill every slot: resize() would keep stale values when `out` is reused
+    for (unsigned i = 0; i < component_evaluators_.size(); ++i) {
+      SufficientStats t;
+      component_evaluators_[i]->Evaluate(hyp, &t);
+      for (unsigned j = 0; j < t.fields.size(); ++j) {
+        unsigned op = j + offsets_[i];  // component i's statistics are stored starting at offsets_[i]
+        assert(op < out->fields.size());
+        out->fields[op] = t[j];
+      }
+    }
+  }
+  const string& id_;  // reference members: the id and offsets passed to the constructor must outlive this evaluator
+  const vector<unsigned>& offsets_;
+  const unsigned total_size_;
+  vector<boost::shared_ptr<SegmentEvaluator> > component_evaluators_;
+};
+
+boost::shared_ptr<SegmentEvaluator> CombinationMetric::CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const {
+  // One combined evaluator that wraps a per-component evaluator for every metric in the mix.
+  SegmentEvaluator* const combined = new CombinationSegmentEvaluator(MetricId(), refs, metrics, offsets, total_size);
+  return boost::shared_ptr<SegmentEvaluator>(combined);
+}
+
+float CombinationMetric::ComputeScore(const SufficientStats& stats) const {
+  float weighted_sum = 0;  // accumulates coefficient * component score
+  for (unsigned m = 0; m < metrics.size(); ++m) {
+    // Component m owns the slice [offsets[m], end); the last component's slice runs to total_size.
+    const unsigned end = (m + 1 < offsets.size()) ? offsets[m + 1] : total_size;
+    SufficientStats slice;
+    for (unsigned k = offsets[m]; k < end; ++k)
+      slice.fields.push_back(stats[k]);
+    weighted_sum += metrics[m]->ComputeScore(slice) * coeffs[m];
+  }
+  return weighted_sum;
+}
+
+unsigned CombinationMetric::SufficientStatisticsVectorSize() const {  // length of the combined vector: the component sizes summed in the constructor
+  return total_size;
+}
+
diff --git a/mteval/ns_comb.h b/mteval/ns_comb.h
new file mode 100644
index 00000000..140e7e6a
--- /dev/null
+++ b/mteval/ns_comb.h
@@ -0,0 +1,19 @@
+#ifndef _NS_COMB_H_
+#define _NS_COMB_H_
+
+#include "ns.h"
+
+class CombinationMetric : public EvaluationMetric {  // weighted sum of the scores of several component metrics
+ public:
+  CombinationMetric(const std::string& cmd);  // cmd looks like COMB:IBM_BLEU=0.5;TER=0.5 and also becomes the metric id
+  virtual boost::shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const;
+  virtual float ComputeScore(const SufficientStats& stats) const;  // sum over components of coeffs[i] * metrics[i] score
+  virtual unsigned SufficientStatisticsVectorSize() const;  // returns total_size
+ private:
+  std::vector<EvaluationMetric*> metrics;  // component metrics obtained from EvaluationMetric::Instance(); not deleted by this class
+  std::vector<float> coeffs;  // weight of each component, parallel to metrics
+  std::vector<unsigned> offsets;  // start index of each component's slice in the combined statistics vector
+  unsigned total_size;  // length of the combined statistics vector
+};
+
+#endif
diff --git a/mteval/ns_ext.cc b/mteval/ns_ext.cc
new file mode 100644
index 00000000..956708af
--- /dev/null
+++ b/mteval/ns_ext.cc
@@ -0,0 +1,130 @@
+#include "ns_ext.h"
+
+#include <cstdio> // popen
+#include <cstdlib>
+#include <cstring>
+#include <unistd.h>
+#include <sstream>
+#include <iostream>
+#include <cassert>
+
+#include "stringlib.h"
+#include "tdict.h"
+
+using namespace std;
+
+struct NScoreServer {  // talks to an external scoring process over a pair of pipes, one text line per request/response
+  NScoreServer(const std::string& cmd);  // forks and execs cmd (split on whitespace)
+  ~NScoreServer();
+
+  float ComputeScore(const std::vector<float>& fields);  // sends an "EVAL ||| ..." request and parses the reply as a number
+  void Evaluate(const std::vector<std::vector<WordID> >& refs, const std::vector<WordID>& hyp, std::vector<float>* fields);  // sends a "SCORE ||| ..." request and parses the reply as floats
+
+ private:
+  void RequestResponse(const std::string& request, std::string* response);
+  int p2c[2];  // parent-to-child pipe: parent writes p2c[1], child reads p2c[0] as its stdin
+  int c2p[2];  // child-to-parent pipe: child writes c2p[1] as its stdout, parent reads c2p[0]
+};
+
+NScoreServer::NScoreServer(const string& cmd) {  // forks cmd with its stdin/stdout attached to the two pipes, then sends a test request
+  cerr << "Invoking " << cmd << " ..." << endl;
+  if (pipe(p2c) < 0) { perror("pipe"); exit(1); }
+  if (pipe(c2p) < 0) { perror("pipe"); exit(1); }
+  pid_t cpid = fork();
+  if (cpid < 0) { perror("fork"); exit(1); }
+  if (cpid == 0) {  // child
+    close(p2c[1]); close(c2p[0]);  // the child never uses the parent's ends of the pipes
+    dup2(p2c[0], 0);
+    close(p2c[0]);
+    dup2(c2p[1], 1);
+    close(c2p[1]);
+    cerr << "Exec'ing from child " << cmd << endl;
+    vector<string> vargs;
+    SplitOnWhitespace(cmd, &vargs);
+    if (vargs.empty()) { cerr << "Empty external scorer command\n"; _exit(1); }
+    vector<const char*> cargv(vargs.size() + 1, static_cast<const char*>(NULL));  // argv: program name first, NULL-terminated
+    for (unsigned i = 0; i < vargs.size(); ++i) cargv[i] = vargs[i].c_str();
+    execvp(cargv[0], const_cast<char* const*>(&cargv[0]));
+    perror("execvp"); _exit(1);  // only reached when exec failed; the child must not fall through into the parent's code
+  } else { // parent
+    close(c2p[1]);
+    close(p2c[0]);
+  }
+  string dummy;
+  RequestResponse("SCORE ||| Reference initialization string . ||| Testing initialization string .", &dummy);
+  assert(dummy.size() > 0);  // any non-empty reply counts as a working connection
+  cerr << "Connection established.\n";
+}
+
+NScoreServer::~NScoreServer() {  // releases the parent's two pipe descriptors
+  close(p2c[1]); close(c2p[0]);  // closing p2c[1] also gives the child EOF on its stdin; TODO join stuff (the child pid is not stored, so it is never waited for)
+}
+
+float NScoreServer::ComputeScore(const vector<float>& fields) {
+  ostringstream request;
+  request << "EVAL |||";  // followed by every statistic, each preceded by a single space
+  for (vector<float>::const_iterator it = fields.begin(); it != fields.end(); ++it)
+    request << ' ' << *it;
+  string reply;
+  RequestResponse(request.str(), &reply);
+  return strtod(reply.c_str(), NULL);  // the reply text is parsed as a number (0 when it does not start with one)
+}
+
+void NScoreServer::Evaluate(const vector<vector<WordID> >& refs, const vector<WordID>& hyp, vector<float>* fields) {
+  // Request: "SCORE ||| <ref 1 words> ||| ... ||| <hyp words>", every word preceded by one space.
+  ostringstream request;
+  request << "SCORE";
+  for (unsigned r = 0; r < refs.size(); ++r) {
+    const vector<WordID>& ref = refs[r];
+    request << " |||";
+    for (unsigned w = 0; w < ref.size(); ++w)
+      request << ' ' << TD::Convert(ref[w]);
+  }
+  request << " |||";
+  for (unsigned w = 0; w < hyp.size(); ++w)
+    request << ' ' << TD::Convert(hyp[w]);
+  string reply;
+  RequestResponse(request.str(), &reply);
+  // The reply is read as whitespace-separated floats, which replace the previous contents of *fields.
+  fields->clear();
+  istringstream parser(reply);
+  for (float value; parser >> value; )
+    fields->push_back(value);
+}
+
+#define MAX_BUF 16000
+
+void NScoreServer::RequestResponse(const string& request, string* response) {  // writes one request line to the child and reads one reply line back
+//  cerr << "@SERVER: " << request << endl;
+  string x = request + "\n";
+  if (write(p2c[1], x.c_str(), x.size()) != static_cast<ssize_t>(x.size())) { perror("write"); exit(1); }
+  char buf[MAX_BUF];
+  size_t n = 0;
+  while (n < MAX_BUF && (n == 0 || buf[n-1] != '\n')) {  // keep reading until a newline arrives or the buffer is full
+    const ssize_t got = read(c2p[0], &buf[n], MAX_BUF - n);
+    if (got <= 0) { cerr << "Error reading response from external scorer\n"; exit(1); }  // read error or EOF: no reply will ever complete
+    n += got;
+  }
+  buf[n-1] = 0;  // n >= 1 here; replaces the newline (or the last byte of an over-long reply) with the terminator
+  if (n < 2) cerr << "Malformed response: " << buf << endl;
+  *response = Trim(buf, " \t\n");
+//  cerr << "@RESPONSE: '" << *response << "'\n";
+}
+
+void ExternalMetric::ComputeSufficientStatistics(const std::vector<WordID>& hyp,
+                                           const std::vector<std::vector<WordID> >& refs,
+                                           SufficientStats* out) const {
+  eval_server->Evaluate(refs, hyp, &out->fields);  // the statistics are whatever floats the external process replies with; out->id_ is not set here
+}
+
+float ExternalMetric::ComputeScore(const SufficientStats& stats) const {  // the score is computed by the external process from the raw statistics
+  return eval_server->ComputeScore(stats.fields);  // must be returned: flowing off the end of a non-void function is undefined behaviour
+}
+
+ExternalMetric::ExternalMetric(const string& metric_name, const std::string& command) :
+    EvaluationMetric(metric_name),
+    eval_server(new NScoreServer(command)) {}  // constructing the server launches the external command immediately
+
+ExternalMetric::~ExternalMetric() {
+  delete eval_server;  // allocated with new in the constructor
+}
diff --git a/mteval/ns_ext.h b/mteval/ns_ext.h
new file mode 100644
index 00000000..78badb2e
--- /dev/null
+++ b/mteval/ns_ext.h
@@ -0,0 +1,21 @@
+#ifndef _NS_EXTERNAL_SCORER_H_
+#define _NS_EXTERNAL_SCORER_H_
+
+#include "ns.h"
+
+struct NScoreServer;
+class ExternalMetric : public EvaluationMetric {  // metric whose statistics and score are both computed by an external process
+ public:
+  ExternalMetric(const std::string& metricid, const std::string& command);  // command is the command line used to start the scorer
+  ~ExternalMetric();
+
+  virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
+                                           const std::vector<std::vector<WordID> >& refs,
+                                           SufficientStats* out) const;
+  virtual float ComputeScore(const SufficientStats& stats) const;
+
+ protected:
+  NScoreServer* eval_server;  // owning raw pointer, deleted in the destructor; NOTE(review): an implicit copy would double-delete - confirm instances are never copied
+};
+
+#endif
diff --git a/mteval/ns_ter.cc b/mteval/ns_ter.cc
index 14dc6e49..8c969e58 100644
--- a/mteval/ns_ter.cc
+++ b/mteval/ns_ter.cc
@@ -1,15 +1,11 @@
 #include "ns_ter.h"
 
-#include <cstdio>
 #include <cassert>
 #include <iostream>
 #include <limits>
-#include <sstream>
 #include <tr1/unordered_map>
 #include <set>
-#include <valarray>
 #include <boost/functional/hash.hpp>
-#include <stdexcept>
 #include "tdict.h"
 
 static const bool ter_use_average_ref_len = true;
@@ -25,7 +21,7 @@ static const unsigned kDUMMY_LAST_ENTRY = 5;
 using namespace std;
 using namespace std::tr1;
 
-#if 0
+namespace NewScorer {
 
 struct COSTS {
   static const float substitution;
@@ -82,7 +78,7 @@ class TERScorerImpl {
   enum TransType { MATCH, SUBSTITUTION, INSERTION, DELETION };
 
   explicit TERScorerImpl(const vector<WordID>& ref) : ref_(ref) {
-    for (int i = 0; i < ref.size(); ++i)
+    for (unsigned i = 0; i < ref.size(); ++i)
       rwexists_.insert(ref[i]);
   }
 
@@ -95,7 +91,7 @@ class TERScorerImpl {
   }
 
  private:
-  vector<WordID> ref_;
+  const vector<WordID>& ref_;
   set<WordID> rwexists_;
 
   typedef unordered_map<vector<WordID>, set<int>, boost::hash<vector<WordID> > > NgramToIntsMap;
@@ -421,68 +417,7 @@ class TERScorerImpl {
   }
 };
 
-class TERScore : public ScoreBase<TERScore> {
-  friend class TERScorer;
-
- public:
-
- TERScore() : stats(0,kDUMMY_LAST_ENTRY) {}
-  float ComputePartialScore() const { return 0.0;}
-  float ComputeScore() const {
-    float edits = static_cast<float>(stats[kINSERTIONS] + stats[kDELETIONS] + stats[kSUBSTITUTIONS] + stats[kSHIFTS]);
-    return edits / static_cast<float>(stats[kREF_WORDCOUNT]);
-  }
-  void ScoreDetails(string* details) const;
-  void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){}
-  void PlusEquals(const Score& delta, const float scale) {
-    if (scale==1)
-      stats += static_cast<const TERScore&>(delta).stats;
-    if (scale==-1)
-      stats -= static_cast<const TERScore&>(delta).stats;
-    throw std::runtime_error("TERScore::PlusEquals with scale != +-1");
- }
-  void PlusEquals(const Score& delta) {
-    stats += static_cast<const TERScore&>(delta).stats;
-  }
-
-  ScoreP GetZero() const {
-    return ScoreP(new TERScore);
-  }
-  ScoreP GetOne() const {
-    return ScoreP(new TERScore);
-  }
-  void Subtract(const Score& rhs, Score* res) const {
-    static_cast<TERScore*>(res)->stats = stats - static_cast<const TERScore&>(rhs).stats;
-  }
-  void Encode(std::string* out) const {
-    ostringstream os;
-    os << stats[kINSERTIONS] << ' '
-       << stats[kDELETIONS] << ' '
-       << stats[kSUBSTITUTIONS] << ' '
-       << stats[kSHIFTS] << ' '
-       << stats[kREF_WORDCOUNT];
-    *out = os.str();
-  }
-  bool IsAdditiveIdentity() const {
-    for (int i = 0; i < kDUMMY_LAST_ENTRY; ++i)
-      if (stats[i] != 0) return false;
-    return true;
-  }
- private:
-  valarray<int> stats;
-};
-
-ScoreP TERScorer::ScoreFromString(const std::string& data) {
-  istringstream is(data);
-  TERScore* r = new TERScore;
-  is >> r->stats[TERScore::kINSERTIONS]
-     >> r->stats[TERScore::kDELETIONS]
-     >> r->stats[TERScore::kSUBSTITUTIONS]
-     >> r->stats[TERScore::kSHIFTS]
-     >> r->stats[TERScore::kREF_WORDCOUNT];
-  return ScoreP(r);
-}
-
+#if 0
 void TERScore::ScoreDetails(std::string* details) const {
   char buf[200];
   sprintf(buf, "TER = %.2f, %3d|%3d|%3d|%3d (len=%d)",
@@ -494,54 +429,43 @@ void TERScore::ScoreDetails(std::string* details) const {
      stats[kREF_WORDCOUNT]);
   *details = buf;
 }
+#endif
 
-TERScorer::~TERScorer() {
-  for (vector<TERScorerImpl*>::iterator i = impl_.begin(); i != impl_.end(); ++i)
-    delete *i;
-}
+} // namespace NewScorer
 
-TERScorer::TERScorer(const vector<vector<WordID> >& refs) : impl_(refs.size()) {
+void TERMetric::ComputeSufficientStatistics(const vector<WordID>& hyp,
+                                            const vector<vector<WordID> >& refs,
+                                            SufficientStats* out) const {
+  out->fields.resize(kDUMMY_LAST_ENTRY);
+  float best_score = numeric_limits<float>::max();
+  unsigned avg_len = 0;
   for (int i = 0; i < refs.size(); ++i)
-    impl_[i] = new TERScorerImpl(refs[i]);
-}
+    avg_len += refs[i].size();
+  avg_len /= refs.size();
 
-ScoreP TERScorer::ScoreCCandidate(const vector<WordID>& hyp) const {
-  return ScoreP();
-}
-
-ScoreP TERScorer::ScoreCandidate(const std::vector<WordID>& hyp) const {
-  float best_score = numeric_limits<float>::max();
-  TERScore* res = new TERScore;
-  int avg_len = 0;
-  for (int i = 0; i < impl_.size(); ++i)
-    avg_len += impl_[i]->GetRefLength();
-  avg_len /= impl_.size();
-  for (int i = 0; i < impl_.size(); ++i) {
+  for (int i = 0; i < refs.size(); ++i) {
     int subs, ins, dels, shifts;
-    float score = impl_[i]->Calculate(hyp, &subs, &ins, &dels, &shifts);
+    NewScorer::TERScorerImpl ter(refs[i]);
+    float score = ter.Calculate(hyp, &subs, &ins, &dels, &shifts);
     // cerr << "Component TER cost: " << score << endl;
     if (score < best_score) {
-      res->stats[TERScore::kINSERTIONS] = ins;
-      res->stats[TERScore::kDELETIONS] = dels;
-      res->stats[TERScore::kSUBSTITUTIONS] = subs;
-      res->stats[TERScore::kSHIFTS] = shifts;
+      out->fields[kINSERTIONS] = ins;
+      out->fields[kDELETIONS] = dels;
+      out->fields[kSUBSTITUTIONS] = subs;
+      out->fields[kSHIFTS] = shifts;
       if (ter_use_average_ref_len) {
-        res->stats[TERScore::kREF_WORDCOUNT] = avg_len;
+        out->fields[kREF_WORDCOUNT] = avg_len;
       } else {
-        res->stats[TERScore::kREF_WORDCOUNT] = impl_[i]->GetRefLength();
+        out->fields[kREF_WORDCOUNT] = refs[i].size();
       }
 
       best_score = score;
     }
   }
-  return ScoreP(res);
 }
-#endif
 
-void TERMetric::ComputeSufficientStatistics(const vector<WordID>& hyp,
-                                            const vector<vector<WordID> >& refs,
-                                            SufficientStats* out) const {
-  out->fields.resize(kDUMMY_LAST_ENTRY);
+unsigned TERMetric::SufficientStatisticsVectorSize() const {
+  return kDUMMY_LAST_ENTRY;
 }
 
 float TERMetric::ComputeScore(const SufficientStats& stats) const {
diff --git a/mteval/ns_ter.h b/mteval/ns_ter.h
index bb90f95e..6c020cfa 100644
--- a/mteval/ns_ter.h
+++ b/mteval/ns_ter.h
@@ -9,6 +9,7 @@ class TERMetric : public EvaluationMetric {
   TERMetric() : EvaluationMetric("TER") {}
 
  public:
+  virtual unsigned SufficientStatisticsVectorSize() const;
   virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
                                            const std::vector<std::vector<WordID> >& refs,
                                            SufficientStats* out) const;
diff --git a/mteval/scorer_test.cc b/mteval/scorer_test.cc
index 09da250c..73159557 100644
--- a/mteval/scorer_test.cc
+++ b/mteval/scorer_test.cc
@@ -205,20 +205,22 @@ TEST_F(ScorerTest, Kernel) {
 }
 
 TEST_F(ScorerTest, NewScoreAPI) {
-  EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU");
+  //EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU");
+  //EvaluationMetric* metric = EvaluationMetric::Instance("METEOR");
+  EvaluationMetric* metric = EvaluationMetric::Instance("COMB:IBM_BLEU=0.5;TER=-0.5");
   boost::shared_ptr<SegmentEvaluator> e1 = metric->CreateSegmentEvaluator(refs0);
   boost::shared_ptr<SegmentEvaluator> e2 = metric->CreateSegmentEvaluator(refs1);
   SufficientStats stats1;
-  e1->Evaluate(hyp2, &stats1);
+  e1->Evaluate(hyp1, &stats1);
   SufficientStats stats2;
-  e2->Evaluate(hyp1, &stats2);
+  e2->Evaluate(hyp2, &stats2);
   stats1 += stats2;
   string ss;
   stats1.Encode(&ss);
   cerr << "SS: " << ss << endl;
   cerr << metric->ComputeScore(stats1) << endl;
-  SufficientStats statse("IBM_BLEU 53 32 18 11 65 63 61 59 65 72");
-  cerr << metric->ComputeScore(statse) << endl;
+  //SufficientStats statse("IBM_BLEU 53 32 18 11 65 63 61 59 65 72");
+  //cerr << metric->ComputeScore(statse) << endl;
 }
 
 int main(int argc, char **argv) {
diff --git a/utils/stringlib.h b/utils/stringlib.h
index cafbdac3..f457e1e4 100644
--- a/utils/stringlib.h
+++ b/utils/stringlib.h
@@ -125,6 +125,13 @@ inline std::string LowercaseString(const std::string& in) {
   return res;
 }
 
+inline std::string UppercaseString(const std::string& in) {  // returns a copy of `in` with every character passed through toupper()
+  std::string res(in.size(),' ');
+  for (std::string::size_type i = 0; i < in.size(); ++i)  // unsigned index: no signed/unsigned comparison against size()
+    res[i] = static_cast<char>(toupper(static_cast<unsigned char>(in[i])));  // toupper() is undefined for negative char values, so go through unsigned char
+  return res;
+}
+
 inline int CountSubstrings(const std::string& str, const std::string& sub) {
   size_t p = 0;
   int res = 0;
-- 
cgit v1.2.3


From e4c5e87db2139aa0f8655b063da7d8b5199cb46d Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Tue, 20 Dec 2011 18:34:14 -0500
Subject: migrate fast_score to the new API

---
 mteval/Makefile.am    |  2 +-
 mteval/fast_score.cc  | 40 +++++++++++++++++++++++-----------------
 mteval/ns.cc          |  5 +++--
 mteval/ns_ter.cc      | 12 ++++++++++++
 mteval/ns_ter.h       |  1 +
 pro-train/dist-pro.pl |  2 +-
 vest/dist-vest.pl     |  2 +-
 7 files changed, 42 insertions(+), 22 deletions(-)

(limited to 'mteval')

diff --git a/mteval/Makefile.am b/mteval/Makefile.am
index 6679d949..e7126675 100644
--- a/mteval/Makefile.am
+++ b/mteval/Makefile.am
@@ -10,7 +10,7 @@ endif
 
 noinst_LIBRARIES = libmteval.a
 
-libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc
+libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc
 
 fast_score_SOURCES = fast_score.cc
 fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz
diff --git a/mteval/fast_score.cc b/mteval/fast_score.cc
index 5ee264a6..a271ccc5 100644
--- a/mteval/fast_score.cc
+++ b/mteval/fast_score.cc
@@ -4,9 +4,11 @@
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 
+#include "stringlib.h"
 #include "filelib.h"
 #include "tdict.h"
-#include "scorer.h"
+#include "ns.h"
+#include "ns_docscorer.h"
 
 using namespace std;
 namespace po = boost::program_options;
@@ -14,8 +16,8 @@ namespace po = boost::program_options;
 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
-        ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation(s) (tokenized text file)")
-        ("loss_function,l",po::value<string>()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)")
+        ("reference,r",po::value<vector<string> >(), "[1 or more required] Reference translation(s) in tokenized text files")
+        ("evaluation_metric,m",po::value<string>()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)")
         ("in_file,i", po::value<string>()->default_value("-"), "Input file")
         ("help,h", "Help");
   po::options_description dcmdline_options;
@@ -35,24 +37,29 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
 int main(int argc, char** argv) {
   po::variables_map conf;
   InitCommandLine(argc, argv, &conf);
-  const string loss_function = conf["loss_function"].as<string>();
-  ScoreType type = ScoreTypeFromString(loss_function);
-  DocScorer ds(type, conf["reference"].as<vector<string> >(), "");
+  string loss_function = UppercaseString(conf["evaluation_metric"].as<string>());
+  if (loss_function == "COMBI") {
+    cerr << "WARNING: 'combi' metric is no longer supported, switching to 'COMB:TER=-0.5;IBM_BLEU=0.5'\n";
+    loss_function = "COMB:TER=-0.5;IBM_BLEU=0.5";
+  } else if (loss_function == "BLEU") {
+    cerr << "WARNING: 'BLEU' is ambiguous, assuming 'IBM_BLEU'\n";
+    loss_function = "IBM_BLEU";
+  }
+  EvaluationMetric* metric = EvaluationMetric::Instance(loss_function);
+  DocumentScorer ds(metric, conf["reference"].as<vector<string> >());
   cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl;
 
   ReadFile rf(conf["in_file"].as<string>());
-  ScoreP acc;
+  SufficientStats acc;
   istream& in = *rf.stream();
   int lc = 0;
-  while(in) {
-    string line;
-    getline(in, line);
-    if (line.empty() && !in) break;
+  string line;
+  while(getline(in, line)) {
     vector<WordID> sent;
     TD::ConvertSentence(line, &sent);
-    ScoreP sentscore = ds[lc]->ScoreCandidate(sent);
-    if (!acc) { acc = sentscore->GetZero(); }
-    acc->PlusEquals(*sentscore);
+    SufficientStats t;
+    ds[lc]->Evaluate(sent, &t);
+    acc += t;
     ++lc;
   }
   assert(lc > 0);
@@ -63,9 +70,8 @@ int main(int argc, char** argv) {
   if (lc != ds.size())
     cerr << "Fewer sentences in hyp (" << lc << ") than refs ("
          << ds.size() << "): scoring partial set!\n";
-  float score = acc->ComputeScore();
-  string details;
-  acc->ScoreDetails(&details);
+  float score = metric->ComputeScore(acc);
+  const string details = metric->DetailedScore(acc);
   cerr << details << endl;
   cout << score << endl;
   return 0;
diff --git a/mteval/ns.cc b/mteval/ns.cc
index 6139757d..1018319d 100644
--- a/mteval/ns.cc
+++ b/mteval/ns.cc
@@ -173,7 +173,7 @@ struct BleuSegmentEvaluator : public SegmentEvaluator {
 
 template <unsigned int N = 4u, BleuType BrevityType = IBM>
 struct BleuMetric : public EvaluationMetric {
-  BleuMetric() : EvaluationMetric("IBM_BLEU") {}
+  BleuMetric() : EvaluationMetric(BrevityType == IBM ? "IBM_BLEU" : (BrevityType == Koehn ? "KOEHN_BLEU" : "NIST_BLEU")) {}
   unsigned SufficientStatisticsVectorSize() const { return N*2 + 2; }
   shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const vector<vector<WordID> >& refs) const {
     return shared_ptr<SegmentEvaluator>(new BleuSegmentEvaluator<N,BrevityType>(refs, this));
@@ -208,7 +208,8 @@ struct BleuMetric : public EvaluationMetric {
     vector<float> precs(N);
     float bp;
     float bleu = ComputeBreakdown(stats, &bp, &precs);
-    sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)",
+    sprintf(buf, "%s = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)",
+       MetricId().c_str(),
        bleu*100.0,
        precs[0]*100.0,
        precs[1]*100.0,
diff --git a/mteval/ns_ter.cc b/mteval/ns_ter.cc
index 8c969e58..f75acf1d 100644
--- a/mteval/ns_ter.cc
+++ b/mteval/ns_ter.cc
@@ -473,3 +473,15 @@ float TERMetric::ComputeScore(const SufficientStats& stats) const {
   return edits / static_cast<float>(stats[kREF_WORDCOUNT]);
 }
 
+// Formats TER (as a percentage) plus the raw ins|del|sub|shift counts and reference length.
+string TERMetric::DetailedScore(const SufficientStats& stats) const {
+  char buf[200];
+  // snprintf bounds the write to buf; sprintf could overrun it on extreme values
+  snprintf(buf, sizeof(buf), "TER = %.2f, %3.f|%3.f|%3.f|%3.f (len=%3.f)",
+     ComputeScore(stats) * 100.0f,
+     stats[kINSERTIONS], stats[kDELETIONS],
+     stats[kSUBSTITUTIONS], stats[kSHIFTS],
+     stats[kREF_WORDCOUNT]);
+  return buf;
+}
+
diff --git a/mteval/ns_ter.h b/mteval/ns_ter.h
index 6c020cfa..3190fc1b 100644
--- a/mteval/ns_ter.h
+++ b/mteval/ns_ter.h
@@ -10,6 +10,7 @@ class TERMetric : public EvaluationMetric {
 
  public:
   virtual unsigned SufficientStatisticsVectorSize() const;
+  virtual std::string DetailedScore(const SufficientStats& stats) const;
   virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
                                            const std::vector<std::vector<WordID> >& refs,
                                            SufficientStats* out) const;
diff --git a/pro-train/dist-pro.pl b/pro-train/dist-pro.pl
index 5db053de..ba9cdc06 100755
--- a/pro-train/dist-pro.pl
+++ b/pro-train/dist-pro.pl
@@ -288,7 +288,7 @@ while (1){
 	    $retries++;
 	}
 	die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest);
-	my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -l $metric");
+	my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric");
 	chomp $dec_score;
 	print STDERR "DECODER SCORE: $dec_score\n";
 
diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index 11e791c1..c382a972 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -308,7 +308,7 @@ while (1){
 	    $retries++;
 	}
 	die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest);
-	my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -l $metric");
+	my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric");
 	chomp $dec_score;
 	print STDERR "DECODER SCORE: $dec_score\n";
 
-- 
cgit v1.2.3


From d021894c27ffea13decf4e64e9bee428ffc85013 Mon Sep 17 00:00:00 2001
From: Chris Dyer <prguest11@taipan.cs>
Date: Tue, 20 Dec 2011 23:37:25 +0000
Subject: new headers

---
 mteval/ns.cc     | 1 +
 mteval/ns_ter.cc | 1 +
 2 files changed, 2 insertions(+)

(limited to 'mteval')

diff --git a/mteval/ns.cc b/mteval/ns.cc
index 1018319d..68c8deaa 100644
--- a/mteval/ns.cc
+++ b/mteval/ns.cc
@@ -3,6 +3,7 @@
 #include "ns_ext.h"
 #include "ns_comb.h"
 
+#include <cstdio>
 #include <cassert>
 #include <cmath>
 #include <cstdlib>
diff --git a/mteval/ns_ter.cc b/mteval/ns_ter.cc
index f75acf1d..91a17f0d 100644
--- a/mteval/ns_ter.cc
+++ b/mteval/ns_ter.cc
@@ -1,5 +1,6 @@
 #include "ns_ter.h"
 
+#include <cstdio>
 #include <cassert>
 #include <iostream>
 #include <limits>
-- 
cgit v1.2.3


From da92444f09b7e04f3cfa4d461aef47c6b59827e2 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Tue, 20 Dec 2011 18:37:43 -0500
Subject: new doc scorer

---
 mteval/ns_docscorer.cc | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++
 mteval/ns_docscorer.h  | 31 ++++++++++++++++++++++++++
 2 files changed, 91 insertions(+)
 create mode 100644 mteval/ns_docscorer.cc
 create mode 100644 mteval/ns_docscorer.h

(limited to 'mteval')

diff --git a/mteval/ns_docscorer.cc b/mteval/ns_docscorer.cc
new file mode 100644
index 00000000..28a2fd09
--- /dev/null
+++ b/mteval/ns_docscorer.cc
@@ -0,0 +1,60 @@
+#include "ns_docscorer.h"
+
+#include <iostream>
+#include <cstring>
+
+#include "tdict.h"
+#include "filelib.h"
+#include "ns.h"
+
+using namespace std;
+
+DocumentScorer::~DocumentScorer() {}  // empty body: no explicit cleanup is performed here
+
+// Loads the reference files and builds one SegmentEvaluator per reference line.
+// Line k of each file in ref_files is a reference for segment k. src_file must be
+// empty (asserted); verbose is accepted but not used here.
+void DocumentScorer::Init(const EvaluationMetric* metric,
+            const vector<string>& ref_files,
+            const string& src_file,
+            bool verbose) {
+  scorers_.clear();
+  cerr << "Loading references (" << ref_files.size() << " files)\n";
+  assert(src_file.empty());
+  if (ref_files.empty()) return;  // nothing to load; ifs[0] below would be out of range
+  std::vector<ReadFile> ifs(ref_files.begin(),ref_files.end());
+  for (size_t i=0; i < ref_files.size(); ++i) ifs[i].Init(ref_files[i]);
+  char buf[64000];
+  bool expect_eof = false;
+  while (!expect_eof && ifs[0].get()) {
+    vector<vector<WordID> > refs(ref_files.size());
+    for (size_t i=0; i < ref_files.size(); ++i) {
+      istream &in=ifs[i].get();
+      // A last line without '\n' sets only eofbit, so the stream still tests true;
+      // stop here or the outer loop would add empty segments without ever ending.
+      if (in.eof()) { assert(i == 0); expect_eof = true; break; }
+      in.getline(buf, 64000);
+      if (strlen(buf) == 0) {
+        if (in.eof()) {  // empty read at end of file: no more segments
+          assert(i == 0);  // only the first file is allowed to be the one that runs out
+          expect_eof = true;
+          break;
+        }
+      } else {
+        TD::ConvertSentence(buf, &refs[i]);
+        assert(!refs[i].empty());
+      }
+    }
+    if (!expect_eof) {
+      // Source-side input is not supported (src_file is asserted empty above):
+      //if (srcrf) {
+      //  getline(srcrf.get(), src_line);
+      //  map<string,string> dummy;
+      //  ProcessAndStripSGML(&src_line, &dummy);
+      //}
+      scorers_.push_back(metric->CreateSegmentEvaluator(refs));
+    }
+  }
+  cerr << "Loaded reference translations for " << scorers_.size() << " sentences.\n";
+}
+
diff --git a/mteval/ns_docscorer.h b/mteval/ns_docscorer.h
new file mode 100644
index 00000000..170ac627
--- /dev/null
+++ b/mteval/ns_docscorer.h
@@ -0,0 +1,31 @@
+#ifndef _NS_DOC_SCORER_H_
+#define _NS_DOC_SCORER_H_
+
+#include <vector>
+#include <string>
+#include <boost/shared_ptr.hpp>
+
+struct EvaluationMetric;
+struct SegmentEvaluator;
+class DocumentScorer {  // holds a vector of SegmentEvaluator pointers (scorers_), filled by Init()
+ public:
+  ~DocumentScorer();
+  DocumentScorer() {  }  // starts with no evaluators; call Init() to load references
+  DocumentScorer(const EvaluationMetric* metric,
+                 const std::vector<std::string>& ref_files,
+                 const std::string& src_file = "",
+                 bool verbose=false) {
+    Init(metric,ref_files,src_file,verbose);  // convenience: construct and load in one step
+  }
+  void Init(const EvaluationMetric* metric,  // defined out of line, not in this header
+            const std::vector<std::string>& ref_files,
+            const std::string& src_file = "",
+            bool verbose=false);
+
+  int size() const { return scorers_.size(); }  // number of stored evaluators
+  const SegmentEvaluator* operator[](size_t i) const { return scorers_[i].get(); }  // no bounds check on i
+ private:
+  std::vector<boost::shared_ptr<SegmentEvaluator> > scorers_;
+};
+
+#endif
-- 
cgit v1.2.3


From 481a120564fdb73c8c6833e2102acb533683261c Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Fri, 27 Jan 2012 02:31:00 -0500
Subject: migrate mert to the new scorer interface

---
 gi/pf/base_distributions.cc | 241 ++++++++++++++++++++++++++++++++++++++++
 gi/pf/base_distributions.h  | 261 ++++++++++++++++++++++++++++++++++++++++++++
 gi/pf/base_measures.cc      | 241 ----------------------------------------
 gi/pf/base_measures.h       | 247 -----------------------------------------
 mteval/ns.cc                |   4 +
 mteval/ns.h                 |  10 +-
 vest/ces.cc                 |  42 +++----
 vest/ces.h                  |  10 +-
 vest/dist-vest.pl           |   4 +-
 vest/error_surface.cc       |  11 +-
 vest/error_surface.h        |   6 +-
 vest/line_optimizer.cc      |  20 ++--
 vest/line_optimizer.h       |   2 +
 vest/lo_test.cc             |  21 ++--
 vest/mr_vest_map.cc         |  16 +--
 vest/mr_vest_reduce.cc      |  34 +++---
 16 files changed, 602 insertions(+), 568 deletions(-)
 create mode 100644 gi/pf/base_distributions.cc
 create mode 100644 gi/pf/base_distributions.h
 delete mode 100644 gi/pf/base_measures.cc
 delete mode 100644 gi/pf/base_measures.h

(limited to 'mteval')

diff --git a/gi/pf/base_distributions.cc b/gi/pf/base_distributions.cc
new file mode 100644
index 00000000..4b1863fa
--- /dev/null
+++ b/gi/pf/base_distributions.cc
@@ -0,0 +1,241 @@
+#include "base_distributions.h"
+
+#include <iostream>
+
+#include "filelib.h"
+
+using namespace std;
+
+// Fills table from fname, one rule per line: "<f words> ||| <e words> ||| <number>"; aborts on bad input.
+TableLookupBase::TableLookupBase(const string& fname) {
+  cerr << "TableLookupBase reading from " << fname << " ..." << endl;
+  ReadFile rf(fname);
+  istream& in = *rf.stream();
+  string line;
+  unsigned lc = 0;
+  const WordID kDIV = TD::Convert("|||");
+  vector<WordID> tmp;
+  TRule x;
+  x.lhs_ = -TD::Convert("X");
+  bool flag = false;  // true while a row of progress dots is pending a newline
+  while(getline(in, line)) {
+    ++lc;
+    if (lc % 1000000 == 0) { cerr << " [" << lc << ']' << endl; flag = false; }
+    else if (lc % 25000 == 0) { cerr << '.' << flush; flag = true; }
+    tmp.clear();
+    TD::ConvertSentence(line, &tmp);
+    x.f_.clear();
+    x.e_.clear();
+    size_t pos = 0;
+    int cc = 0;  // counts "|||" separators, plus one more once the value has been stored
+    while(pos < tmp.size()) {
+      const WordID cur = tmp[pos++];
+      if (cur == kDIV) {
+        ++cc;
+      } else if (cc == 0) {
+        x.f_.push_back(cur);
+      } else if (cc == 1) {
+        x.e_.push_back(cur);
+      } else if (cc == 2) {
+        table[x].logeq(atof(TD::Convert(cur)));  // the token after the second "|||" is the value
+        ++cc;
+      } else {
+        if (flag) cerr << endl;
+        cerr << "Bad format in " << lc << ": " << line << endl; abort();
+      }
+    }
+    if (cc != 3) {
+      if (flag) cerr << endl;
+      cerr << "Bad format in " << lc << ": " << line << endl; abort();
+    }
+  }
+  if (flag) cerr << endl;
+  cerr << " read " << lc << " entries\n";
+}
+
+prob_t PhraseConditionalUninformativeUnigramBase::p0(const vector<WordID>& vsrc,
+                                                     const vector<WordID>& vtrg,
+                                                     int start_src, int start_trg) const {
+  const int src_len = vsrc.size() - start_src;   // words of vsrc from start_src on
+  const int trg_len = vtrg.size() - start_trg;   // words of vtrg from start_trg on
+  prob_t result;
+  result.logeq(log_poisson(trg_len, src_len + 0.01));  // length: trg_len | src_len ~Pois(src_len + 0.01)
+  //result.logeq(log_poisson(trg_len, 1));              // alternative: trg_len ~Pois(1)
+  for (int k = 0; k < trg_len; ++k)
+    result *= u(vtrg[start_trg + k]);                   // each target word is scored by the unigram model u
+  return result;
+}
+
+prob_t PhraseConditionalUninformativeBase::p0(const vector<WordID>& vsrc,
+                                              const vector<WordID>& vtrg,
+                                              int start_src, int start_trg) const {
+  const int src_len = vsrc.size() - start_src;
+  const int trg_len = vtrg.size() - start_trg;
+  prob_t result;
+  //result.logeq(log_poisson(trg_len, src_len + 0.01));  // alternative: trg_len | src_len ~Pois(src_len + 0.01)
+  result.logeq(log_poisson(trg_len, 1));                 // length: trg_len ~Pois(1)
+  for (int k = 0; k < trg_len; ++k)
+    result *= kUNIFORM_TARGET;                           // one kUNIFORM_TARGET factor per target word
+  return result;
+}
+
+// Reads Model 1 parameters from fname, one "src trg logprob" triple per line, into ttable.
+// Aborts with a message on a line that lacks the two separating spaces.
+void Model1::LoadModel1(const string& fname) {
+  cerr << "Loading Model 1 parameters from " << fname << " ..." << endl;
+  ReadFile rf(fname);
+  istream& in = *rf.stream();
+  string line;
+  unsigned lc = 0;
+  while(getline(in, line)) {
+    ++lc;
+    // Locate the two single-space separators; a line without both is malformed.
+    const size_t sp1 = line.find(' ');
+    const size_t sp2 = (sp1 == string::npos) ? string::npos : line.find(' ', sp1 + 1);
+    if (sp2 == string::npos) {  // checked even when asserts are compiled out
+      cerr << "Bad format in " << lc << ": " << line << endl; abort();
+    }
+    line[sp1] = 0;  // NUL-terminate both words in place so they can be read as C strings
+    line[sp2] = 0;
+    const WordID src = TD::Convert(&line[0]);
+    const WordID trg = TD::Convert(&line[sp1 + 1]);
+    const double logprob = strtod(&line[sp2 + 1], NULL);
+    if (static_cast<size_t>(src) >= ttable.size()) ttable.resize(src + 1);
+    ttable[src][trg].logeq(logprob);
+  }
+  cerr << "  read " << lc << " parameters.\n";
+}
+
+prob_t PhraseConditionalBase::p0(const vector<WordID>& vsrc,
+                                 const vector<WordID>& vtrg,
+                                 int start_src, int start_trg) const {
+  const int src_len = vsrc.size() - start_src;
+  const int trg_len = vtrg.size() - start_trg;
+  prob_t align_prob; align_prob.logeq(-log(src_len + 1));     // 1 / (src_len + 1)
+  prob_t result;
+  result.logeq(log_poisson(trg_len, src_len + 0.01));         // trg_len | src_len ~Pois(src_len + 0.01)
+  for (int t = 0; t < trg_len; ++t) {                         // one factor per target word
+    const WordID e = vtrg[start_trg + t];
+    prob_t word_prob = prob_t::Zero();
+    for (int s = -1; s < src_len; ++s) {                      // s == -1 uses word id 0 as the source
+      const WordID f = (s < 0) ? 0 : vsrc[start_src + s];
+      word_prob += kM1MIXTURE * model1(f, e);
+      word_prob += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
+    }
+    word_prob *= align_prob;                                  // uniform choice of the aligned source position
+    result *= word_prob;
+  }
+  if (result.is_0()) {
+    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
+    abort();
+  }
+  return result;
+}
+
+prob_t PhraseJointBase::p0(const vector<WordID>& vsrc,
+                           const vector<WordID>& vtrg,
+                           int start_src, int start_trg) const {
+  const int src_len = vsrc.size() - start_src;
+  const int trg_len = vtrg.size() - start_trg;
+  prob_t align_prob; align_prob.logeq(-log(src_len + 1));    // 1 / (src_len + 1)
+  prob_t result;
+  result.logeq(log_poisson(src_len, 1.0));                   // src_len ~Pois(1)
+                                                             // trg_len | src_len ~Pois(src_len + 0.01)
+  prob_t trg_len_prob; trg_len_prob.logeq(log_poisson(trg_len, src_len + 0.01));
+  result *= trg_len_prob;
+  result *= kUNIFORM_SOURCE.pow(src_len);                    // one kUNIFORM_SOURCE factor per source word
+  for (int t = 0; t < trg_len; ++t) {                        // one factor per target word
+    const WordID e = vtrg[start_trg + t];
+    prob_t word_prob = prob_t::Zero();
+    for (int s = -1; s < src_len; ++s) {                     // s == -1 uses word id 0 as the source
+      const WordID f = (s < 0) ? 0 : vsrc[start_src + s];
+      word_prob += kM1MIXTURE * model1(f, e);
+      word_prob += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
+    }
+    word_prob *= align_prob;                                 // uniform choice of the aligned source position
+    result *= word_prob;
+  }
+  if (result.is_0()) {
+    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
+    abort();
+  }
+  return result;
+}
+
+prob_t PhraseJointBase_BiDir::p0(const vector<WordID>& vsrc,
+                                 const vector<WordID>& vtrg,
+                                 int start_src, int start_trg) const {  // returns (p1 + p2) / 2
+  const int flen = vsrc.size() - start_src;
+  const int elen = vtrg.size() - start_trg;
+  prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
+  prob_t uniform_trg_alignment; uniform_trg_alignment.logeq(-log(elen + 1));
+  // p1: generate F first, then E given F (uses model1)
+  prob_t p1;
+  p1.logeq(log_poisson(flen, 1.0));               // flen                 ~Pois(1)
+                                                 // elen | flen          ~Pois(flen + 0.01)
+  prob_t ptrglen; ptrglen.logeq(log_poisson(elen, flen + 0.01));
+  p1 *= ptrglen;
+  p1 *= kUNIFORM_SOURCE.pow(flen);                // each f in F ~Uniform
+  for (int i = 0; i < elen; ++i) {               // for each position i in E
+    const WordID trg = vtrg[i + start_trg];
+    prob_t tp = prob_t::Zero();
+    for (int j = -1; j < flen; ++j) {
+      const WordID src = j < 0 ? 0 : vsrc[j + start_src];
+      tp += kM1MIXTURE * model1(src, trg);
+      tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
+    }
+    tp *= uniform_src_alignment;                 //     draw a_i         ~uniform
+    p1 *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform
+  }
+  if (p1.is_0()) {
+    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
+    abort();
+  }
+  // p2: the reverse direction, E first and then F given E (uses invmodel1)
+  prob_t p2;
+  p2.logeq(log_poisson(elen, 1.0));               // elen                 ~Pois(1)
+                                                 // flen | elen          ~Pois(elen + 0.01)
+  prob_t psrclen; psrclen.logeq(log_poisson(flen, elen + 0.01));
+  p2 *= psrclen;
+  p2 *= kUNIFORM_TARGET.pow(elen);                // each e in E ~Uniform
+  for (int i = 0; i < flen; ++i) {               // for each position i in F
+    const WordID src = vsrc[i + start_src];
+    prob_t tp = prob_t::Zero();
+    for (int j = -1; j < elen; ++j) {
+      const WordID trg = j < 0 ? 0 : vtrg[j + start_trg];
+      tp += kM1MIXTURE * invmodel1(trg, src);
+      tp += kUNIFORM_MIXTURE * kUNIFORM_SOURCE;
+    }
+    tp *= uniform_trg_alignment;                 //     draw a_i         ~uniform
+    p2 *= tp;                                     //     draw f_i         ~invModel1(e_a_i) / uniform
+  }
+  if (p2.is_0()) {
+    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
+    abort();
+  }
+  // equal-weight mixture of the two directions
+  static const prob_t kHALF(0.5);
+  return (p1 + p2) * kHALF;
+}
+
+JumpBase::JumpBase() : p(200) {
+  // One jump table per source length 1..199; p[0] is left empty.
+  for (unsigned src_len = 1; src_len < 200; ++src_len) {
+    map<int, prob_t>& table = p[src_len];
+    int min_jump = 1 - src_len;
+    int max_jump = src_len;
+    prob_t total;
+    for (int jump = min_jump; jump <= max_jump; ++jump) {
+      prob_t& weight = table[jump];      // for jump == 0 no logeq is applied before poweq
+      if (jump < 0)
+        weight.logeq(log_poisson(1.5-jump, 1));
+      else if (jump > 0)
+        weight.logeq(log_poisson(jump, 1));
+      weight.poweq(0.2);
+      total += weight;
+    }
+    for (map<int, prob_t>::iterator it = table.begin(); it != table.end(); ++it)
+      it->second /= total;               // divide every entry by the sum of the weights
+  }
+}
+
diff --git a/gi/pf/base_distributions.h b/gi/pf/base_distributions.h
new file mode 100644
index 00000000..a23ac32b
--- /dev/null
+++ b/gi/pf/base_distributions.h
@@ -0,0 +1,261 @@
+#ifndef _BASE_MEASURES_H_
+#define _BASE_MEASURES_H_
+
+#include <vector>
+#include <map>
+#include <string>
+#include <cmath>
+#include <iostream>
+#include <cassert>
+
+#include "unigrams.h"
+#include "trule.h"
+#include "prob.h"
+#include "tdict.h"
+#include "sampler.h"
+
+inline double log_poisson(unsigned x, const double& lambda) {
+  assert(lambda > 0.0);  // log Pois(x; lambda) = x*log(lambda) - log(x!) - lambda
+  return x * log(lambda) - lgamma(x + 1) - lambda;
+}
+
+// log of the binomial coefficient C(n, k), computed with lgamma; requires n >= k
+inline double log_binom_coeff(unsigned n, unsigned k) {
+  assert(n >= k);
+  return (n == k) ? 0.0 : lgamma(n + 1) - lgamma(k + 1) - lgamma(n - k + 1);
+}
+
+// http://en.wikipedia.org/wiki/Negative_binomial_distribution
+// log of C(x + r - 1, x) * (1 - p)^r * p^x; p must lie strictly between 0 and 1
+inline double log_negative_binom(unsigned x, unsigned r, double p) {
+  assert(p > 0.0 && p < 1.0);
+  return log_binom_coeff(x + r - 1, x) + r * log(1 - p) + x * log(p);
+}
+
+inline std::ostream& operator<<(std::ostream& os, const std::vector<WordID>& p) {
+  os << '[';  // output has the form "[w1 w2 ...]", each id converted with TD::Convert
+  for (size_t i = 0; i < p.size(); ++i)  // size_t index: no signed/unsigned comparison
+    os << (i==0 ? "" : " ") << TD::Convert(p[i]);
+  return os << ']';
+}
+
+struct Model1 {
+  explicit Model1(const std::string& fname) :
+      kNULL(TD::Convert("<eps>")),
+      kZERO() {
+    LoadModel1(fname);
+  }
+
+  void LoadModel1(const std::string& fname);
+
+  // Looks up ttable[src][trg]; a src of 0 is looked up under kNULL instead.
+  // returns prob 0 (kZERO) if src or trg is not found
+  const prob_t& operator()(WordID src, WordID trg) const {
+    const WordID row = (src == 0) ? kNULL : src;
+    if (!(row < ttable.size()))
+      return kZERO;
+    const std::map<WordID, prob_t>& row_probs = ttable[row];
+    const std::map<WordID, prob_t>::const_iterator hit = row_probs.find(trg);
+    return (hit == row_probs.end()) ? kZERO : hit->second;
+  }
+
+  const WordID kNULL;   // id of "<eps>", used in place of source id 0
+  const prob_t kZERO;   // default-constructed; returned for missing entries
+  std::vector<std::map<WordID, prob_t> > ttable;  // indexed as ttable[src][trg]
+};
+
+
+struct PoissonUniformUninformativeBase {
+  explicit PoissonUniformUninformativeBase(const unsigned ves) : kUNIFORM(1.0 / ves) {}  // ves: presumably the target vocabulary size -- confirm
+  prob_t operator()(const TRule& r) const {  // Pois(1) on the length of r.e_, times kUNIFORM per word of r.e_
+    prob_t p; p.logeq(log_poisson(r.e_.size(), 1.0));
+    prob_t q = kUNIFORM; q.poweq(r.e_.size());
+    p *= q;
+    return p;
+  }
+  void Summary() const {}  // this and the next three hooks have empty bodies here
+  void ResampleHyperparameters(MT19937*) {}
+  void Increment(const TRule&) {}
+  void Decrement(const TRule&) {}
+  prob_t Likelihood() const { return prob_t::One(); }  // constant
+  const prob_t kUNIFORM;  // 1.0 / ves
+};
+
+struct CompletelyUniformBase {
+  explicit CompletelyUniformBase(const unsigned ves) : kUNIFORM(1.0 / ves) {}  // ves: presumably the number of possible outcomes -- confirm
+  prob_t operator()(const TRule&) const {  // the rule is ignored: every rule gets kUNIFORM
+    return kUNIFORM;
+  }
+  void Summary() const {}  // this and the next three hooks have empty bodies here
+  void ResampleHyperparameters(MT19937*) {}
+  void Increment(const TRule&) {}
+  void Decrement(const TRule&) {}
+  prob_t Likelihood() const { return prob_t::One(); }  // constant
+  const prob_t kUNIFORM;  // 1.0 / ves
+};
+
+struct UnigramWordBase {
+  explicit UnigramWordBase(const std::string& fname) : un(fname) {}  // fname is handed to UnigramWordModel
+  prob_t operator()(const TRule& r) const {  // scores only the e_ side of the rule
+    return un(r.e_);
+  }
+  const UnigramWordModel un;
+};
+
+struct RuleHasher {  // hash functor for TRule keys; used as the hasher of TableLookupBase::table
+  size_t operator()(const TRule& r) const {
+    return hash_value(r);  // forwards to the hash_value overload for TRule
+  }
+};
+
+struct TableLookupBase {  // base distribution backed by a rule -> probability table
+  TableLookupBase(const std::string& fname);
+  // Table lookup; the iterator below must name table's exact map type, RuleHasher included.
+  prob_t operator()(const TRule& rule) const {  // aborts if rule is absent
+    const std::tr1::unordered_map<TRule,prob_t,RuleHasher>::const_iterator it = table.find(rule);
+    if (it == table.end()) {
+      std::cerr << rule << " not found\n";
+      abort();
+    }
+    return it->second;
+  }
+
+  void ResampleHyperparameters(MT19937*) {}
+  void Increment(const TRule&) {}
+  void Decrement(const TRule&) {}
+  prob_t Likelihood() const { return prob_t::One(); }
+  void Summary() const {}
+
+  std::tr1::unordered_map<TRule,prob_t,RuleHasher> table;
+};
+
+struct PhraseConditionalUninformativeBase {
+  explicit PhraseConditionalUninformativeBase(const unsigned vocab_e_size) :
+      kUNIFORM_TARGET(1.0 / vocab_e_size) {
+    assert(vocab_e_size > 0);  // vocab_e_size is the divisor in the initializer above
+  }
+
+  // return p0 of rule.e_ | rule.f_
+  prob_t operator()(const TRule& rule) const {
+    return p0(rule.f_, rule.e_, 0, 0);  // score both sides from their first word
+  }
+
+  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
+
+  void Summary() const {}  // this and the next three hooks have empty bodies here
+  void ResampleHyperparameters(MT19937*) {}
+  void Increment(const TRule&) {}
+  void Decrement(const TRule&) {}
+  prob_t Likelihood() const { return prob_t::One(); }  // constant
+  const prob_t kUNIFORM_TARGET;  // 1.0 / vocab_e_size
+};
+
+struct PhraseConditionalUninformativeUnigramBase {
+  explicit PhraseConditionalUninformativeUnigramBase(const std::string& file, const unsigned vocab_e_size) : u(file, vocab_e_size) {}
+
+  // return p0 of rule.e_ | rule.f_
+  prob_t operator()(const TRule& rule) const {
+    return p0(rule.f_, rule.e_, 0, 0);  // score both sides from their first word
+  }
+
+  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
+
+  const UnigramModel u;  // built from (file, vocab_e_size)
+};
+
+struct PhraseConditionalBase {
+  explicit PhraseConditionalBase(const Model1& m1, const double m1mixture, const unsigned vocab_e_size) :
+      model1(m1),
+      kM1MIXTURE(m1mixture),
+      kUNIFORM_MIXTURE(1.0 - m1mixture),
+      kUNIFORM_TARGET(1.0 / vocab_e_size) {
+    assert(m1mixture >= 0.0 && m1mixture <= 1.0);  // the two mixture weights sum to 1
+    assert(vocab_e_size > 0);  // vocab_e_size is used as a divisor above
+  }
+
+  // return p0 of rule.e_ | rule.f_
+  prob_t operator()(const TRule& rule) const {
+    return p0(rule.f_, rule.e_, 0, 0);  // score both sides from their first word
+  }
+
+  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
+
+  const Model1& model1;  // stored by reference: m1 must outlive this object
+  const prob_t kM1MIXTURE;  // Model 1 mixture component
+  const prob_t kUNIFORM_MIXTURE; // uniform mixture component
+  const prob_t kUNIFORM_TARGET;  // 1.0 / vocab_e_size
+};
+
+struct PhraseJointBase {
+  explicit PhraseJointBase(const Model1& m1, const double m1mixture, const unsigned vocab_e_size, const unsigned vocab_f_size) :
+      model1(m1),
+      kM1MIXTURE(m1mixture),
+      kUNIFORM_MIXTURE(1.0 - m1mixture),
+      kUNIFORM_SOURCE(1.0 / vocab_f_size),
+      kUNIFORM_TARGET(1.0 / vocab_e_size) {
+    assert(m1mixture >= 0.0 && m1mixture <= 1.0);  // the two mixture weights sum to 1
+    assert(vocab_e_size > 0 && vocab_f_size > 0);  // both sizes are used as divisors above
+  }
+
+  // return p0 of rule.e_ , rule.f_
+  prob_t operator()(const TRule& rule) const {
+    return p0(rule.f_, rule.e_, 0, 0);  // score both sides from their first word
+  }
+
+  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
+
+  const Model1& model1;  // stored by reference: m1 must outlive this object
+  const prob_t kM1MIXTURE;  // Model 1 mixture component
+  const prob_t kUNIFORM_MIXTURE; // uniform mixture component
+  const prob_t kUNIFORM_SOURCE;  // 1.0 / vocab_f_size
+  const prob_t kUNIFORM_TARGET;  // 1.0 / vocab_e_size
+};
+
+struct PhraseJointBase_BiDir {
+  explicit PhraseJointBase_BiDir(const Model1& m1,
+                                 const Model1& im1,
+                                 const double m1mixture,
+                                 const unsigned vocab_e_size,
+                                 const unsigned vocab_f_size) :
+      model1(m1),
+      invmodel1(im1),
+      kM1MIXTURE(m1mixture),
+      kUNIFORM_MIXTURE(1.0 - m1mixture),
+      kUNIFORM_SOURCE(1.0 / vocab_f_size),
+      kUNIFORM_TARGET(1.0 / vocab_e_size) {
+    assert(m1mixture >= 0.0 && m1mixture <= 1.0);  // the two mixture weights sum to 1
+    assert(vocab_e_size > 0 && vocab_f_size > 0);  // both sizes are used as divisors above
+  }
+
+  // return p0 of rule.e_ , rule.f_
+  prob_t operator()(const TRule& rule) const {
+    return p0(rule.f_, rule.e_, 0, 0);  // score both sides from their first word
+  }
+
+  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
+
+  const Model1& model1;     // stored by reference: m1 must outlive this object
+  const Model1& invmodel1;  // stored by reference: im1 must outlive this object
+  const prob_t kM1MIXTURE;  // Model 1 mixture component
+  const prob_t kUNIFORM_MIXTURE; // uniform mixture component
+  const prob_t kUNIFORM_SOURCE;  // 1.0 / vocab_f_size
+  const prob_t kUNIFORM_TARGET;  // 1.0 / vocab_e_size
+};
+
+// base distribution for jump size multinomials
+// basically p(0) = 0 and then, p(1) is max, and then
+// you drop as you move to the max jump distance
+struct JumpBase {
+  JumpBase();
+  const prob_t& operator()(int jump, unsigned src_len) const {  // jump must be non-zero and present in p[src_len]
+    assert(jump != 0);
+    assert(src_len < p.size());  // p is indexed by src_len; guard the vector access below
+    const std::map<int, prob_t>::const_iterator it = p[src_len].find(jump);
+    assert(it != p[src_len].end());
+    return it->second;
+  }
+  std::vector<std::map<int, prob_t> > p;  // p[src_len] maps a jump to its probability
+};
+
+
+#endif
diff --git a/gi/pf/base_measures.cc b/gi/pf/base_measures.cc
deleted file mode 100644
index 4b1863fa..00000000
--- a/gi/pf/base_measures.cc
+++ /dev/null
@@ -1,241 +0,0 @@
-#include "base_measures.h"
-
-#include <iostream>
-
-#include "filelib.h"
-
-using namespace std;
-
-TableLookupBase::TableLookupBase(const string& fname) {
-  cerr << "TableLookupBase reading from " << fname << " ..." << endl;
-  ReadFile rf(fname);
-  istream& in = *rf.stream();
-  string line;
-  unsigned lc = 0;
-  const WordID kDIV = TD::Convert("|||");
-  vector<WordID> tmp;
-  vector<int> le, lf;
-  TRule x;
-  x.lhs_ = -TD::Convert("X");
-  bool flag = false;
-  while(getline(in, line)) {
-    ++lc;
-    if (lc % 1000000 == 0) { cerr << " [" << lc << ']' << endl; flag = false; }
-    else if (lc % 25000 == 0) { cerr << '.' << flush; flag = true; }
-    tmp.clear();
-    TD::ConvertSentence(line, &tmp);
-    x.f_.clear();
-    x.e_.clear();
-    size_t pos = 0;
-    int cc = 0;
-    while(pos < tmp.size()) {
-      const WordID cur = tmp[pos++];
-      if (cur == kDIV) {
-        ++cc;
-      } else if (cc == 0) {
-        x.f_.push_back(cur);    
-      } else if (cc == 1) {
-        x.e_.push_back(cur);
-      } else if (cc == 2) {
-        table[x].logeq(atof(TD::Convert(cur)));
-        ++cc;
-      } else {
-        if (flag) cerr << endl;
-        cerr << "Bad format in " << lc << ": " << line << endl; abort();
-      }
-    }
-    if (cc != 3) {
-      if (flag) cerr << endl;
-      cerr << "Bad format in " << lc << ": " << line << endl; abort();
-    }
-  }
-  if (flag) cerr << endl;
-  cerr << " read " << lc << " entries\n";
-}
-
-prob_t PhraseConditionalUninformativeUnigramBase::p0(const vector<WordID>& vsrc,
-                                                     const vector<WordID>& vtrg,
-                                                     int start_src, int start_trg) const {
-  const int flen = vsrc.size() - start_src;
-  const int elen = vtrg.size() - start_trg;
-  prob_t p;
-  p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
-  //p.logeq(log_poisson(elen, 1));       // elen | flen          ~Pois(flen + 0.01)
-  for (int i = 0; i < elen; ++i)
-    p *= u(vtrg[i + start_trg]);                        // draw e_i             ~Uniform
-  return p;
-}
-
-prob_t PhraseConditionalUninformativeBase::p0(const vector<WordID>& vsrc,
-                                              const vector<WordID>& vtrg,
-                                              int start_src, int start_trg) const {
-  const int flen = vsrc.size() - start_src;
-  const int elen = vtrg.size() - start_trg;
-  prob_t p;
-  //p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
-  p.logeq(log_poisson(elen, 1));       // elen | flen          ~Pois(flen + 0.01)
-  for (int i = 0; i < elen; ++i)
-    p *= kUNIFORM_TARGET;                        // draw e_i             ~Uniform
-  return p;
-}
-
-void Model1::LoadModel1(const string& fname) {
-  cerr << "Loading Model 1 parameters from " << fname << " ..." << endl;
-  ReadFile rf(fname);
-  istream& in = *rf.stream();
-  string line;
-  unsigned lc = 0;
-  while(getline(in, line)) {
-    ++lc;
-    int cur = 0;
-    int start = 0;
-    while(cur < line.size() && line[cur] != ' ') { ++cur; }
-    assert(cur != line.size());
-    line[cur] = 0;
-    const WordID src = TD::Convert(&line[0]);
-    ++cur;
-    start = cur;
-    while(cur < line.size() && line[cur] != ' ') { ++cur; }
-    assert(cur != line.size());
-    line[cur] = 0;
-    WordID trg = TD::Convert(&line[start]);
-    const double logprob = strtod(&line[cur + 1], NULL);
-    if (src >= ttable.size()) ttable.resize(src + 1);
-    ttable[src][trg].logeq(logprob);
-  }
-  cerr << "  read " << lc << " parameters.\n";
-}
-
-prob_t PhraseConditionalBase::p0(const vector<WordID>& vsrc,
-                                 const vector<WordID>& vtrg,
-                                 int start_src, int start_trg) const {
-  const int flen = vsrc.size() - start_src;
-  const int elen = vtrg.size() - start_trg;
-  prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
-  prob_t p;
-  p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
-  for (int i = 0; i < elen; ++i) {               // for each position i in e-RHS
-    const WordID trg = vtrg[i + start_trg];
-    prob_t tp = prob_t::Zero();
-    for (int j = -1; j < flen; ++j) {
-      const WordID src = j < 0 ? 0 : vsrc[j + start_src];
-      tp += kM1MIXTURE * model1(src, trg);
-      tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
-    }
-    tp *= uniform_src_alignment;                 //     draw a_i         ~uniform
-    p *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform
-  }
-  if (p.is_0()) {
-    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
-    abort();
-  }
-  return p;
-}
-
-prob_t PhraseJointBase::p0(const vector<WordID>& vsrc,
-                           const vector<WordID>& vtrg,
-                           int start_src, int start_trg) const {
-  const int flen = vsrc.size() - start_src;
-  const int elen = vtrg.size() - start_trg;
-  prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
-  prob_t p;
-  p.logeq(log_poisson(flen, 1.0));               // flen                 ~Pois(1)
-                                                 // elen | flen          ~Pois(flen + 0.01)
-  prob_t ptrglen; ptrglen.logeq(log_poisson(elen, flen + 0.01));
-  p *= ptrglen;
-  p *= kUNIFORM_SOURCE.pow(flen);                // each f in F ~Uniform
-  for (int i = 0; i < elen; ++i) {               // for each position i in E
-    const WordID trg = vtrg[i + start_trg];
-    prob_t tp = prob_t::Zero();
-    for (int j = -1; j < flen; ++j) {
-      const WordID src = j < 0 ? 0 : vsrc[j + start_src];
-      tp += kM1MIXTURE * model1(src, trg);
-      tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
-    }
-    tp *= uniform_src_alignment;                 //     draw a_i         ~uniform
-    p *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform
-  }
-  if (p.is_0()) {
-    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
-    abort();
-  }
-  return p;
-}
-
-prob_t PhraseJointBase_BiDir::p0(const vector<WordID>& vsrc,
-                                 const vector<WordID>& vtrg,
-                                 int start_src, int start_trg) const {
-  const int flen = vsrc.size() - start_src;
-  const int elen = vtrg.size() - start_trg;
-  prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
-  prob_t uniform_trg_alignment; uniform_trg_alignment.logeq(-log(elen + 1));
-
-  prob_t p1;
-  p1.logeq(log_poisson(flen, 1.0));               // flen                 ~Pois(1)
-                                                 // elen | flen          ~Pois(flen + 0.01)
-  prob_t ptrglen; ptrglen.logeq(log_poisson(elen, flen + 0.01));
-  p1 *= ptrglen;
-  p1 *= kUNIFORM_SOURCE.pow(flen);                // each f in F ~Uniform
-  for (int i = 0; i < elen; ++i) {               // for each position i in E
-    const WordID trg = vtrg[i + start_trg];
-    prob_t tp = prob_t::Zero();
-    for (int j = -1; j < flen; ++j) {
-      const WordID src = j < 0 ? 0 : vsrc[j + start_src];
-      tp += kM1MIXTURE * model1(src, trg);
-      tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
-    }
-    tp *= uniform_src_alignment;                 //     draw a_i         ~uniform
-    p1 *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform
-  }
-  if (p1.is_0()) {
-    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
-    abort();
-  }
-
-  prob_t p2;
-  p2.logeq(log_poisson(elen, 1.0));               // elen                 ~Pois(1)
-                                                 // flen | elen          ~Pois(flen + 0.01)
-  prob_t psrclen; psrclen.logeq(log_poisson(flen, elen + 0.01));
-  p2 *= psrclen;
-  p2 *= kUNIFORM_TARGET.pow(elen);                // each f in F ~Uniform
-  for (int i = 0; i < flen; ++i) {               // for each position i in E
-    const WordID src = vsrc[i + start_src];
-    prob_t tp = prob_t::Zero();
-    for (int j = -1; j < elen; ++j) {
-      const WordID trg = j < 0 ? 0 : vtrg[j + start_trg];
-      tp += kM1MIXTURE * invmodel1(trg, src);
-      tp += kUNIFORM_MIXTURE * kUNIFORM_SOURCE;
-    }
-    tp *= uniform_trg_alignment;                 //     draw a_i         ~uniform
-    p2 *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform
-  }
-  if (p2.is_0()) {
-    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
-    abort();
-  }
-
-  static const prob_t kHALF(0.5);
-  return (p1 + p2) * kHALF;
-}
-
-JumpBase::JumpBase() : p(200) {
-  for (unsigned src_len = 1; src_len < 200; ++src_len) {
-    map<int, prob_t>& cpd = p[src_len];
-    int min_jump = 1 - src_len;
-    int max_jump = src_len;
-    prob_t z;
-    for (int j = min_jump; j <= max_jump; ++j) {
-      prob_t& cp = cpd[j];
-      if (j < 0)
-        cp.logeq(log_poisson(1.5-j, 1));
-      else if (j > 0)
-        cp.logeq(log_poisson(j, 1));
-      cp.poweq(0.2);
-      z += cp;
-    }
-    for (int j = min_jump; j <= max_jump; ++j) {
-      cpd[j] /= z;
-    }
-  }
-}
-
diff --git a/gi/pf/base_measures.h b/gi/pf/base_measures.h
deleted file mode 100644
index b0495bfd..00000000
--- a/gi/pf/base_measures.h
+++ /dev/null
@@ -1,247 +0,0 @@
-#ifndef _BASE_MEASURES_H_
-#define _BASE_MEASURES_H_
-
-#include <vector>
-#include <map>
-#include <string>
-#include <cmath>
-#include <iostream>
-
-#include "unigrams.h"
-#include "trule.h"
-#include "prob.h"
-#include "tdict.h"
-#include "sampler.h"
-
-inline double log_poisson(unsigned x, const double& lambda) {
-  assert(lambda > 0.0);
-  return log(lambda) * x - lgamma(x + 1) - lambda;
-}
-
-inline std::ostream& operator<<(std::ostream& os, const std::vector<WordID>& p) {
-  os << '[';
-  for (int i = 0; i < p.size(); ++i)
-    os << (i==0 ? "" : " ") << TD::Convert(p[i]);
-  return os << ']';
-}
-
-struct Model1 {
-  explicit Model1(const std::string& fname) :
-      kNULL(TD::Convert("<eps>")),
-      kZERO() {
-    LoadModel1(fname);
-  }
-
-  void LoadModel1(const std::string& fname);
-
-  // returns prob 0 if src or trg is not found
-  const prob_t& operator()(WordID src, WordID trg) const {
-    if (src == 0) src = kNULL;
-    if (src < ttable.size()) {
-      const std::map<WordID, prob_t>& cpd = ttable[src];
-      const std::map<WordID, prob_t>::const_iterator it = cpd.find(trg);
-      if (it != cpd.end())
-        return it->second;
-    }
-    return kZERO;
-  }
-
-  const WordID kNULL;
-  const prob_t kZERO;
-  std::vector<std::map<WordID, prob_t> > ttable;
-};
-
-struct PoissonUniformUninformativeBase {
-  explicit PoissonUniformUninformativeBase(const unsigned ves) : kUNIFORM(1.0 / ves) {}
-  prob_t operator()(const TRule& r) const {
-    prob_t p; p.logeq(log_poisson(r.e_.size(), 1.0));
-    prob_t q = kUNIFORM; q.poweq(r.e_.size());
-    p *= q;
-    return p;
-  }
-  void Summary() const {}
-  void ResampleHyperparameters(MT19937*) {}
-  void Increment(const TRule&) {}
-  void Decrement(const TRule&) {}
-  prob_t Likelihood() const { return prob_t::One(); }
-  const prob_t kUNIFORM;
-};
-
-struct CompletelyUniformBase {
-  explicit CompletelyUniformBase(const unsigned ves) : kUNIFORM(1.0 / ves) {}
-  prob_t operator()(const TRule&) const {
-    return kUNIFORM;
-  }
-  void Summary() const {}
-  void ResampleHyperparameters(MT19937*) {}
-  void Increment(const TRule&) {}
-  void Decrement(const TRule&) {}
-  prob_t Likelihood() const { return prob_t::One(); }
-  const prob_t kUNIFORM;
-};
-
-struct UnigramWordBase {
-  explicit UnigramWordBase(const std::string& fname) : un(fname) {}
-  prob_t operator()(const TRule& r) const {
-    return un(r.e_);
-  }
-  const UnigramWordModel un;
-};
-
-struct RuleHasher {
-  size_t operator()(const TRule& r) const {
-    return hash_value(r);
-  }
-};
-
-struct TableLookupBase {
-  TableLookupBase(const std::string& fname);
-
-  prob_t operator()(const TRule& rule) const {
-    const std::tr1::unordered_map<TRule,prob_t>::const_iterator it = table.find(rule);
-    if (it == table.end()) {
-      std::cerr << rule << " not found\n";
-      abort();
-    }
-    return it->second;
-  }
-
-  void ResampleHyperparameters(MT19937*) {}
-  void Increment(const TRule&) {}
-  void Decrement(const TRule&) {}
-  prob_t Likelihood() const { return prob_t::One(); }
-  void Summary() const {}
-
-  std::tr1::unordered_map<TRule,prob_t,RuleHasher> table;
-};
-
-struct PhraseConditionalUninformativeBase {
-  explicit PhraseConditionalUninformativeBase(const unsigned vocab_e_size) :
-      kUNIFORM_TARGET(1.0 / vocab_e_size) {
-    assert(vocab_e_size > 0);
-  }
-
-  // return p0 of rule.e_ | rule.f_
-  prob_t operator()(const TRule& rule) const {
-    return p0(rule.f_, rule.e_, 0, 0);
-  }
-
-  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
-
-  void Summary() const {}
-  void ResampleHyperparameters(MT19937*) {}
-  void Increment(const TRule&) {}
-  void Decrement(const TRule&) {}
-  prob_t Likelihood() const { return prob_t::One(); }
-  const prob_t kUNIFORM_TARGET;
-};
-
-struct PhraseConditionalUninformativeUnigramBase {
-  explicit PhraseConditionalUninformativeUnigramBase(const std::string& file, const unsigned vocab_e_size) : u(file, vocab_e_size) {}
-
-  // return p0 of rule.e_ | rule.f_
-  prob_t operator()(const TRule& rule) const {
-    return p0(rule.f_, rule.e_, 0, 0);
-  }
-
-  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
-
-  const UnigramModel u;
-};
-
-struct PhraseConditionalBase {
-  explicit PhraseConditionalBase(const Model1& m1, const double m1mixture, const unsigned vocab_e_size) :
-      model1(m1),
-      kM1MIXTURE(m1mixture),
-      kUNIFORM_MIXTURE(1.0 - m1mixture),
-      kUNIFORM_TARGET(1.0 / vocab_e_size) {
-    assert(m1mixture >= 0.0 && m1mixture <= 1.0);
-    assert(vocab_e_size > 0);
-  }
-
-  // return p0 of rule.e_ | rule.f_
-  prob_t operator()(const TRule& rule) const {
-    return p0(rule.f_, rule.e_, 0, 0);
-  }
-
-  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
-
-  const Model1& model1;
-  const prob_t kM1MIXTURE;  // Model 1 mixture component
-  const prob_t kUNIFORM_MIXTURE; // uniform mixture component
-  const prob_t kUNIFORM_TARGET;
-};
-
-struct PhraseJointBase {
-  explicit PhraseJointBase(const Model1& m1, const double m1mixture, const unsigned vocab_e_size, const unsigned vocab_f_size) :
-      model1(m1),
-      kM1MIXTURE(m1mixture),
-      kUNIFORM_MIXTURE(1.0 - m1mixture),
-      kUNIFORM_SOURCE(1.0 / vocab_f_size),
-      kUNIFORM_TARGET(1.0 / vocab_e_size) {
-    assert(m1mixture >= 0.0 && m1mixture <= 1.0);
-    assert(vocab_e_size > 0);
-  }
-
-  // return p0 of rule.e_ , rule.f_
-  prob_t operator()(const TRule& rule) const {
-    return p0(rule.f_, rule.e_, 0, 0);
-  }
-
-  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
-
-  const Model1& model1;
-  const prob_t kM1MIXTURE;  // Model 1 mixture component
-  const prob_t kUNIFORM_MIXTURE; // uniform mixture component
-  const prob_t kUNIFORM_SOURCE;
-  const prob_t kUNIFORM_TARGET;
-};
-
-struct PhraseJointBase_BiDir {
-  explicit PhraseJointBase_BiDir(const Model1& m1,
-                                 const Model1& im1,
-                                 const double m1mixture,
-                                 const unsigned vocab_e_size,
-                                 const unsigned vocab_f_size) :
-      model1(m1),
-      invmodel1(im1),
-      kM1MIXTURE(m1mixture),
-      kUNIFORM_MIXTURE(1.0 - m1mixture),
-      kUNIFORM_SOURCE(1.0 / vocab_f_size),
-      kUNIFORM_TARGET(1.0 / vocab_e_size) {
-    assert(m1mixture >= 0.0 && m1mixture <= 1.0);
-    assert(vocab_e_size > 0);
-  }
-
-  // return p0 of rule.e_ , rule.f_
-  prob_t operator()(const TRule& rule) const {
-    return p0(rule.f_, rule.e_, 0, 0);
-  }
-
-  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
-
-  const Model1& model1;
-  const Model1& invmodel1;
-  const prob_t kM1MIXTURE;  // Model 1 mixture component
-  const prob_t kUNIFORM_MIXTURE; // uniform mixture component
-  const prob_t kUNIFORM_SOURCE;
-  const prob_t kUNIFORM_TARGET;
-};
-
-// base distribution for jump size multinomials
-// basically p(0) = 0 and then, p(1) is max, and then
-// you drop as you move to the max jump distance
-struct JumpBase {
-  JumpBase();
-
-  const prob_t& operator()(int jump, unsigned src_len) const {
-    assert(jump != 0);
-    const std::map<int, prob_t>::const_iterator it = p[src_len].find(jump);
-    assert(it != p[src_len].end());
-    return it->second;
-  }
-  std::vector<std::map<int, prob_t> > p;
-};
-
-
-#endif
diff --git a/mteval/ns.cc b/mteval/ns.cc
index 68c8deaa..da678b84 100644
--- a/mteval/ns.cc
+++ b/mteval/ns.cc
@@ -136,6 +136,10 @@ struct BleuSegmentEvaluator : public SegmentEvaluator {
                          float* correct,  // N elements reserved
                          float* hyp,      // N elements reserved
                          bool clip_counts = true) const {
+    // clear clipping stats
+    for (typename NGramCountMap::iterator it = ngrams_.begin(); it != ngrams_.end(); ++it)
+      it->second.second = 0;
+
     vector<WordID> ngram(N);
     *correct *= 0;
     *hyp *= 0;
diff --git a/mteval/ns.h b/mteval/ns.h
index 622265db..d88c263b 100644
--- a/mteval/ns.h
+++ b/mteval/ns.h
@@ -6,6 +6,7 @@
 #include <map>
 #include <boost/shared_ptr.hpp>
 #include "wordid.h"
+#include <iostream>
 
 class SufficientStats {
  public:
@@ -43,6 +44,11 @@ class SufficientStats {
   bool operator==(const SufficientStats& other) const {
     return other.fields == fields;
   }
+  bool IsAdditiveIdentity() const {  // true when no field is nonzero (so also true when fields is empty)
+    for (std::vector<float>::const_iterator it = fields.begin(); it != fields.end(); ++it)
+      if (*it != 0.0f) return false;
+    return true;
+  }
   size_t size() const { return fields.size(); }
   float operator[](size_t i) const {
     if (i < fields.size()) return fields[i];
@@ -54,12 +60,12 @@ class SufficientStats {
   std::vector<float> fields;
 };
 
-inline const SufficientStats& operator+(const SufficientStats& a, const SufficientStats& b) {
+inline const SufficientStats operator+(const SufficientStats& a, const SufficientStats& b) {
   SufficientStats res(a);
   return res += b;
 }
 
-inline const SufficientStats& operator-(const SufficientStats& a, const SufficientStats& b) {
+inline const SufficientStats operator-(const SufficientStats& a, const SufficientStats& b) {
   SufficientStats res(a);
   return res -= b;
 }
diff --git a/vest/ces.cc b/vest/ces.cc
index 4ae6b695..cd89aa69 100644
--- a/vest/ces.cc
+++ b/vest/ces.cc
@@ -4,25 +4,32 @@
 #include <sstream>
 #include <boost/shared_ptr.hpp>
 
-#include "aligner.h"
+// TODO, if AER is to be optimized again, we will need this
+// #include "aligner.h"
 #include "lattice.h"
 #include "viterbi_envelope.h"
 #include "error_surface.h"
+#include "ns.h"
 
 using boost::shared_ptr;
 using namespace std;
 
 const bool minimize_segments = true;    // if adjacent segments have equal scores, merge them
 
-void ComputeErrorSurface(const SentenceScorer& ss, const ViterbiEnvelope& ve, ErrorSurface* env, const ScoreType type, const Hypergraph& hg) {
+void ComputeErrorSurface(const SegmentEvaluator& ss,
+                         const ViterbiEnvelope& ve,
+                         ErrorSurface* env,
+                         const EvaluationMetric* metric,
+                         const Hypergraph& hg) {
   vector<WordID> prev_trans;
   const vector<shared_ptr<Segment> >& ienv = ve.GetSortedSegs();
   env->resize(ienv.size());
-  ScoreP prev_score;
+  SufficientStats prev_score; // defaults to 0
   int j = 0;
   for (int i = 0; i < ienv.size(); ++i) {
     const Segment& seg = *ienv[i];
     vector<WordID> trans;
+#if 0
     if (type == AER) {
       vector<bool> edges(hg.edges_.size(), false);
       seg.CollectEdgesUsed(&edges);  // get the set of edges in the viterbi
@@ -46,34 +53,31 @@ void ComputeErrorSurface(const SentenceScorer& ss, const ViterbiEnvelope& ve, Er
       string tstr = os.str();
       TD::ConvertSentence(tstr.substr(tstr.rfind(" ||| ") + 5), &trans);
     } else {
+#endif
       seg.ConstructTranslation(&trans);
-    }
-    // cerr << "Scoring: " << TD::GetString(trans) << endl;
+    //}
+    //cerr << "Scoring: " << TD::GetString(trans) << endl;
     if (trans == prev_trans) {
       if (!minimize_segments) {
-        assert(prev_score); // if this fails, it means
-	                    // the decoder can generate null translations
         ErrorSegment& out = (*env)[j];
-        out.delta = prev_score->GetZero();
+        out.delta.fields.clear();
         out.x = seg.x;
 	++j;
       }
-      // cerr << "Identical translation, skipping scoring\n";
+      //cerr << "Identical translation, skipping scoring\n";
     } else {
-      ScoreP score = ss.ScoreCandidate(trans);
+      SufficientStats score;
+      ss.Evaluate(trans, &score);
       // cerr << "score= " << score->ComputeScore() << "\n";
-      ScoreP cur_delta_p = score->GetZero();
-      Score* cur_delta = cur_delta_p.get();
-      // just record the score diffs
-      if (!prev_score)
-        prev_score = score->GetZero();
-
-      score->Subtract(*prev_score, cur_delta);
+      //string x1; score.Encode(&x1); cerr << "STATS: " << x1 << endl;
+      const SufficientStats delta = score - prev_score;
+      //string x2; delta.Encode(&x2); cerr << "DELTA: " << x2 << endl;
+      //string xx; delta.Encode(&xx); cerr << xx << endl;
       prev_trans.swap(trans);
       prev_score = score;
-      if ((!minimize_segments) || (!cur_delta->IsAdditiveIdentity())) {
+      if ((!minimize_segments) || (!delta.IsAdditiveIdentity())) {
         ErrorSegment& out = (*env)[j];
-        out.delta = cur_delta_p;
+        out.delta = delta;
         out.x = seg.x;
         ++j;
       }
diff --git a/vest/ces.h b/vest/ces.h
index 2f098990..e021e715 100644
--- a/vest/ces.h
+++ b/vest/ces.h
@@ -1,12 +1,16 @@
 #ifndef _CES_H_
 #define _CES_H_
 
-#include "scorer.h"
-
 class ViterbiEnvelope;
 class Hypergraph;
+class SegmentEvaluator;
 class ErrorSurface;
+class EvaluationMetric;
 
-void ComputeErrorSurface(const SentenceScorer& ss, const ViterbiEnvelope& ve, ErrorSurface* es, const ScoreType type, const Hypergraph& hg);
+void ComputeErrorSurface(const SegmentEvaluator& ss,
+                         const ViterbiEnvelope& ve,
+                         ErrorSurface* es,
+                         const EvaluationMetric* metric,
+                         const Hypergraph& hg);
 
 #endif
diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index c382a972..8cde748b 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -364,7 +364,7 @@ while (1){
 			$mapoutput =~ s/mapinput/mapoutput/;
 			push @mapoutputs, "$dir/splag.$im1/$mapoutput";
 			$o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard";
-			my $script = "$MAPPER -s $srcFile -l $metric $refs_comma_sep < $dir/splag.$im1/$shard | sort -t \$'\\t' -k 1 > $dir/splag.$im1/$mapoutput";
+			my $script = "$MAPPER -s $srcFile -m $metric $refs_comma_sep < $dir/splag.$im1/$shard | sort -t \$'\\t' -k 1 > $dir/splag.$im1/$mapoutput";
 			if ($use_make) {
 				my $script_file = "$dir/scripts/map.$shard";
 				open F, ">$script_file" or die "Can't write $script_file: $!";
@@ -424,7 +424,7 @@ while (1){
 		print STDERR "Results for $tol/$til lines\n";
 		print STDERR "\nSORTING AND RUNNING VEST REDUCER\n";
 		print STDERR unchecked_output("date");
-		$cmd="sort -t \$'\\t' -k 1 @mapoutputs | $REDUCER -l $metric > $dir/redoutput.$im1";
+		$cmd="sort -t \$'\\t' -k 1 @mapoutputs | $REDUCER -m $metric > $dir/redoutput.$im1";
 		print STDERR "COMMAND:\n$cmd\n";
 		check_bash_call($cmd);
 		$cmd="sort -nk3 $DIR_FLAG '-t|' $dir/redoutput.$im1 | head -1";
diff --git a/vest/error_surface.cc b/vest/error_surface.cc
index 754aa8de..515b67f8 100644
--- a/vest/error_surface.cc
+++ b/vest/error_surface.cc
@@ -5,8 +5,7 @@
 
 using namespace std;
 
-ErrorSurface::~ErrorSurface() {
-}
+ErrorSurface::~ErrorSurface() {}
 
 void ErrorSurface::Serialize(std::string* out) const {
   const int segments = this->size();
@@ -15,8 +14,8 @@ void ErrorSurface::Serialize(std::string* out) const {
   for (int i = 0; i < segments; ++i) {
     const ErrorSegment& cur = (*this)[i];
     string senc;
-    cur.delta->Encode(&senc);
-    assert(senc.size() < 256);
+    cur.delta.Encode(&senc);
+    assert(senc.size() < 1024);
     unsigned char len = senc.size();
     os.write((const char*)&cur.x, sizeof(cur.x));
     os.write((const char*)&len, sizeof(len));
@@ -25,7 +24,7 @@ void ErrorSurface::Serialize(std::string* out) const {
   *out = os.str();
 }
 
-void ErrorSurface::Deserialize(ScoreType type, const std::string& in) {
+void ErrorSurface::Deserialize(const std::string& in) {
   istringstream is(in, ios::binary);
   int segments;
   is.read((char*)&segments, sizeof(segments));
@@ -37,7 +36,7 @@ void ErrorSurface::Deserialize(ScoreType type, const std::string& in) {
     is.read((char*)&len, sizeof(len));
     string senc(len, '\0'); assert(senc.size() == len);
     is.read((char*)&senc[0], len);
-    cur.delta = SentenceScorer::CreateScoreFromString(type, senc);
+    cur.delta = SufficientStats(senc);
   }
 }
 
diff --git a/vest/error_surface.h b/vest/error_surface.h
index ad728cfa..bb65847b 100644
--- a/vest/error_surface.h
+++ b/vest/error_surface.h
@@ -4,13 +4,13 @@
 #include <vector>
 #include <string>
 
-#include "scorer.h"
+#include "ns.h"
 
 class Score;
 
 struct ErrorSegment {
   double x;
-  ScoreP delta;
+  SufficientStats delta;
   ErrorSegment() : x(0), delta() {}
 };
 
@@ -18,7 +18,7 @@ class ErrorSurface : public std::vector<ErrorSegment> {
  public:
   ~ErrorSurface();
   void Serialize(std::string* out) const;
-  void Deserialize(ScoreType type, const std::string& in);
+  void Deserialize(const std::string& in);
 };
 
 #endif
diff --git a/vest/line_optimizer.cc b/vest/line_optimizer.cc
index 7303df8d..49443fbe 100644
--- a/vest/line_optimizer.cc
+++ b/vest/line_optimizer.cc
@@ -4,7 +4,7 @@
 #include <algorithm>
 
 #include "sparse_vector.h"
-#include "scorer.h"
+#include "ns.h"
 
 using namespace std;
 
@@ -18,6 +18,7 @@ struct IntervalComp {
 };
 
 double LineOptimizer::LineOptimize(
+    const EvaluationMetric* metric,
     const vector<ErrorSurface>& surfaces,
     const LineOptimizer::ScoreType type,
     float* best_score,
@@ -32,8 +33,7 @@ double LineOptimizer::LineOptimize(
   }
   sort(all_ints.begin(), all_ints.end(), IntervalComp());
   double last_boundary = all_ints.front()->x;
-  ScoreP accp = all_ints.front()->delta->GetZero();
-  Score *acc=accp.get();
+  SufficientStats acc;
   float& cur_best_score = *best_score;
   cur_best_score = (type == MAXIMIZE_SCORE ?
     -numeric_limits<float>::max() : numeric_limits<float>::max());
@@ -42,9 +42,8 @@ double LineOptimizer::LineOptimize(
   for (vector<ErrorIter>::iterator i = all_ints.begin();
        i != all_ints.end(); ++i) {
     const ErrorSegment& seg = **i;
-    assert(seg.delta);
     if (seg.x - last_boundary > epsilon) {
-      float sco = acc->ComputeScore();
+      float sco = metric->ComputeScore(acc);
       if ((type == MAXIMIZE_SCORE && sco > cur_best_score) ||
           (type == MINIMIZE_SCORE && sco < cur_best_score) ) {
         cur_best_score = sco;
@@ -54,16 +53,18 @@ double LineOptimizer::LineOptimize(
 	} else {
 	  pos = last_boundary + (seg.x - last_boundary) / 2;
 	}
-	// cerr << "NEW BEST: " << pos << "  (score=" << cur_best_score << ")\n";
+	//cerr << "NEW BEST: " << pos << "  (score=" << cur_best_score << ")\n";
       }
-      // string xx; acc->ScoreDetails(&xx); cerr << "---- " << xx;
+      // string xx = metric->DetailedScore(acc); cerr << "---- " << xx;
       // cerr << "---- s=" << sco << "\n";
       last_boundary = seg.x;
     }
     // cerr << "x-boundary=" << seg.x << "\n";
-    acc->PlusEquals(*seg.delta);
+    //string x2; acc.Encode(&x2); cerr << "   ACC: " << x2 << endl;
+    //string x1; seg.delta.Encode(&x1); cerr << " DELTA: " << x1 << endl;
+    acc += seg.delta;
   }
-  float sco = acc->ComputeScore();
+  float sco = metric->ComputeScore(acc);
   if ((type == MAXIMIZE_SCORE && sco > cur_best_score) ||
       (type == MINIMIZE_SCORE && sco < cur_best_score) ) {
     cur_best_score = sco;
@@ -107,3 +108,4 @@ void LineOptimizer::CreateOptimizationDirections(
      RandomUnitVector(features_to_optimize, &out[i], rng);
   cerr << "Generated " << out.size() << " total axes to optimize along.\n";
 }
+
diff --git a/vest/line_optimizer.h b/vest/line_optimizer.h
index 99a591f4..83819f41 100644
--- a/vest/line_optimizer.h
+++ b/vest/line_optimizer.h
@@ -7,6 +7,7 @@
 #include "error_surface.h"
 #include "sampler.h"
 
+class EvaluationMetric;
 class Weights;
 
 struct LineOptimizer {
@@ -18,6 +19,7 @@ struct LineOptimizer {
   // merge all the error surfaces together into a global
   // error surface and find (the middle of) the best segment
   static double LineOptimize(
+     const EvaluationMetric* metric,
      const std::vector<ErrorSurface>& envs,
      const LineOptimizer::ScoreType type,
      float* best_score,
diff --git a/vest/lo_test.cc b/vest/lo_test.cc
index f5638600..a67f65e1 100644
--- a/vest/lo_test.cc
+++ b/vest/lo_test.cc
@@ -5,6 +5,8 @@
 #include <boost/shared_ptr.hpp>
 #include <gtest/gtest.h>
 
+#include "ns.h"
+#include "ns_docscorer.h"
 #include "ces.h"
 #include "fdict.h"
 #include "hg.h"
@@ -15,7 +17,6 @@
 #include "viterbi.h"
 #include "viterbi_envelope.h"
 #include "line_optimizer.h"
-#include "scorer.h"
 
 using namespace std;
 using boost::shared_ptr;
@@ -141,9 +142,6 @@ TEST_F(OptTest, TestS1) {
   TD::ConvertSentence(ref22, &refs2[1]);
   TD::ConvertSentence(ref32, &refs2[2]);
   TD::ConvertSentence(ref42, &refs2[3]);
-  ScoreType type = ScoreTypeFromString("ibm_bleu");
-  ScorerP scorer1 = SentenceScorer::CreateSentenceScorer(type, refs1);
-  ScorerP scorer2 = SentenceScorer::CreateSentenceScorer(type, refs2);
   vector<ViterbiEnvelope> envs(2);
 
   RandomNumberGenerator<boost::mt19937> rng;
@@ -167,14 +165,17 @@ TEST_F(OptTest, TestS1) {
   envs[1] = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg2, NULL, wf);
 
   vector<ErrorSurface> es(2);
-  ComputeErrorSurface(*scorer1, envs[0], &es[0], IBM_BLEU, hg);
-  ComputeErrorSurface(*scorer2, envs[1], &es[1], IBM_BLEU, hg2);
+  EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU");
+  boost::shared_ptr<SegmentEvaluator> scorer1 = metric->CreateSegmentEvaluator(refs1);
+  boost::shared_ptr<SegmentEvaluator> scorer2 = metric->CreateSegmentEvaluator(refs2);
+  ComputeErrorSurface(*scorer1, envs[0], &es[0], metric, hg);
+  ComputeErrorSurface(*scorer2, envs[1], &es[1], metric, hg2);
   cerr << envs[0].size() << " " << envs[1].size() << endl;
   cerr << es[0].size() << " " << es[1].size() << endl;
   envs.clear();
   clock_t t_env=clock();
   float score;
-  double m = LineOptimizer::LineOptimize(es, LineOptimizer::MAXIMIZE_SCORE, &score);
+  double m = LineOptimizer::LineOptimize(metric,es, LineOptimizer::MAXIMIZE_SCORE, &score);
   clock_t t_opt=clock();
   cerr << "line optimizer returned: " << m << " (SCORE=" << score << ")\n";
   EXPECT_FLOAT_EQ(0.48719698, score);
@@ -217,15 +218,15 @@ TEST_F(OptTest,TestZeroOrigin) {
   vector<ViterbiEnvelope> envs(1);
   envs[0] = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
 
-  ScoreType type = ScoreTypeFromString("ibm_bleu");
   vector<vector<WordID> > mr(4);
   TD::ConvertSentence("untitled", &mr[0]);
   TD::ConvertSentence("with no title", &mr[1]);
   TD::ConvertSentence("without a title", &mr[2]);
   TD::ConvertSentence("without title", &mr[3]);
-  ScorerP scorer1 = SentenceScorer::CreateSentenceScorer(type, mr);
+  EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU");
+  boost::shared_ptr<SegmentEvaluator> scorer1 = metric->CreateSegmentEvaluator(mr);
   vector<ErrorSurface> es(1);
-  ComputeErrorSurface(*scorer1, envs[0], &es[0], IBM_BLEU, hg);
+  ComputeErrorSurface(*scorer1, envs[0], &es[0], metric, hg);
 }
 
 int main(int argc, char **argv) {
diff --git a/vest/mr_vest_map.cc b/vest/mr_vest_map.cc
index 71dda6d7..8f6e085d 100644
--- a/vest/mr_vest_map.cc
+++ b/vest/mr_vest_map.cc
@@ -6,11 +6,12 @@
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 
+#include "ns.h"
+#include "ns_docscorer.h"
 #include "ces.h"
 #include "filelib.h"
 #include "stringlib.h"
 #include "sparse_vector.h"
-#include "scorer.h"
 #include "viterbi_envelope.h"
 #include "inside_outside.h"
 #include "error_surface.h"
@@ -25,7 +26,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   opts.add_options()
         ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation (tokenized text)")
         ("source,s",po::value<string>(), "Source file (ignored, except for AER)")
-        ("loss_function,l",po::value<string>()->default_value("ibm_bleu"), "Loss function being optimized")
+        ("evaluation_metric,m",po::value<string>()->default_value("ibm_bleu"), "Evaluation metric being optimized")
         ("input,i",po::value<string>()->default_value("-"), "Input file to map (- is STDIN)")
         ("help,h", "Help");
   po::options_description dcmdline_options;
@@ -67,10 +68,10 @@ bool ReadSparseVectorString(const string& s, SparseVector<double>* v) {
 int main(int argc, char** argv) {
   po::variables_map conf;
   InitCommandLine(argc, argv, &conf);
-  const string loss_function = conf["loss_function"].as<string>();
-  ScoreType type = ScoreTypeFromString(loss_function);
-  DocScorer ds(type, conf["reference"].as<vector<string> >(), conf["source"].as<string>());
-  cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl;
+  const string evaluation_metric = conf["evaluation_metric"].as<string>();
+  EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric);
+  DocumentScorer ds(metric, conf["reference"].as<vector<string> >());
+  cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl;
   Hypergraph hg;
   string last_file;
   ReadFile in_read(conf["input"].as<string>());
@@ -97,7 +98,8 @@ int main(int argc, char** argv) {
     ViterbiEnvelopeWeightFunction wf(origin, axis);
     ViterbiEnvelope ve = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
     ErrorSurface es;
-    ComputeErrorSurface(*ds[sent_id], ve, &es, type, hg);
+
+    ComputeErrorSurface(*ds[sent_id], ve, &es, metric, hg);
     //cerr << "Viterbi envelope has " << ve.size() << " segments\n";
     // cerr << "Error surface has " << es.size() << " segments\n";
     string val;
diff --git a/vest/mr_vest_reduce.cc b/vest/mr_vest_reduce.cc
index 3df52020..dda61f88 100644
--- a/vest/mr_vest_reduce.cc
+++ b/vest/mr_vest_reduce.cc
@@ -10,6 +10,7 @@
 #include "error_surface.h"
 #include "line_optimizer.h"
 #include "b64tools.h"
+#include "stringlib.h"
 
 using namespace std;
 namespace po = boost::program_options;
@@ -17,12 +18,12 @@ namespace po = boost::program_options;
 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
-        ("loss_function,l",po::value<string>(), "Loss function being optimized")
+        ("evaluation_metric,m",po::value<string>(), "Evaluation metric (IBM_BLEU, etc.)")
         ("help,h", "Help");
   po::options_description dcmdline_options;
   dcmdline_options.add(opts);
   po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  bool flag = conf->count("loss_function") == 0;
+  bool flag = conf->count("evaluation_metric") == 0;
   if (flag || conf->count("help")) {
     cerr << dcmdline_options << endl;
     exit(1);
@@ -32,30 +33,27 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
 int main(int argc, char** argv) {
   po::variables_map conf;
   InitCommandLine(argc, argv, &conf);
-  const string loss_function = conf["loss_function"].as<string>();
-  ScoreType type = ScoreTypeFromString(loss_function);
+  const string evaluation_metric = conf["evaluation_metric"].as<string>();
   LineOptimizer::ScoreType opt_type = LineOptimizer::MAXIMIZE_SCORE;
-  if (type == TER || type == AER) {
+  if (UppercaseString(evaluation_metric) == "TER")
     opt_type = LineOptimizer::MINIMIZE_SCORE;
-  }
-  string last_key;
+  EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric);
+
   vector<ErrorSurface> esv;
-  while(cin) {
-    string line;
-    getline(cin, line);
-    if (line.empty()) continue;
+  string last_key, line, key, val;
+  while(getline(cin, line)) {
     size_t ks = line.find("\t");
     assert(string::npos != ks);
     assert(ks > 2);
-    string key = line.substr(2, ks - 2);
-    string val = line.substr(ks + 1);
+    key = line.substr(2, ks - 2);
+    val = line.substr(ks + 1);
     if (key != last_key) {
       if (!last_key.empty()) {
 	float score;
-        double x = LineOptimizer::LineOptimize(esv, opt_type, &score);
+        double x = LineOptimizer::LineOptimize(metric, esv, opt_type, &score);
 	cout << last_key << "|" << x << "|" << score << endl;
       }
-      last_key = key;
+      last_key.swap(key);
       esv.clear();
     }
     if (val.size() % 4 != 0) {
@@ -68,13 +66,11 @@ int main(int argc, char** argv) {
       continue;
     }
     esv.push_back(ErrorSurface());
-    esv.back().Deserialize(type, encoded);
+    esv.back().Deserialize(encoded);
   }
   if (!esv.empty()) {
-    // cerr << "ESV=" << esv.size() << endl;
-    // for (int i = 0; i < esv.size(); ++i) { cerr << esv[i].size() << endl; }
     float score;
-    double x = LineOptimizer::LineOptimize(esv, opt_type, &score);
+    double x = LineOptimizer::LineOptimize(metric, esv, opt_type, &score);
     cout << last_key << "|" << x << "|" << score << endl;
   }
   return 0;
-- 
cgit v1.2.3


From 203c3c3357b9ed8cfe44932c2bf5ea19eba6238c Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Fri, 27 Jan 2012 13:19:27 -0500
Subject: migration to new metric api for vest, clean up of unsupported/not
 functional code

---
 mteval/mbr_kbest.cc                   |  21 +-
 utils/fast_sparse_vector.h            |   6 +
 vest/dist-vest.pl                     |  22 +--
 vest/mbr_kbest.cc                     | 138 -------------
 vest/mr_vest_generate_mapper_input.cc | 356 ++++++----------------------------
 vest/mr_vest_map.cc                   |  16 +-
 6 files changed, 84 insertions(+), 475 deletions(-)
 delete mode 100644 vest/mbr_kbest.cc

(limited to 'mteval')

diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc
index 64a6a8bf..b5e4750c 100644
--- a/mteval/mbr_kbest.cc
+++ b/mteval/mbr_kbest.cc
@@ -5,7 +5,7 @@
 
 #include "prob.h"
 #include "tdict.h"
-#include "scorer.h"
+#include "ns.h"
 #include "filelib.h"
 #include "stringlib.h"
 
@@ -17,7 +17,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
         ("scale,a",po::value<double>()->default_value(1.0), "Posterior scaling factor (alpha)")
-        ("loss_function,l",po::value<string>()->default_value("bleu"), "Loss function")
+        ("evaluation_metric,m",po::value<string>()->default_value("ibm_bleu"), "Evaluation metric")
         ("input,i",po::value<string>()->default_value("-"), "File to read k-best lists from")
         ("output_list,L", "Show reranked list as output")
         ("help,h", "Help");
@@ -75,13 +75,14 @@ bool ReadKBestList(istream* in, string* sent_id, vector<pair<vector<WordID>, pro
 int main(int argc, char** argv) {
   po::variables_map conf;
   InitCommandLine(argc, argv, &conf);
-  const string metric = conf["loss_function"].as<string>();
+  const string smetric = conf["evaluation_metric"].as<string>();
+  EvaluationMetric* metric = EvaluationMetric::Instance(smetric);
+  const bool is_loss = (UppercaseString(smetric) == "TER");
   const bool output_list = conf.count("output_list") > 0;
   const string file = conf["input"].as<string>();
   const double mbr_scale = conf["scale"].as<double>();
   cerr << "Posterior scaling factor (alpha) = " << mbr_scale << endl;
 
-  ScoreType type = ScoreTypeFromString(metric);
   vector<pair<vector<WordID>, prob_t> > list;
   ReadFile rf(file);
   string sent_id;
@@ -99,15 +100,15 @@ int main(int argc, char** argv) {
     vector<double> mbr_scores(output_list ? list.size() : 0);
     double mbr_loss = numeric_limits<double>::max();
     for (int i = 0 ; i < list.size(); ++i) {
-      vector<vector<WordID> > refs(1, list[i].first);
-      //cerr << i << ": " << list[i].second <<"\t" << TD::GetString(list[i].first) << endl;
-      ScorerP scorer = SentenceScorer::CreateSentenceScorer(type, refs);
+      const vector<vector<WordID> > refs(1, list[i].first);
+
       double wl_acc = 0;
       for (int j = 0; j < list.size(); ++j) {
         if (i != j) {
-          ScoreP s = scorer->ScoreCandidate(list[j].first);
-          double loss = 1.0 - s->ComputeScore();
-          if (type == TER || type == AER) loss = 1.0 - loss;
+          SufficientStats ss;
+          metric->ComputeSufficientStatistics(list[j].first, refs, &ss);
+          double loss = 1.0 - metric->ComputeScore(ss);
+          if (is_loss) loss = 1.0 - loss;
           double weighted_loss = loss * (joints[j] / marginal).as_float();
           wl_acc += weighted_loss;
           if ((!output_list) && wl_acc > mbr_loss) break;
diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h
index 1301581a..17fa47bf 100644
--- a/utils/fast_sparse_vector.h
+++ b/utils/fast_sparse_vector.h
@@ -178,6 +178,12 @@ class FastSparseVector {
   T l2norm() const {
     return sqrt(l2norm_sq());
   }
+  T pnorm(const double p) const {  // p-norm: (sum of |value|^p over begin()..end()) raised to 1/p
+    T sum = T();
+    for (const_iterator it = begin(), e = end(); it != e; ++it)
+      sum += pow(fabs(it->second), p);
+    return pow(sum, 1.0 / p);  // p is not validated here; p == 0 makes 1.0 / p a division by zero
+  }
   // if values are binary, gives |A intersect B|/|A union B|
   template<typename S>
   S tanimoto_coef(const FastSparseVector<S> &vec) const {
diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index 8cde748b..1ec8c6b1 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -65,8 +65,6 @@ my $oraclen=0;
 my $oracleb=20;
 my $bleu_weight=1;
 my $use_make = 1;  # use make to parallelize line search
-my $dirargs='';
-my $density_prune;
 my $useqsub;
 my $pass_suffix = '';
 my $cpbin=1;
@@ -75,7 +73,6 @@ Getopt::Long::Configure("no_auto_abbrev");
 if (GetOptions(
 	"decoder=s" => \$decoderOpt,
 	"jobs=i" => \$jobs,
-	"density-prune=f" => \$density_prune,
 	"dont-clean" => \$disable_clean,
 	"pass-suffix=s" => \$pass_suffix,
 	"dry-run" => \$dryrun,
@@ -87,15 +84,7 @@ if (GetOptions(
 	"normalize=s" => \$normalize,
 	"pmem=s" => \$pmem,
         "cpbin!" => \$cpbin,
-	"rand-directions=i" => \$rand_directions,
-	"random_directions=i" => \$rand_directions,
-        "bleu_weight=s" => \$bleu_weight,
-        "no-primary!" => \$noprimary,
-        "max-similarity=s" => \$maxsim,
-        "oracle-directions=i" => \$oraclen,
-        "n-oracle=i" => \$oraclen,
-        "oracle-batch=i" => \$oracleb,
-        "directions-args=s" => \$dirargs,
+	"random-directions=i" => \$rand_directions,
 	"ref-files=s" => \$refFiles,
 	"metric=s" => \$metric,
 	"source-file=s" => \$srcFile,
@@ -107,10 +96,6 @@ if (GetOptions(
 	exit;
 }
 
-if (defined $density_prune) {
-  die "--density_prune n: n must be greater than 1.0\n" unless $density_prune > 1.0;
-}
-
 if ($useqsub) {
   $use_make = 0;
   die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub();
@@ -328,10 +313,7 @@ while (1){
 		print STDERR "\nGENERATE OPTIMIZATION STRATEGY (OPT-ITERATION $opt_iter/$optimization_iters)\n";
 		print STDERR unchecked_output("date");
 		$icc++;
-		my $nop=$noprimary?"--no_primary":"";
-		my $targs=$oraclen ? "--decoder_translations='$runFile.gz' ".get_comma_sep_refs('-references',$refFiles):"";
-		my $bwargs=$bleu_weight!=1 ? "--bleu_weight=$bleu_weight":"";
-		$cmd="$MAPINPUT -w $inweights -r $dir/hgs $bwargs -s $devSize -d $rand_directions --max_similarity=$maxsim --oracle_directions=$oraclen --oracle_batch=$oracleb $targs $dirargs > $dir/agenda.$im1-$opt_iter";
+		$cmd="$MAPINPUT -w $inweights -r $dir/hgs -s $devSize -d $rand_directions > $dir/agenda.$im1-$opt_iter";
 		print STDERR "COMMAND:\n$cmd\n";
 		check_call($cmd);
 		check_call("mkdir -p $dir/splag.$im1");
diff --git a/vest/mbr_kbest.cc b/vest/mbr_kbest.cc
deleted file mode 100644
index 2867b36b..00000000
--- a/vest/mbr_kbest.cc
+++ /dev/null
@@ -1,138 +0,0 @@
-#include <iostream>
-#include <vector>
-
-#include <boost/program_options.hpp>
-
-#include "prob.h"
-#include "tdict.h"
-#include "scorer.h"
-#include "filelib.h"
-#include "stringlib.h"
-
-using namespace std;
-
-namespace po = boost::program_options;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("scale,a",po::value<double>()->default_value(1.0), "Posterior scaling factor (alpha)")
-        ("loss_function,l",po::value<string>()->default_value("bleu"), "Loss function")
-        ("input,i",po::value<string>()->default_value("-"), "File to read k-best lists from")
-        ("output_list,L", "Show reranked list as output")
-        ("help,h", "Help");
-  po::options_description dcmdline_options;
-  dcmdline_options.add(opts);
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  bool flag = false;
-  if (flag || conf->count("help")) {
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-struct LossComparer {
-  bool operator()(const pair<vector<WordID>, double>& a, const pair<vector<WordID>, double>& b) const {
-    return a.second < b.second;
-  }
-};
-
-bool ReadKBestList(istream* in, string* sent_id, vector<pair<vector<WordID>, prob_t> >* list) {
-  static string cache_id;
-  static pair<vector<WordID>, prob_t> cache_pair;
-  list->clear();
-  string cur_id;
-  if (cache_pair.first.size() > 0) {
-    list->push_back(cache_pair);
-    cur_id = cache_id;
-    cache_pair.first.clear();
-  }
-  string line;
-  string tstr;
-  while(*in) {
-    getline(*in, line);
-    if (line.empty()) continue;
-    size_t p1 = line.find(" ||| ");
-    if (p1 == string::npos) { cerr << "Bad format: " << line << endl; abort(); }
-    size_t p2 = line.find(" ||| ", p1 + 4);
-    if (p2 == string::npos) { cerr << "Bad format: " << line << endl; abort(); }
-    size_t p3 = line.rfind(" ||| ");
-    cache_id = line.substr(0, p1);
-    tstr = line.substr(p1 + 5, p2 - p1 - 5);
-    double val = strtod(line.substr(p3 + 5).c_str(), NULL);
-    TD::ConvertSentence(tstr, &cache_pair.first);
-    cache_pair.second.logeq(val);
-    if (cur_id.empty()) cur_id = cache_id;
-    if (cur_id == cache_id) {
-      list->push_back(cache_pair);
-      *sent_id = cur_id;
-      cache_pair.first.clear();
-    } else { break; }
-  }
-  return !list->empty();
-}
-
-int main(int argc, char** argv) {
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-  const string metric = conf["loss_function"].as<string>();
-  const bool output_list = conf.count("output_list") > 0;
-  const string file = conf["input"].as<string>();
-  const double mbr_scale = conf["scale"].as<double>();
-  cerr << "Posterior scaling factor (alpha) = " << mbr_scale << endl;
-
-  ScoreType type = ScoreTypeFromString(metric);
-  vector<pair<vector<WordID>, prob_t> > list;
-  ReadFile rf(file);
-  string sent_id;
-  while(ReadKBestList(rf.stream(), &sent_id, &list)) {
-    vector<prob_t> joints(list.size());
-    const prob_t max_score = pow(list.front().second, mbr_scale);
-    prob_t marginal = prob_t::Zero();
-    for (int i = 0 ; i < list.size(); ++i) {
-      const prob_t joint = pow(list[i].second, mbr_scale) / max_score;
-      joints[i] = joint;
-      // cerr << "list[" << i << "] joint=" << log(joint) << endl;
-      marginal += joint;
-    }
-    int mbr_idx = -1;
-    vector<double> mbr_scores(output_list ? list.size() : 0);
-    double mbr_loss = numeric_limits<double>::max();
-    for (int i = 0 ; i < list.size(); ++i) {
-      vector<vector<WordID> > refs(1, list[i].first);
-      //cerr << i << ": " << list[i].second <<"\t" << TD::GetString(list[i].first) << endl;
-      ScorerP scorer = SentenceScorer::CreateSentenceScorer(type, refs);
-      double wl_acc = 0;
-      for (int j = 0; j < list.size(); ++j) {
-        if (i != j) {
-          ScoreP s = scorer->ScoreCandidate(list[j].first);
-          double loss = 1.0 - s->ComputeScore();
-          if (type == TER || type == AER) loss = 1.0 - loss;
-          double weighted_loss = loss * (joints[j] / marginal);
-          wl_acc += weighted_loss;
-          if ((!output_list) && wl_acc > mbr_loss) break;
-        }
-      }
-      if (output_list) mbr_scores[i] = wl_acc;
-      if (wl_acc < mbr_loss) {
-        mbr_loss = wl_acc;
-        mbr_idx = i;
-      }
-    }
-    // cerr << "ML translation: " << TD::GetString(list[0].first) << endl;
-    cerr << "MBR Best idx: " << mbr_idx << endl;
-    if (output_list) {
-      for (int i = 0; i < list.size(); ++i)
-        list[i].second.logeq(mbr_scores[i]);
-      sort(list.begin(), list.end(), LossComparer());
-      for (int i = 0; i < list.size(); ++i)
-        cout << sent_id << " ||| "
-             << TD::GetString(list[i].first) << " ||| "
-             << log(list[i].second) << endl;
-    } else {
-      cout << TD::GetString(list[mbr_idx].first) << endl;
-    }
-  }
-  return 0;
-}
-
diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc
index 0c094fd5..59d4f24f 100644
--- a/vest/mr_vest_generate_mapper_input.cc
+++ b/vest/mr_vest_generate_mapper_input.cc
@@ -1,320 +1,78 @@
-//TODO: debug segfault when references supplied, null shared_ptr when oracle
 #include <iostream>
 #include <vector>
-#include <sstream>
 
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 
-#include "sampler.h"
 #include "filelib.h"
 #include "weights.h"
 #include "line_optimizer.h"
-#include "hg.h"
-#include "hg_io.h"
-#include "scorer.h"
-#include "oracle_bleu.h"
-#include "ff_bleu.h"
-
-const bool DEBUG_ORACLE=true;
-
-//TODO: decide on cdec_ff ffs, or just bleumodel - if just bleumodel, then do existing features on serialized hypergraphs remain?  weights (origin) is passed to oracle_bleu.h:ComputeOracle
-//void register_feature_functions();
-//FFRegistry ff_registry;
-namespace {
-void init_bleumodel() {
-  ff_registry.clear();
-  ff_registry.Register(new FFFactory<BLEUModel>);
-}
-
-struct init_ff {
-  init_ff() {
-    init_bleumodel();
-  }
-};
-//init_ff reg; // order of initialization?  ff_registry may not be init yet.  call in Run() instead.
-}
 
 using namespace std;
 namespace po = boost::program_options;
 
-typedef SparseVector<double> Dir;
-typedef Dir Point;
-
-void compress_similar(vector<Dir> &dirs,double min_dist,ostream *log=&cerr,bool avg=true,bool verbose=true) {
-  //  return; //TODO: debug
-  if (min_dist<=0) return;
-  double max_s=1.-min_dist;
-  if (log&&verbose) *log<<"max allowed S="<<max_s<<endl;
-  unsigned N=dirs.size();
-  for (int i=0;i<N;++i) {
-    for (int j=i+1;j<N;++j) {
-      double s=dirs[i].tanimoto_coef(dirs[j]);
-      if (log&&verbose) *log<<"S["<<i<<","<<j<<"]="<<s<<' ';
-      if (s>max_s) {
-        if (log) *log << "Collapsing similar directions (T="<<s<<" > "<<max_s<<").  dirs["<<i<<"]="<<dirs[i]<<" dirs["<<j<<"]"<<endl;
-        if (avg) {
-          dirs[i]+=dirs[j];
-          dirs[i]/=2.;
-          if (log) *log<<" averaged="<<dirs[i];
-        }
-        if (log) *log<<endl;
-        swap(dirs[j],dirs[--N]);
-      }
-    }
-    if (log&&verbose) *log<<endl;
-
-  }
-  dirs.resize(N);
-}
-
-struct oracle_directions {
-  MT19937 rng;
-  OracleBleu oracle;
-  vector<Dir> directions;
-
-  bool start_random;
-  bool include_primary;
-  bool old_to_hope;
-  bool fear_to_hope;
-  unsigned n_random;
-  void AddPrimaryAndRandomDirections() {
-    LineOptimizer::CreateOptimizationDirections(
-      fids,n_random,&rng,&directions,include_primary);
-  }
-
-  void Print() {
-    for (int i = 0; i < dev_set_size; ++i)
-      for (int j = 0; j < directions.size(); ++j) {
-        cout << forest_file(i) <<" " << i<<" ";
-        print(cout,origin,"=",";");
-        cout<<" ";
-        print(cout,directions[j],"=",";");
-        cout<<"\n";
-      }
-  }
-
-  void AddOptions(po::options_description *opts) {
-    oracle.AddOptions(opts);
-    opts->add_options()
-      ("dev_set_size,s",po::value<unsigned>(&dev_set_size),"[REQD] Development set size (# of parallel sentences)")
-      ("forest_repository,r",po::value<string>(&forest_repository),"[REQD] Path to forest repository")
-      ("weights,w",po::value<string>(&weights_file),"[REQD] Current feature weights file")
-      ("optimize_feature,o",po::value<vector<string> >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)")
-      ("random_directions,d",po::value<unsigned>(&n_random)->default_value(10),"Number of random directions to run the line optimizer in")
-      ("no_primary,n","don't use the primary (orthogonal each feature alone) directions")
-      ("oracle_directions,O",po::value<unsigned>(&n_oracle)->default_value(0),"read the forests and choose this many directions based on heading toward a hope max (bleu+modelscore) translation.")
-      ("oracle_start_random",po::bool_switch(&start_random),"sample random subsets of dev set for ALL oracle directions, not just those after a sequential run through it")
-      ("oracle_batch,b",po::value<unsigned>(&oracle_batch)->default_value(10),"to produce each oracle direction, sum the 'gradient' over this many sentences")
-      ("max_similarity,m",po::value<double>(&max_similarity)->default_value(0),"remove directions that are too similar (Tanimoto coeff. less than (1-this)).  0 means don't filter, 1 means only 1 direction allowed?")
-      ("fear_to_hope,f",po::bool_switch(&fear_to_hope),"for each of the oracle_directions, also include a direction from fear to hope (as well as origin to hope)")
-      ("no_old_to_hope","don't emit the usual old -> hope oracle")
-      ("decoder_translations",po::value<string>(&decoder_translations_file)->default_value(""),"one per line decoder 1best translations for computing document BLEU vs. sentences-seen-so-far BLEU")
-      ;
-  }
-  void InitCommandLine(int argc, char *argv[], po::variables_map *conf) {
-    po::options_description opts("Configuration options");
-    AddOptions(&opts);
-    opts.add_options()("help,h", "Help");
-
-    po::options_description dcmdline_options;
-    dcmdline_options.add(opts);
-    po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-    po::notify(*conf);
-    if (conf->count("dev_set_size") == 0) {
-      cerr << "Please specify the size of the development set using -s N\n";
-      goto bad_cmdline;
-    }
-    if (conf->count("weights") == 0) {
-      cerr << "Please specify the starting-point weights using -w <weightfile.txt>\n";
-      goto bad_cmdline;
-    }
-    if (conf->count("forest_repository") == 0) {
-      cerr << "Please specify the forest repository location using -r <DIR>\n";
-      goto bad_cmdline;
-    }
-    if (n_oracle && oracle.refs.empty()) {
-      cerr<<"Specify references when using oracle directions\n";
-      goto bad_cmdline;
-    }
-    if (conf->count("help")) {
-      cout << dcmdline_options << endl;
-      exit(0);
-    }
-
-    return;
-    bad_cmdline:
-      cerr << dcmdline_options << endl;
-      exit(1);
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {  // parses argv into *conf and reports missing required options
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("dev_set_size,s",po::value<unsigned>(),"[REQD] Development set size (# of parallel sentences)")
+        ("forest_repository,r",po::value<string>(),"[REQD] Path to forest repository")
+        ("weights,w",po::value<string>(),"[REQD] Current feature weights file")
+        ("optimize_feature,o",po::value<vector<string> >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)")
+        ("random_directions,d",po::value<unsigned int>()->default_value(20),"Number of random directions to run the line optimizer in")
+        ("help,h", "Help");
+  po::options_description dcmdline_options;
+  dcmdline_options.add(opts);
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  bool flag = false;  // set when any required option is missing
+  if (conf->count("dev_set_size") == 0) {
+    cerr << "Please specify the size of the development set using -s N\n";  // -s is dev_set_size; -d is random_directions
+    flag = true;
   }
-
-  int main(int argc, char *argv[]) {
-    po::variables_map conf;
-    InitCommandLine(argc,argv,&conf);
-    init_bleumodel();
-    UseConf(conf);
-    Run();
-    return 0;
+  if (conf->count("weights") == 0) {
+    cerr << "Please specify the starting-point weights using -w <weightfile.txt>\n";
+    flag = true;
   }
-  bool verbose() const { return oracle.verbose; }
-  void Run() {
-//    register_feature_functions();
-    AddPrimaryAndRandomDirections();
-    AddOracleDirections();
-    compress_similar(directions,max_similarity,&cerr,true,verbose());
-    Print();
+  if (conf->count("forest_repository") == 0) {
+    cerr << "Please specify the forest repository location using -r <DIR>\n";
+    flag = true;
   }
-
-
-  Point origin; // old weights that gave model 1best.
-  vector<string> optimize_features;
-  void UseConf(po::variables_map const& conf) {
-    oracle.UseConf(conf);
-    include_primary=!conf.count("no_primary");
-    old_to_hope=!conf.count("no_old_to_hope");
-
-    if (conf.count("optimize_feature") > 0)
-      optimize_features=conf["optimize_feature"].as<vector<string> >();
-    Init();
+  if (flag || conf->count("help")) {
+    cerr << dcmdline_options << endl;
+    exit(1);
   }
+}
 
-  string weights_file;
-  double max_similarity;
-  unsigned n_oracle, oracle_batch;
-  string forest_repository;
-  unsigned dev_set_size;
-  vector<Oracle> oracles;
-  vector<int> fids;
-  string forest_file(unsigned i) const {
-    ostringstream o;
-    o << forest_repository << '/' << i << ".json.gz";
-    return o.str();
-  }
-
-  oracle_directions() { }
-
-  Sentences model_hyps;
-
-  vector<ScoreP> model_scores;
-  bool have_doc;
-  void Init() {
-    have_doc=!decoder_translations_file.empty();
-    if (have_doc) {
-      model_hyps.Load(decoder_translations_file);
-      if (verbose()) model_hyps.Print(cerr,5);
-      model_scores.resize(model_hyps.size());
-      if (dev_set_size!=model_hyps.size()) {
-        cerr<<"You supplied decoder_translations with a different number of lines ("<<model_hyps.size()<<") than dev_set_size ("<<dev_set_size<<")"<<endl;
-        abort();
-      }
-      cerr << "Scoring model translations " << model_hyps << endl;
-      for (int i=0;i<model_hyps.size();++i) {
-        //TODO: what is scoreCcand? without clipping? do without for consistency w/ oracle
-        model_scores[i]=oracle.ds[i]->ScoreCandidate(model_hyps[i]);
-        assert(model_scores[i]);
-        if (verbose()) cerr<<"Before model["<<i<<"]: "<<ds().ScoreDetails()<<endl;
-        if (verbose()) cerr<<"model["<<i<<"]: "<<model_scores[i]->ScoreDetails()<<endl;
-        oracle.doc_score->PlusEquals(*model_scores[i]);
-        if (verbose()) cerr<<"After model["<<i<<"]: "<<ds().ScoreDetails()<<endl;
-      }
-      //TODO: compute doc bleu stats for each sentence, then when getting oracle temporarily exclude stats for that sentence (skip regular score updating)
-    }
-    start_random=false;
-    cerr << "Forest repo: " << forest_repository << endl;
-    assert(DirectoryExists(forest_repository));
-    vector<string> features;
-    vector<weight_t> dorigin;
-    Weights::InitFromFile(weights_file, &dorigin, &features);
-    if (optimize_features.size())
-      features=optimize_features;
-    Weights::InitSparseVector(dorigin, &origin);
-    fids.clear();
-    AddFeatureIds(features);
-    oracles.resize(dev_set_size);
-  }
-
-  void AddFeatureIds(vector<string> const& features) {
-    int i = fids.size();
-    fids.resize(fids.size()+features.size());
-    for (; i < features.size(); ++i)
-      fids[i] = FD::Convert(features[i]);
- }
-
-
-  std::string decoder_translations_file; // one per line
-  //TODO: is it worthwhile to get a complete document bleu first?  would take a list of 1best translations one per line from the decoders, rather than loading all the forests (expensive).  translations are in run.raw.N.gz - new arg
-  void adjust_doc(unsigned i,double scale=1.) {
-    oracle.doc_score->PlusEquals(*model_scores[i],scale);
-  }
-
-  Score &ds() {
-    return *oracle.doc_score;
-  }
-
-  Oracle const& ComputeOracle(unsigned i) {
-    Oracle &o=oracles[i];
-    if (o.is_null()) {
-      if (have_doc) {
-        if (verbose()) cerr<<"Before removing i="<<i<<" "<<ds().ScoreDetails()<<"\n";
-        adjust_doc(i,-1);
-      }
-      ReadFile rf(forest_file(i));
-      Hypergraph hg;
-      {
-        Timer t("Loading forest from JSON "+forest_file(i));
-        HypergraphIO::ReadFromJSON(rf.stream(), &hg);
-      }
-      if (verbose()) cerr<<"Before oracle["<<i<<"]: "<<ds().ScoreDetails()<<endl;
-      o=oracle.ComputeOracle(oracle.MakeMetadata(hg,i),&hg,origin);
-      if (verbose()) {
-        cerr << o;
-        ScoreP hopesc=oracle.GetScore(o.hope.sentence,i);
-        oracle.doc_score->PlusEquals(*hopesc,1);
-        cerr<<"With hope: "<<ds().ScoreDetails()<<endl;
-        oracle.doc_score->PlusEquals(*hopesc,-1);
-        cerr<<"Without hope: "<<ds().ScoreDetails()<<endl;
-        cerr<<" oracle="<<oracle.GetScore(o.hope.sentence,i)->ScoreDetails()<<endl
-            <<" model="<<oracle.GetScore(o.model.sentence,i)->ScoreDetails()<<endl;
-        if (have_doc)
-          cerr<<" doc (should = model): "<<model_scores[i]->ScoreDetails()<<endl;
-      }
-      if (have_doc) {
-        adjust_doc(i,1);
-      } else
-        oracle.IncludeLastScore();
-    }
-    return o;
-  }
-
-  // if start_random is true, immediately sample w/ replacement from src sentences; otherwise, consume them sequentially until exhausted, then random.  oracle vectors are summed
-  void AddOracleDirections() {
-    MT19937::IntRNG rsg=rng.inclusive(0,dev_set_size-1);
-    unsigned b=0;
-    for(unsigned i=0;i<n_oracle;++i) {
-      Dir o2hope;
-      Dir fear2hope;
-      for (unsigned j=0;j<oracle_batch;++j,++b) {
-        Oracle const& o=ComputeOracle((start_random||b>=dev_set_size) ? rsg() : b);
-
-        if (old_to_hope)
-          o2hope+=o.ModelHopeGradient();
-        if (fear_to_hope)
-          fear2hope+=o.FearHopeGradient();
-      }
-      double N=(double)oracle_batch;
-      if (old_to_hope) {
-        o2hope/=N;
-        directions.push_back(o2hope);
-      }
-      if (fear_to_hope) {
-        fear2hope/=N;
-        directions.push_back(fear2hope);
-      }
+int main(int argc, char** argv) {  // writes one line per (sentence, direction) pair to stdout
+  RandomNumberGenerator<boost::mt19937> rng;
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  vector<string> features;
+  SparseVector<weight_t> origin;
+  vector<weight_t> w;
+  Weights::InitFromFile(conf["weights"].as<string>(), &w, &features);
+  Weights::InitSparseVector(w, &origin);
+  const string forest_repository = conf["forest_repository"].as<string>();
+  assert(DirectoryExists(forest_repository));  // only checked when assertions are compiled in
+  if (conf.count("optimize_feature") > 0)  // explicit -o features replace the names read from the weights file
+    features=conf["optimize_feature"].as<vector<string> >();
+  vector<SparseVector<weight_t> > directions;
+  vector<int> fids(features.size());
+  for (size_t i = 0; i < features.size(); ++i)  // size_t index avoids a signed/unsigned comparison
+    fids[i] = FD::Convert(features[i]);
+  LineOptimizer::CreateOptimizationDirections(
+     fids,
+     conf["random_directions"].as<unsigned int>(),
+     &rng,
+     &directions);
+  unsigned dev_set_size = conf["dev_set_size"].as<unsigned>();
+  for (unsigned i = 0; i < dev_set_size; ++i) {
+    for (unsigned j = 0; j < directions.size(); ++j) {
+      cout << forest_repository << '/' << i << ".json.gz " << i << ' ';  // forest path, then sentence index
+      print(cout, origin, "=", ";");
+      cout << ' ';
+      print(cout, directions[j], "=", ";");
+      cout << '\n';  // newline without a per-line flush; stdout is flushed at normal exit
     }
   }
-};
-
-int main(int argc, char** argv) {
-  oracle_directions od;
-  return od.main(argc,argv);
+  return 0;
 }
diff --git a/vest/mr_vest_map.cc b/vest/mr_vest_map.cc
index 8f6e085d..7d9625bc 100644
--- a/vest/mr_vest_map.cc
+++ b/vest/mr_vest_map.cc
@@ -82,20 +82,20 @@ int main(int argc, char** argv) {
     if (line.empty()) continue;
     istringstream is(line);
     int sent_id;
-    string file, s_origin, s_axis;
+    string file, s_origin, s_direction;
     // path-to-file (JSON) sent_ed starting-point search-direction
-    is >> file >> sent_id >> s_origin >> s_axis;
+    is >> file >> sent_id >> s_origin >> s_direction;
     SparseVector<double> origin;
-    assert(ReadSparseVectorString(s_origin, &origin));
-    SparseVector<double> axis;
-    assert(ReadSparseVectorString(s_axis, &axis));
-    // cerr << "File: " << file << "\nAxis: " << axis << "\n   X: " << origin << endl;
+    ReadSparseVectorString(s_origin, &origin);
+    SparseVector<double> direction;
+    ReadSparseVectorString(s_direction, &direction);
+    // cerr << "File: " << file << "\nDir: " << direction << "\n   X: " << origin << endl;
     if (last_file != file) {
       last_file = file;
       ReadFile rf(file);
       HypergraphIO::ReadFromJSON(rf.stream(), &hg);
     }
-    ViterbiEnvelopeWeightFunction wf(origin, axis);
+    ViterbiEnvelopeWeightFunction wf(origin, direction);
     ViterbiEnvelope ve = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
     ErrorSurface es;
 
@@ -104,7 +104,7 @@ int main(int argc, char** argv) {
     // cerr << "Error surface has " << es.size() << " segments\n";
     string val;
     es.Serialize(&val);
-    cout << 'M' << ' ' << s_origin << ' ' << s_axis << '\t';
+    cout << 'M' << ' ' << s_origin << ' ' << s_direction << '\t';
     B64::b64encode(val.c_str(), val.size(), &cout);
     cout << endl << flush;
   }
-- 
cgit v1.2.3


From dbf367e0fc9d3faf906340d1f51f2dbda1892081 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Fri, 3 Feb 2012 17:19:16 -0500
Subject: make pro use new interface

---
 .gitignore              | 77 ++++++++++++++++++++++++++++++++++++++++---------
 mteval/ns.cc            |  4 +++
 mteval/ns.h             |  4 +++
 mteval/ns_ter.h         |  1 +
 pro-train/dist-pro.pl   |  4 +--
 pro-train/mr_pro_map.cc | 37 +++++++++++++++---------
 6 files changed, 98 insertions(+), 29 deletions(-)

(limited to 'mteval')

diff --git a/.gitignore b/.gitignore
index 5efe37b0..ab8bf2c7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,46 @@
+mira/kbest_mira
+sa-extract/calignment.c
+sa-extract/calignment.so
+sa-extract/cdat.c
+sa-extract/cdat.so
+sa-extract/cfloatlist.c
+sa-extract/cfloatlist.so
+sa-extract/cintlist.c
+sa-extract/cintlist.so
+sa-extract/clex.c
+sa-extract/clex.so
+sa-extract/cn.pyc
+sa-extract/context_model.pyc
+sa-extract/cstrmap.c
+sa-extract/cstrmap.so
+sa-extract/csuf.c
+sa-extract/csuf.so
+sa-extract/cveb.c
+sa-extract/cveb.so
+sa-extract/lcp.c
+sa-extract/lcp.so
+sa-extract/log.pyc
+sa-extract/manager.pyc
+sa-extract/model.pyc
+sa-extract/monitor.pyc
+sa-extract/precomputation.c
+sa-extract/precomputation.so
+sa-extract/rule.c
+sa-extract/rule.so
+sa-extract/rulefactory.c
+sa-extract/rulefactory.so
+sa-extract/sgml.pyc
+sa-extract/sym.c
+sa-extract/sym.so
+training/mpi_flex_optimize
+training/test_ngram
+utils/dict_test
+utils/logval_test
+utils/mfcr_test
+utils/phmt
+utils/small_vector_test
+utils/ts
+utils/weights_test
 pro-train/.deps
 pro-train/mr_pro_map
 pro-train/mr_pro_reduce
@@ -38,8 +81,8 @@ utils/.deps/
 utils/libutils.a
 *swp
 *.o
-vest/sentserver
-vest/sentclient
+dpmert/sentserver
+dpmert/sentclient
 gi/pyp-topics/src/contexts_lexer.cc
 config.guess
 config.sub
@@ -61,12 +104,12 @@ training/mr_em_map_adapter
 training/mr_reduce_to_weights
 training/optimize_test
 training/plftools
-vest/fast_score
-vest/lo_test
-vest/mr_vest_map
-vest/mr_vest_reduce
-vest/scorer_test
-vest/union_forests
+dpmert/fast_score
+dpmert/lo_test
+dpmert/mr_dpmert_map
+dpmert/mr_dpmert_reduce
+dpmert/scorer_test
+dpmert/union_forests
 Makefile
 Makefile.in
 aclocal.m4
@@ -99,11 +142,11 @@ training/Makefile.in
 training/*.o
 training/grammar_convert
 training/model1
-vest/.deps/
-vest/Makefile
-vest/Makefile.in
-vest/mr_vest_generate_mapper_input
-vest/*.o
+dpmert/.deps/
+dpmert/Makefile
+dpmert/Makefile.in
+dpmert/mr_dpmert_generate_mapper_input
+dpmert/*.o
 decoder/logval_test
 extools/build_lexical_translation
 extools/filter_grammar
@@ -124,7 +167,6 @@ m4/ltoptions.m4
 m4/ltsugar.m4
 m4/ltversion.m4
 m4/lt~obsolete.m4
-vest/mbr_kbest
 extools/featurize_grammar
 extools/filter_score_grammar
 gi/posterior-regularisation/prjava/build/
@@ -143,3 +185,10 @@ gi/posterior-regularisation/prjava/lib/prjava-20100715.jar
 *.ps
 *.toc
 *~
+gi/pf/align-lexonly
+gi/pf/align-lexonly-pyp
+gi/pf/condnaive
+mteval/scorer_test
+phrasinator/gibbs_train_plm
+phrasinator/gibbs_train_plm_notables
+.*
diff --git a/mteval/ns.cc b/mteval/ns.cc
index da678b84..788f809a 100644
--- a/mteval/ns.cc
+++ b/mteval/ns.cc
@@ -21,6 +21,10 @@ map<string, EvaluationMetric*> EvaluationMetric::instances_;
 SegmentEvaluator::~SegmentEvaluator() {}
 EvaluationMetric::~EvaluationMetric() {}
 
+bool EvaluationMetric::IsErrorMetric() const {
+  return false;
+}
+
 struct DefaultSegmentEvaluator : public SegmentEvaluator {
   DefaultSegmentEvaluator(const vector<vector<WordID> >& refs, const EvaluationMetric* em) : refs_(refs), em_(em) {}
   void Evaluate(const vector<WordID>& hyp, SufficientStats* out) const {
diff --git a/mteval/ns.h b/mteval/ns.h
index d88c263b..4e4c6975 100644
--- a/mteval/ns.h
+++ b/mteval/ns.h
@@ -94,6 +94,10 @@ class EvaluationMetric {
  public:
   const std::string& MetricId() const { return name_; }
 
+  // returns true for metrics like WER and TER where lower scores are better
+  // false for metrics like BLEU and METEOR where higher scores are better
+  virtual bool IsErrorMetric() const;
+
   virtual unsigned SufficientStatisticsVectorSize() const;
   virtual float ComputeScore(const SufficientStats& stats) const = 0;
   virtual std::string DetailedScore(const SufficientStats& stats) const;
diff --git a/mteval/ns_ter.h b/mteval/ns_ter.h
index 3190fc1b..c5c25413 100644
--- a/mteval/ns_ter.h
+++ b/mteval/ns_ter.h
@@ -9,6 +9,7 @@ class TERMetric : public EvaluationMetric {
   TERMetric() : EvaluationMetric("TER") {}
 
  public:
+  virtual bool IsErrorMetric() const;
   virtual unsigned SufficientStatisticsVectorSize() const;
   virtual std::string DetailedScore(const SufficientStats& stats) const;
   virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
diff --git a/pro-train/dist-pro.pl b/pro-train/dist-pro.pl
index ba9cdc06..31258fa6 100755
--- a/pro-train/dist-pro.pl
+++ b/pro-train/dist-pro.pl
@@ -12,7 +12,7 @@ use POSIX ":sys_wait_h";
 my $QSUB_CMD = qsub_args(mert_memory());
 my $default_jobs = env_default_jobs();
 
-my $VEST_DIR="$SCRIPT_DIR/../vest";
+my $VEST_DIR="$SCRIPT_DIR/../dpmert";
 require "$VEST_DIR/libcall.pl";
 
 # Default settings
@@ -338,7 +338,7 @@ while (1){
 		$mapoutput =~ s/mapinput/mapoutput/;
 		push @mapoutputs, "$dir/splag.$im1/$mapoutput";
 		$o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard";
-		my $script = "$MAPPER -s $srcFile -l $metric $refs_comma_sep -w $inweights -K $dir/kbest < $dir/splag.$im1/$shard > $dir/splag.$im1/$mapoutput";
+		my $script = "$MAPPER -s $srcFile -m $metric $refs_comma_sep -w $inweights -K $dir/kbest < $dir/splag.$im1/$shard > $dir/splag.$im1/$mapoutput";
 		if ($use_make) {
 			my $script_file = "$dir/scripts/map.$shard";
 			open F, ">$script_file" or die "Can't write $script_file: $!";
diff --git a/pro-train/mr_pro_map.cc b/pro-train/mr_pro_map.cc
index 0a9b75d7..52b67f32 100644
--- a/pro-train/mr_pro_map.cc
+++ b/pro-train/mr_pro_map.cc
@@ -13,11 +13,12 @@
 #include "filelib.h"
 #include "stringlib.h"
 #include "weights.h"
-#include "scorer.h"
 #include "inside_outside.h"
 #include "hg_io.h"
 #include "kbest.h"
 #include "viterbi.h"
+#include "ns.h"
+#include "ns_docscorer.h"
 
 // This is Figure 4 (Algorithm Sampler) from Hopkins&May (2011)
 
@@ -80,7 +81,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
         ("kbest_repository,K",po::value<string>()->default_value("./kbest"),"K-best list repository (directory)")
         ("input,i",po::value<string>()->default_value("-"), "Input file to map (- is STDIN)")
         ("source,s",po::value<string>()->default_value(""), "Source file (ignored, except for AER)")
-        ("loss_function,l",po::value<string>()->default_value("ibm_bleu"), "Loss function being optimized")
+        ("evaluation_metric,m",po::value<string>()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)")
         ("kbest_size,k",po::value<unsigned>()->default_value(1500u), "Top k-hypotheses to extract")
         ("candidate_pairs,G", po::value<unsigned>()->default_value(5000u), "Number of pairs to sample per hypothesis (Gamma)")
         ("best_pairs,X", po::value<unsigned>()->default_value(50u), "Number of pairs, ranked by magnitude of objective delta, to retain (Xi)")
@@ -109,9 +110,12 @@ struct HypInfo {
   HypInfo(const vector<WordID>& h, const SparseVector<weight_t>& feats) : hyp(h), g_(-100.0f), x(feats) {}
 
   // lazy evaluation
-  double g(const SentenceScorer& scorer) const {
-    if (g_ == -100.0f)
-      g_ = scorer.ScoreCandidate(hyp)->ComputeScore();
+  double g(const SegmentEvaluator& scorer, const EvaluationMetric* metric) const {
+    if (g_ == -100.0f) {
+      SufficientStats ss;
+      scorer.Evaluate(hyp, &ss);
+      g_ = metric->ComputeScore(ss);
+    }
     return g_;
   }
   vector<WordID> hyp;
@@ -233,15 +237,21 @@ struct DiffOrder {
   }
 };
 
-void Sample(const unsigned gamma, const unsigned xi, const vector<HypInfo>& J_i, const SentenceScorer& scorer, const bool invert_score, vector<TrainingInstance>* pv) {
+void Sample(const unsigned gamma,
+            const unsigned xi,
+            const vector<HypInfo>& J_i,
+            const SegmentEvaluator& scorer,
+            const EvaluationMetric* metric,
+            vector<TrainingInstance>* pv) {
+  const bool invert_score = metric->IsErrorMetric();
   vector<TrainingInstance> v1, v2;
   float avg_diff = 0;
   for (unsigned i = 0; i < gamma; ++i) {
     const size_t a = rng->inclusive(0, J_i.size() - 1)();
     const size_t b = rng->inclusive(0, J_i.size() - 1)();
     if (a == b) continue;
-    float ga = J_i[a].g(scorer);
-    float gb = J_i[b].g(scorer);
+    float ga = J_i[a].g(scorer, metric);
+    float gb = J_i[b].g(scorer, metric);
     bool positive = gb < ga;
     if (invert_score) positive = !positive;
     const float gdiff = fabs(ga - gb);
@@ -288,11 +298,12 @@ int main(int argc, char** argv) {
     rng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
   else
     rng.reset(new MT19937);
-  const string loss_function = conf["loss_function"].as<string>();
+  const string evaluation_metric = conf["evaluation_metric"].as<string>();
+
+  EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric);
+  DocumentScorer ds(metric, conf["reference"].as<vector<string> >());
+  cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl;
 
-  ScoreType type = ScoreTypeFromString(loss_function);
-  DocScorer ds(type, conf["reference"].as<vector<string> >(), conf["source"].as<string>());
-  cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl;
   Hypergraph hg;
   string last_file;
   ReadFile in_read(conf["input"].as<string>());
@@ -335,7 +346,7 @@ int main(int argc, char** argv) {
     Dedup(&J_i);
     WriteKBest(kbest_file, J_i);
 
-    Sample(gamma, xi, J_i, *ds[sent_id], (type == TER), &v);
+    Sample(gamma, xi, J_i, *ds[sent_id], metric, &v);
     for (unsigned i = 0; i < v.size(); ++i) {
       const TrainingInstance& vi = v[i];
       cout << vi.y << "\t" << vi.x << endl;
-- 
cgit v1.2.3


From 3a2fc36378337147a956e439db31baf91bfb95c8 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Fri, 3 Feb 2012 18:03:49 -0500
Subject: escaping tool for grammar extractor

---
 mteval/ns_ter.cc             |  4 ++++
 sa-extract/Makefile          |  4 ++--
 sa-extract/README            | 14 +++++++++++++-
 sa-extract/escape-testset.pl | 35 +++++++++++++++++++++++++++++++++++
 sa-extract/example/README    |  2 +-
 5 files changed, 55 insertions(+), 4 deletions(-)
 create mode 100755 sa-extract/escape-testset.pl

(limited to 'mteval')

diff --git a/mteval/ns_ter.cc b/mteval/ns_ter.cc
index 91a17f0d..0e1008db 100644
--- a/mteval/ns_ter.cc
+++ b/mteval/ns_ter.cc
@@ -22,6 +22,10 @@ static const unsigned kDUMMY_LAST_ENTRY = 5;
 using namespace std;
 using namespace std::tr1;
 
+bool TERMetric::IsErrorMetric() const {
+  return true;
+}
+
 namespace NewScorer {
 
 struct COSTS {
diff --git a/sa-extract/Makefile b/sa-extract/Makefile
index e2b6158d..7b39ae4d 100644
--- a/sa-extract/Makefile
+++ b/sa-extract/Makefile
@@ -1,7 +1,7 @@
 PYVER=python2.7
-PYDIR=/usr
+PYDIR=/usr/local/Cellar/python/2.7.2
 PYINCLUDE=$(PYDIR)/include/$(PYVER)
-CYTHON=/usr/bin/cython
+CYTHON=/usr/local/share/python/cython
 PYTHON=$(PYDIR)/bin/python
 
 %.c: %.pyx
diff --git a/sa-extract/README b/sa-extract/README
index f43e58cc..e4022c7e 100644
--- a/sa-extract/README
+++ b/sa-extract/README
@@ -28,10 +28,22 @@ COMPILING A PARALLEL CORPUS AND WORD ALIGNMENT
                 -a alignment_name=alignment.txt > extract.ini
 
 
+  The training data should be in two parallel text files (source.fr, source.en)
+  and the alignments are expected in the "0-0 1-2 2-1 ..." format produced by
+  most alignment toolkits. The text files should NOT be escaped for non-XML
+  characters.
+
+
 EXTRACTION OF PER-SENTENCE GRAMMARS
 ==============================================================================
+The most common use-case we support is extraction of "per-sentence" grammars
+for each segment in a test set. You may run the extractor on a test set, but
+it will try to interpret tags as SGML markup, so we provide a script that does
+the escaping: ./escape-testset.pl.
+
 - Example:
-  cat test.fr | extractor.py -c extract.ini
+
+  cat test.fr | ./escape-testset.pl | ./extractor.py -c extract.ini
 
 
 EXTRACTION OF COMPLETE TEST-SET GRAMMARS
diff --git a/sa-extract/escape-testset.pl b/sa-extract/escape-testset.pl
new file mode 100755
index 00000000..02fd7445
--- /dev/null
+++ b/sa-extract/escape-testset.pl
@@ -0,0 +1,35 @@
+#!/usr/bin/perl -w
+# Wraps each input line in <seg id="N"> ... </seg> after escaping &, < and >.
+use utf8;
+use strict;
+# Standard input and output are both read/written as UTF-8.
+binmode(STDIN,":utf8");
+binmode(STDOUT,":utf8");
+# Input comes from the files named on the command line, or STDIN if none.
+my @fh = ();
+if (scalar @ARGV == 0) {
+  push @fh, \*STDIN;
+} else {
+  for my $file (@ARGV) {
+    my $f;
+    open $f, "<", $file or die "Can't read $file: $!\n";  # 3-arg open: name is taken literally
+    binmode $f, ":utf8";
+    push @fh, $f;
+  }
+}
+# Segment ids start at 0 and keep increasing across all input files.
+my $id = -1;
+for my $f (@fh) {
+  while(<$f>) {
+    chomp;
+    die "Empty line in test set" if /^\s*$/;
+    die "Please remove <seg> tags from input:\n$_" if /^\s*<seg/i;
+    $id++;
+    s/&/\&amp;/g;  # must run first so the entities added below are not re-escaped
+    s/</\&lt;/g;
+    s/>/\&gt;/g;
+    print "<seg id=\"$id\"> $_ </seg>\n";
+  }
+}
+
+
diff --git a/sa-extract/example/README b/sa-extract/example/README
index 9819ba5f..f6eac52b 100644
--- a/sa-extract/example/README
+++ b/sa-extract/example/README
@@ -4,5 +4,5 @@ Commands to compile a corpus and extract some grammars
 # compile
 ../sa-compile.pl -b nc=corpus.de.gz,corpus.en.gz -a gdfa=corpus.align.gz > extract.ini
 # extract
-cat test.de | ../extractor.py -c extract.ini
+cat test.de | ../escape-testset.pl | ../extractor.py -c extract.ini
 
-- 
cgit v1.2.3


From 77d35a1475adf7144b1109680377d17bff4233f7 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Wed, 8 Feb 2012 18:32:12 -0500
Subject: oops, broke mbr when i switched to the new scoring API

---
 mteval/mbr_kbest.cc | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'mteval')

diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc
index b5e4750c..2bd31566 100644
--- a/mteval/mbr_kbest.cc
+++ b/mteval/mbr_kbest.cc
@@ -77,6 +77,7 @@ int main(int argc, char** argv) {
   InitCommandLine(argc, argv, &conf);
   const string smetric = conf["evaluation_metric"].as<string>();
   EvaluationMetric* metric = EvaluationMetric::Instance(smetric);
+
   const bool is_loss = (UppercaseString(smetric) == "TER");
   const bool output_list = conf.count("output_list") > 0;
   const string file = conf["input"].as<string>();
@@ -101,12 +102,14 @@ int main(int argc, char** argv) {
     double mbr_loss = numeric_limits<double>::max();
     for (int i = 0 ; i < list.size(); ++i) {
       const vector<vector<WordID> > refs(1, list[i].first);
+      boost::shared_ptr<SegmentEvaluator> segeval = metric->
+          CreateSegmentEvaluator(refs);
 
       double wl_acc = 0;
       for (int j = 0; j < list.size(); ++j) {
         if (i != j) {
           SufficientStats ss;
-          metric->ComputeSufficientStatistics(list[j].first, refs, &ss);
+          segeval->Evaluate(list[j].first, &ss);
           double loss = 1.0 - metric->ComputeScore(ss);
           if (is_loss) loss = 1.0 - loss;
           double weighted_loss = loss * (joints[j] / marginal).as_float();
-- 
cgit v1.2.3