summaryrefslogtreecommitdiff
path: root/mteval
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2012-03-13 09:24:47 +0100
committerPatrick Simianer <p@simianer.de>2012-03-13 09:24:47 +0100
commitef6085e558e26c8819f1735425761103021b6470 (patch)
tree5cf70e4c48c64d838e1326b5a505c8c4061bff4a /mteval
parent10a232656a0c882b3b955d2bcfac138ce11e8a2e (diff)
parentdfbc278c1057555fda9312291c8024049e00b7d8 (diff)
merge with upstream
Diffstat (limited to 'mteval')
-rw-r--r--mteval/Makefile.am2
-rw-r--r--mteval/fast_score.cc40
-rw-r--r--mteval/mbr_kbest.cc24
-rw-r--r--mteval/ns.cc290
-rw-r--r--mteval/ns.h115
-rw-r--r--mteval/ns_comb.cc87
-rw-r--r--mteval/ns_comb.h19
-rw-r--r--mteval/ns_docscorer.cc60
-rw-r--r--mteval/ns_docscorer.h31
-rw-r--r--mteval/ns_ext.cc130
-rw-r--r--mteval/ns_ext.h21
-rw-r--r--mteval/ns_ter.cc492
-rw-r--r--mteval/ns_ter.h21
-rw-r--r--mteval/scorer_test.cc48
14 files changed, 1352 insertions, 28 deletions
diff --git a/mteval/Makefile.am b/mteval/Makefile.am
index 95845090..e7126675 100644
--- a/mteval/Makefile.am
+++ b/mteval/Makefile.am
@@ -10,7 +10,7 @@ endif
noinst_LIBRARIES = libmteval.a
-libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc
+libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc
fast_score_SOURCES = fast_score.cc
fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz
diff --git a/mteval/fast_score.cc b/mteval/fast_score.cc
index 5ee264a6..a271ccc5 100644
--- a/mteval/fast_score.cc
+++ b/mteval/fast_score.cc
@@ -4,9 +4,11 @@
#include <boost/program_options.hpp>
#include <boost/program_options/variables_map.hpp>
+#include "stringlib.h"
#include "filelib.h"
#include "tdict.h"
-#include "scorer.h"
+#include "ns.h"
+#include "ns_docscorer.h"
using namespace std;
namespace po = boost::program_options;
@@ -14,8 +16,8 @@ namespace po = boost::program_options;
void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
po::options_description opts("Configuration options");
opts.add_options()
- ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation(s) (tokenized text file)")
- ("loss_function,l",po::value<string>()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)")
+ ("reference,r",po::value<vector<string> >(), "[1 or more required] Reference translation(s) in tokenized text files")
+ ("evaluation_metric,m",po::value<string>()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)")
("in_file,i", po::value<string>()->default_value("-"), "Input file")
("help,h", "Help");
po::options_description dcmdline_options;
@@ -35,24 +37,29 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
int main(int argc, char** argv) {
po::variables_map conf;
InitCommandLine(argc, argv, &conf);
- const string loss_function = conf["loss_function"].as<string>();
- ScoreType type = ScoreTypeFromString(loss_function);
- DocScorer ds(type, conf["reference"].as<vector<string> >(), "");
+ string loss_function = UppercaseString(conf["evaluation_metric"].as<string>());
+ if (loss_function == "COMBI") {
+ cerr << "WARNING: 'combi' metric is no longer supported, switching to 'COMB:TER=-0.5;IBM_BLEU=0.5'\n";
+ loss_function = "COMB:TER=-0.5;IBM_BLEU=0.5";
+ } else if (loss_function == "BLEU") {
+ cerr << "WARNING: 'BLEU' is ambiguous, assuming 'IBM_BLEU'\n";
+ loss_function = "IBM_BLEU";
+ }
+ EvaluationMetric* metric = EvaluationMetric::Instance(loss_function);
+ DocumentScorer ds(metric, conf["reference"].as<vector<string> >());
cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl;
ReadFile rf(conf["in_file"].as<string>());
- ScoreP acc;
+ SufficientStats acc;
istream& in = *rf.stream();
int lc = 0;
- while(in) {
- string line;
- getline(in, line);
- if (line.empty() && !in) break;
+ string line;
+ while(getline(in, line)) {
vector<WordID> sent;
TD::ConvertSentence(line, &sent);
- ScoreP sentscore = ds[lc]->ScoreCandidate(sent);
- if (!acc) { acc = sentscore->GetZero(); }
- acc->PlusEquals(*sentscore);
+ SufficientStats t;
+ ds[lc]->Evaluate(sent, &t);
+ acc += t;
++lc;
}
assert(lc > 0);
@@ -63,9 +70,8 @@ int main(int argc, char** argv) {
if (lc != ds.size())
cerr << "Fewer sentences in hyp (" << lc << ") than refs ("
<< ds.size() << "): scoring partial set!\n";
- float score = acc->ComputeScore();
- string details;
- acc->ScoreDetails(&details);
+ float score = metric->ComputeScore(acc);
+ const string details = metric->DetailedScore(acc);
cerr << details << endl;
cout << score << endl;
return 0;
diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc
index 64a6a8bf..2bd31566 100644
--- a/mteval/mbr_kbest.cc
+++ b/mteval/mbr_kbest.cc
@@ -5,7 +5,7 @@
#include "prob.h"
#include "tdict.h"
-#include "scorer.h"
+#include "ns.h"
#include "filelib.h"
#include "stringlib.h"
@@ -17,7 +17,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
po::options_description opts("Configuration options");
opts.add_options()
("scale,a",po::value<double>()->default_value(1.0), "Posterior scaling factor (alpha)")
- ("loss_function,l",po::value<string>()->default_value("bleu"), "Loss function")
+ ("evaluation_metric,m",po::value<string>()->default_value("ibm_bleu"), "Evaluation metric")
("input,i",po::value<string>()->default_value("-"), "File to read k-best lists from")
("output_list,L", "Show reranked list as output")
("help,h", "Help");
@@ -75,13 +75,15 @@ bool ReadKBestList(istream* in, string* sent_id, vector<pair<vector<WordID>, pro
int main(int argc, char** argv) {
po::variables_map conf;
InitCommandLine(argc, argv, &conf);
- const string metric = conf["loss_function"].as<string>();
+ const string smetric = conf["evaluation_metric"].as<string>();
+ EvaluationMetric* metric = EvaluationMetric::Instance(smetric);
+
+ const bool is_loss = (UppercaseString(smetric) == "TER");
const bool output_list = conf.count("output_list") > 0;
const string file = conf["input"].as<string>();
const double mbr_scale = conf["scale"].as<double>();
cerr << "Posterior scaling factor (alpha) = " << mbr_scale << endl;
- ScoreType type = ScoreTypeFromString(metric);
vector<pair<vector<WordID>, prob_t> > list;
ReadFile rf(file);
string sent_id;
@@ -99,15 +101,17 @@ int main(int argc, char** argv) {
vector<double> mbr_scores(output_list ? list.size() : 0);
double mbr_loss = numeric_limits<double>::max();
for (int i = 0 ; i < list.size(); ++i) {
- vector<vector<WordID> > refs(1, list[i].first);
- //cerr << i << ": " << list[i].second <<"\t" << TD::GetString(list[i].first) << endl;
- ScorerP scorer = SentenceScorer::CreateSentenceScorer(type, refs);
+ const vector<vector<WordID> > refs(1, list[i].first);
+ boost::shared_ptr<SegmentEvaluator> segeval = metric->
+ CreateSegmentEvaluator(refs);
+
double wl_acc = 0;
for (int j = 0; j < list.size(); ++j) {
if (i != j) {
- ScoreP s = scorer->ScoreCandidate(list[j].first);
- double loss = 1.0 - s->ComputeScore();
- if (type == TER || type == AER) loss = 1.0 - loss;
+ SufficientStats ss;
+ segeval->Evaluate(list[j].first, &ss);
+ double loss = 1.0 - metric->ComputeScore(ss);
+ if (is_loss) loss = 1.0 - loss;
double weighted_loss = loss * (joints[j] / marginal).as_float();
wl_acc += weighted_loss;
if ((!output_list) && wl_acc > mbr_loss) break;
diff --git a/mteval/ns.cc b/mteval/ns.cc
new file mode 100644
index 00000000..788f809a
--- /dev/null
+++ b/mteval/ns.cc
@@ -0,0 +1,290 @@
+#include "ns.h"
+#include "ns_ter.h"
+#include "ns_ext.h"
+#include "ns_comb.h"
+
+#include <cstdio>
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+
+#include "tdict.h"
+#include "stringlib.h"
+
+using namespace std;
+using boost::shared_ptr;
+
+map<string, EvaluationMetric*> EvaluationMetric::instances_;
+
+SegmentEvaluator::~SegmentEvaluator() {}
+EvaluationMetric::~EvaluationMetric() {}
+
+// Base-class default: the metric is not an error metric (higher is better).
+// Subclasses whose scores are errors override this to return true.
+bool EvaluationMetric::IsErrorMetric() const {
+ return false;
+}
+
+// Generic per-segment evaluator used by metrics that do not need to
+// precompute anything from the references: it keeps a copy of the references
+// and forwards each hypothesis to the metric's ComputeSufficientStatistics().
+struct DefaultSegmentEvaluator : public SegmentEvaluator {
+ DefaultSegmentEvaluator(const vector<vector<WordID> >& refs, const EvaluationMetric* em) : refs_(refs), em_(em) {}
+ // Fills *out via the owning metric, then stamps it with the metric's id.
+ void Evaluate(const vector<WordID>& hyp, SufficientStats* out) const {
+ em_->ComputeSufficientStatistics(hyp, refs_, out);
+ out->id_ = em_->MetricId();
+ }
+ const vector<vector<WordID> > refs_; // private copy of the references
+ const EvaluationMetric* em_; // not owned
+};
+
+// Default factory: wraps the references in a DefaultSegmentEvaluator that
+// calls back into this metric for every hypothesis.
+shared_ptr<SegmentEvaluator> EvaluationMetric::CreateSegmentEvaluator(const vector<vector<WordID> >& refs) const {
+ return shared_ptr<SegmentEvaluator>(new DefaultSegmentEvaluator(refs, this));
+}
+
+// Size reported by metrics that do not override
+// SufficientStatisticsVectorSize().
+#define MAX_SS_VECTOR_SIZE 50
+// Default size of the sufficient-statistics vector for this metric.
+unsigned EvaluationMetric::SufficientStatisticsVectorSize() const {
+ return MAX_SS_VECTOR_SIZE;
+}
+
+// Placeholder implementation: reaching it means a metric provided neither its
+// own ComputeSufficientStatistics() nor its own SegmentEvaluator. Prints a
+// message to stderr and aborts the process.
+void EvaluationMetric::ComputeSufficientStatistics(const vector<WordID>&,
+ const vector<vector<WordID> >&,
+ SufficientStats*) const {
+ cerr << "Base class ComputeSufficientStatistics should not be called.\n";
+ abort();
+}
+
+// Default textual report for a set of statistics: "<metric id>=<score>".
+string EvaluationMetric::DetailedScore(const SufficientStats& stats) const {
+  const float score = ComputeScore(stats);
+  ostringstream report;
+  report << MetricId();
+  report << "=";
+  report << score;
+  return report.str();
+}
+
+// Selects how the reference length used for the brevity penalty is chosen
+// when there are several references (see BleuSegmentEvaluator).
+enum BleuType { IBM, Koehn, NIST };
+// Collects BLEU sufficient statistics for one segment against a fixed set of
+// references.
+//
+// Layout of the statistics written by Evaluate() (N = maximum n-gram order):
+//   fields[0 .. N-1]   clipped n-gram matches for orders 1..N
+//   fields[N .. 2N-1]  hypothesis n-gram counts for orders 1..N
+//   fields[2N]         hypothesis length
+//   fields[2N+1]       reference length used for the brevity penalty
+//
+// The reference length depends on BrevityType: Koehn uses the average
+// reference length, NIST the shortest reference, and IBM the reference whose
+// length is closest to the hypothesis (first one wins on ties).
+template <unsigned int N = 4u, BleuType BrevityType = IBM>
+struct BleuSegmentEvaluator : public SegmentEvaluator {
+  // Records the reference lengths and the maximum count of every reference
+  // n-gram. `em` is not owned and must outlive this evaluator.
+  BleuSegmentEvaluator(const vector<vector<WordID> >& refs, const EvaluationMetric* em) : evaluation_metric(em) {
+    assert(refs.size() > 0);
+    float tot = 0;
+    int smallest = 9999999;
+    for (vector<vector<WordID> >::const_iterator ci = refs.begin();
+         ci != refs.end(); ++ci) {
+      lengths_.push_back(ci->size());
+      tot += lengths_.back();
+      if (lengths_.back() < smallest) smallest = lengths_.back();
+      CountRef(*ci);
+    }
+    // For Koehn/NIST the chosen length is stored in slot 0, which is the
+    // slot Evaluate() reads by default.
+    if (BrevityType == Koehn)
+      lengths_[0] = tot / refs.size();
+    if (BrevityType == NIST)
+      lengths_[0] = smallest;
+  }
+
+  // Computes the statistics of `hyp` and stores them in *out (see layout
+  // above), tagging them with the owning metric's id.
+  void Evaluate(const vector<WordID>& hyp, SufficientStats* out) const {
+    out->fields.resize(N + N + 2);
+    out->id_ = evaluation_metric->MetricId();
+    for (unsigned i = 0; i < N+N+2; ++i) out->fields[i] = 0;
+
+    ComputeNgramStats(hyp, &out->fields[0], &out->fields[N], true);
+    float& hyp_len = out->fields[2*N];
+    float& ref_len = out->fields[2*N + 1];
+    hyp_len = hyp.size();
+    ref_len = lengths_[0];
+    if (lengths_.size() > 1 && BrevityType == IBM) {
+      // IBM brevity penalty: pick the reference length closest to the
+      // hypothesis length.
+      float bestd = 2000000;
+      float hl = hyp.size();
+      float bl = -1;
+      for (vector<float>::const_iterator ci = lengths_.begin(); ci != lengths_.end(); ++ci) {
+        if (fabs(*ci - hl) < bestd) {
+          bestd = fabs(*ci - hl);
+          bl = *ci;
+        }
+      }
+      ref_len = bl;
+    }
+  }
+
+  // Strict weak ordering on n-grams: lexicographic by word id, with a proper
+  // prefix ordered before the longer sequence. Declared const and returning
+  // bool so it satisfies the Compare requirements of std::map.
+  struct NGramCompare {
+    bool operator() (const vector<WordID>& a, const vector<WordID>& b) const {
+      const size_t as = a.size();
+      const size_t bs = b.size();
+      const size_t s = (as < bs ? as : bs);
+      for (size_t i = 0; i < s; ++i) {
+        if (a[i] != b[i]) return a[i] < b[i];
+      }
+      return as < bs;
+    }
+  };
+  // n-gram -> (max count in any single reference, matches used so far)
+  typedef map<vector<WordID>, pair<int,int>, NGramCompare> NGramCountMap;
+
+  // Counts all n-grams (orders 1..N) of one reference and raises the stored
+  // per-n-gram maximum where this reference has more occurrences.
+  void CountRef(const vector<WordID>& ref) {
+    NGramCountMap tc;
+    vector<WordID> ngram(N);
+    const int max_order = static_cast<int>(N);
+    int s = ref.size();
+    for (int j=0; j<s; ++j) {
+      int remaining = s-j;
+      int k = (max_order < remaining ? max_order : remaining);
+      ngram.clear();
+      for (int i=1; i<=k; ++i) {
+        ngram.push_back(ref[j + i - 1]);
+        tc[ngram].first++;
+      }
+    }
+    for (typename NGramCountMap::iterator i = tc.begin(); i != tc.end(); ++i) {
+      pair<int,int>& p = ngrams_[i->first];
+      if (p.first < i->second.first)
+        p = i->second;
+    }
+  }
+
+  // Accumulates per-order match counts into `correct` and per-order
+  // hypothesis n-gram counts into `hyp` (both arrays of N floats that the
+  // caller is expected to have zeroed; only element 0 is reset here).
+  // With clip_counts, an n-gram is credited at most as often as it occurs in
+  // the reference that contains it most often.
+  //
+  // Hypothesis n-grams are looked up with find() rather than operator[] so
+  // that unseen n-grams are not inserted into the reference map; inserting
+  // them made the map (and the reset loop below) grow with every call.
+  void ComputeNgramStats(const vector<WordID>& sent,
+                         float* correct,  // N elements reserved
+                         float* hyp,      // N elements reserved
+                         bool clip_counts = true) const {
+    // clear clipping stats
+    for (typename NGramCountMap::iterator it = ngrams_.begin(); it != ngrams_.end(); ++it)
+      it->second.second = 0;
+
+    vector<WordID> ngram(N);
+    *correct *= 0;
+    *hyp *= 0;
+    const int max_order = static_cast<int>(N);
+    int s = sent.size();
+    for (int j=0; j<s; ++j) {
+      int remaining = s-j;
+      int k = (max_order < remaining ? max_order : remaining);
+      ngram.clear();
+      for (int i=1; i<=k; ++i) {
+        ngram.push_back(sent[j + i - 1]);
+        typename NGramCountMap::iterator hit = ngrams_.find(ngram);
+        const bool in_refs = (hit != ngrams_.end());
+        if (clip_counts) {
+          if (in_refs && hit->second.second < hit->second.first) {
+            ++hit->second.second;
+            correct[i-1]++;
+          }
+        } else {
+          if (in_refs) ++hit->second.second;
+          correct[i-1]++;
+        }
+        // If this n-gram is not in any reference, no longer n-gram starting
+        // at j can match either: count the remaining orders as hypothesis
+        // n-grams and stop extending (the inner loop ends because i > k).
+        if (!in_refs) {
+          for (; i<=k; ++i)
+            hyp[i-1]++;
+        } else {
+          hyp[i-1]++;
+        }
+      }
+    }
+  }
+
+  const EvaluationMetric* evaluation_metric;  // not owned
+  vector<float> lengths_;                     // reference lengths (slot 0 may hold avg/min)
+  mutable NGramCountMap ngrams_;              // per-call match counters live in .second
+};
+
+// BLEU with maximum n-gram order N and the brevity-penalty convention given
+// by BrevityType. Statistics layout matches BleuSegmentEvaluator:
+// N match counts, N hypothesis n-gram counts, hypothesis length, reference
+// length.
+template <unsigned int N = 4u, BleuType BrevityType = IBM>
+struct BleuMetric : public EvaluationMetric {
+  BleuMetric() : EvaluationMetric(BrevityType == IBM ? "IBM_BLEU" : (BrevityType == Koehn ? "KOEHN_BLEU" : "NIST_BLEU")) {}
+  unsigned SufficientStatisticsVectorSize() const { return N*2 + 2; }
+  shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const vector<vector<WordID> >& refs) const {
+    return shared_ptr<SegmentEvaluator>(new BleuSegmentEvaluator<N,BrevityType>(refs, this));
+  }
+
+  // Computes BLEU from `stats`.
+  //   bp  (optional) receives the brevity penalty.
+  //   out (optional) receives exactly N precisions, one per order; orders
+  //       for which the hypothesis has no n-grams are reported as 0 so that
+  //       callers can index the vector by order safely.
+  // Orders without hypothesis n-grams are left out of the geometric mean.
+  // Returns 0 when no order has any hypothesis n-gram (e.g. empty
+  // hypothesis) instead of dividing by zero.
+  float ComputeBreakdown(const SufficientStats& stats, float* bp, vector<float>* out) const {
+    if (out) { out->clear(); }
+    float log_bleu = 0;
+    int count = 0;
+    for (unsigned i = 0; i < N; ++i) {
+      if (stats.fields[i+N] > 0) {
+        float cor_count = stats.fields[i];  // correct_ngram_hit_counts[i];
+        // smooth bleu
+        if (!cor_count) { cor_count = 0.01; }
+        float lprec = log(cor_count) - log(stats.fields[i+N]);  // log(hyp_ngram_counts[i]);
+        if (out) out->push_back(exp(lprec));
+        log_bleu += lprec;
+        ++count;
+      } else if (out) {
+        out->push_back(0);
+      }
+    }
+    if (count > 0) log_bleu /= count;
+    float lbp = 0.0;
+    const float& hyp_len = stats.fields[2*N];
+    const float& ref_len = stats.fields[2*N + 1];
+    if (hyp_len < ref_len)
+      lbp = (hyp_len - ref_len) / hyp_len;
+    log_bleu += lbp;
+    if (bp) *bp = exp(lbp);
+    if (count == 0) return 0;
+    return exp(log_bleu);
+  }
+
+  // Formats "<id> = <bleu%>, <p1>|<p2>|...|<pN> (brev=<bp>)". For N == 4
+  // this is the same layout as the former fixed-size format string.
+  string DetailedScore(const SufficientStats& stats) const {
+    vector<float> precs;
+    float bp = 1;
+    const float bleu = ComputeBreakdown(stats, &bp, &precs);
+    char num[64];
+    ostringstream os;
+    snprintf(num, sizeof(num), "%.2f", bleu*100.0);
+    os << MetricId() << " = " << num << ", ";
+    for (unsigned i = 0; i < precs.size(); ++i) {
+      snprintf(num, sizeof(num), "%.1f", precs[i]*100.0);
+      if (i > 0) os << '|';
+      os << num;
+    }
+    snprintf(num, sizeof(num), "%.3f", bp);
+    os << " (brev=" << num << ")";
+    return os.str();
+  }
+  float ComputeScore(const SufficientStats& stats) const {
+    return ComputeBreakdown(stats, NULL, NULL);
+  }
+};
+
+// Returns the metric registered under imetric_id (compared after
+// upper-casing), constructing and caching it on first request. The id "NULL"
+// is pre-registered and maps to a null pointer. An id that matches none of
+// the branches below, or a constructed metric whose MetricId() differs from
+// the requested id, aborts the process. Nothing in this function releases
+// the cached objects.
+EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) {
+ static bool is_first = true;
+ if (is_first) {
+ instances_["NULL"] = NULL;
+ is_first = false;
+ }
+ const string metric_id = UppercaseString(imetric_id);
+
+ map<string, EvaluationMetric*>::iterator it = instances_.find(metric_id);
+ if (it == instances_.end()) {
+ EvaluationMetric* m = NULL;
+ if (metric_id == "IBM_BLEU") {
+ m = new BleuMetric<4, IBM>;
+ } else if (metric_id == "NIST_BLEU") {
+ m = new BleuMetric<4, NIST>;
+ } else if (metric_id == "KOEHN_BLEU") {
+ m = new BleuMetric<4, Koehn>;
+ } else if (metric_id == "TER") {
+ m = new TERMetric;
+ } else if (metric_id == "METEOR") {
+ // NOTE(review): the scorer command embeds an absolute, user-specific jar
+ // path; this branch only works where that path exists.
+ m = new ExternalMetric("METEOR", "java -Xmx1536m -jar /Users/cdyer/software/meteor/meteor-1.3.jar - - -mira -lower -t tune -l en");
+ } else if (metric_id.find("COMB:") == 0) {
+ m = new CombinationMetric(metric_id);
+ } else {
+ cerr << "Implement please: " << metric_id << endl;
+ abort();
+ }
+ // Sanity check: the object must report the id it was registered under.
+ if (m->MetricId() != metric_id) {
+ cerr << "Registry error: " << metric_id << " vs. " << m->MetricId() << endl;
+ abort();
+ }
+ return instances_[metric_id] = m;
+ } else {
+ return it->second;
+ }
+}
+
+// Parses the whitespace-separated form written by Encode(): the first token
+// becomes the metric id, every following token that reads as a float is
+// appended to `fields`.
+SufficientStats::SufficientStats(const string& encoded) {
+  istringstream in(encoded);
+  in >> id_;
+  for (float v; in >> v; )
+    fields.push_back(v);
+}
+
+// Serializes the statistics into *out as "<id> <f0> <f1> ...". An empty id
+// is written as the literal "NULL".
+void SufficientStats::Encode(string* out) const {
+  ostringstream buf;
+  buf << (id_.empty() ? string("NULL") : id_);
+  for (vector<float>::const_iterator it = fields.begin(); it != fields.end(); ++it)
+    buf << ' ' << *it;
+  *out = buf.str();
+}
+
diff --git a/mteval/ns.h b/mteval/ns.h
new file mode 100644
index 00000000..4e4c6975
--- /dev/null
+++ b/mteval/ns.h
@@ -0,0 +1,115 @@
+#ifndef _NS_H_
+#define _NS_H_
+
+#include <string>
+#include <vector>
+#include <map>
+#include <boost/shared_ptr.hpp>
+#include "wordid.h"
+#include <iostream>
+
+// A metric-tagged vector of floats holding the sufficient statistics from
+// which a metric score is computed. Supports element-wise accumulation and
+// scaling, and a text encoding (see Encode and the string constructor).
+class SufficientStats {
+ public:
+ SufficientStats() : id_() {}
+ // Decodes the text form produced by Encode().
+ explicit SufficientStats(const std::string& encoded);
+ SufficientStats(const std::string& mid, const std::vector<float>& f) :
+ id_(mid), fields(f) {}
+
+ // Element-wise addition. Adopts delta's id if this object has none, and
+ // grows `fields` to the longer of the two sizes before adding.
+ // NOTE(review): std::max is used without this header including <algorithm>.
+ SufficientStats& operator+=(const SufficientStats& delta) {
+ if (id_.empty() && delta.id_.size()) id_ = delta.id_;
+ if (fields.size() != delta.fields.size())
+ fields.resize(std::max(fields.size(), delta.fields.size()));
+ for (unsigned i = 0; i < delta.fields.size(); ++i)
+ fields[i] += delta.fields[i];
+ return *this;
+ }
+ // Element-wise subtraction with the same id/size handling as operator+=.
+ SufficientStats& operator-=(const SufficientStats& delta) {
+ if (id_.empty() && delta.id_.size()) id_ = delta.id_;
+ if (fields.size() != delta.fields.size())
+ fields.resize(std::max(fields.size(), delta.fields.size()));
+ for (unsigned i = 0; i < delta.fields.size(); ++i)
+ fields[i] -= delta.fields[i];
+ return *this;
+ }
+ // Multiplies every field by `scalar`.
+ SufficientStats& operator*=(const double& scalar) {
+ for (unsigned i = 0; i < fields.size(); ++i)
+ fields[i] *= scalar;
+ return *this;
+ }
+ // Divides every field by `scalar` (no check for zero).
+ SufficientStats& operator/=(const double& scalar) {
+ for (unsigned i = 0; i < fields.size(); ++i)
+ fields[i] /= scalar;
+ return *this;
+ }
+ // Compares the field vectors only; the metric id is not part of equality.
+ bool operator==(const SufficientStats& other) const {
+ return other.fields == fields;
+ }
+ // True when every field is zero (also true for an empty vector).
+ bool IsAdditiveIdentity() const {
+ for (unsigned i = 0; i < fields.size(); ++i)
+ if (fields[i]) return false;
+ return true;
+ }
+ size_t size() const { return fields.size(); }
+ // Bounds-tolerant read: indices past the end yield 0.
+ float operator[](size_t i) const {
+ if (i < fields.size()) return fields[i];
+ return 0;
+ }
+ // Writes "<id> <f0> <f1> ..." into *out.
+ void Encode(std::string* out) const;
+
+ std::string id_; // id of the metric that produced the statistics
+ std::vector<float> fields; // the statistics themselves
+};
+
+// Element-wise sum of two statistics objects (see SufficientStats::operator+=).
+inline const SufficientStats operator+(const SufficientStats& a, const SufficientStats& b) {
+  SufficientStats sum = a;
+  sum += b;
+  return sum;
+}
+
+// Element-wise difference of two statistics objects (see SufficientStats::operator-=).
+inline const SufficientStats operator-(const SufficientStats& a, const SufficientStats& b) {
+  SufficientStats diff = a;
+  diff -= b;
+  return diff;
+}
+
+// Interface for scoring hypotheses against the references of one segment.
+struct SegmentEvaluator {
+ virtual ~SegmentEvaluator();
+ // Writes the sufficient statistics of `hyp` into *out.
+ virtual void Evaluate(const std::vector<WordID>& hyp, SufficientStats* out) const = 0;
+};
+
+// Instructions for implementing a new metric
+// To Instance(), add a branch that creates the metric
+// Implement ComputeScore(const SufficientStats& stats) const;
+// Implement ONE of the following:
+// 1) void ComputeSufficientStatistics(const std::vector<WordID>& hyp, const std::vector<std::vector<WordID> >& refs, SufficientStats* out) const;
+// 2) a new SegmentEvaluator class AND CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const;
+// [The latter (#2) is only needed when per-segment data must be precomputed from a set of refs]
+// OPTIONAL: Override SufficientStatisticsVectorSize() if it is easy to do so
+class EvaluationMetric {
+ public:
+ // Returns the shared instance registered under metric_id, creating it on
+ // first use.
+ static EvaluationMetric* Instance(const std::string& metric_id = "IBM_BLEU");
+
+ protected:
+ // Construction and destruction are restricted to subclasses; clients obtain
+ // metrics through Instance().
+ EvaluationMetric(const std::string& id) : name_(id) {}
+ virtual ~EvaluationMetric();
+
+ public:
+ const std::string& MetricId() const { return name_; }
+
+ // returns true for metrics like WER and TER where lower scores are better
+ // false for metrics like BLEU and METEOR where higher scores are better
+ virtual bool IsErrorMetric() const;
+
+ // Number of floats the metric's statistics occupy.
+ virtual unsigned SufficientStatisticsVectorSize() const;
+ // Turns (possibly accumulated) statistics into a score.
+ virtual float ComputeScore(const SufficientStats& stats) const = 0;
+ // Human-readable report for the given statistics.
+ virtual std::string DetailedScore(const SufficientStats& stats) const;
+ // Creates an evaluator bound to the references of one segment.
+ virtual boost::shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const;
+ // Computes the statistics of one hypothesis against `refs`.
+ virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
+ const std::vector<std::vector<WordID> >& refs,
+ SufficientStats* out) const;
+
+ private:
+ static std::map<std::string, EvaluationMetric*> instances_; // registry used by Instance()
+ const std::string name_;
+};
+
+#endif
+
diff --git a/mteval/ns_comb.cc b/mteval/ns_comb.cc
new file mode 100644
index 00000000..41c634cd
--- /dev/null
+++ b/mteval/ns_comb.cc
@@ -0,0 +1,87 @@
+#include "ns_comb.h"
+
+#include <iostream>
+
+#include "stringlib.h"
+
+using namespace std;
+
+// Builds a linear combination of metrics from a specifier of the form
+//   COMB:<metric>=<weight>;<metric>=<weight>[;...]
+// e.g. COMB:IBM_BLEU=0.5;TER=0.5
+// At least two components are required. Each component metric is obtained
+// from EvaluationMetric::Instance() and is assigned a contiguous slice of
+// the combined statistics vector starting at its entry in `offsets`.
+// A malformed specifier, or a component that resolves to no metric (the
+// registry maps the id "NULL" to a null pointer), prints an error and exits.
+// The parsed combination is echoed to stderr.
+CombinationMetric::CombinationMetric(const std::string& cmd) :
+    EvaluationMetric(cmd),
+    total_size() {
+  if (cmd.find("COMB:") != 0 || cmd.size() < 9) {
+    cerr << "Error in combination metric specifier: " << cmd << endl;
+    exit(1);
+  }
+  vector<string> comps;
+  Tokenize(cmd.substr(5), ';', &comps);
+  if(comps.size() < 2) {
+    cerr << "Error in combination metric specifier: " << cmd << endl;
+    exit(1);
+  }
+  vector<string> cwpairs;
+  for (unsigned i = 0; i < comps.size(); ++i) {
+    cwpairs.clear();
+    Tokenize(comps[i], '=', &cwpairs);
+    if (cwpairs.size() != 2) { cerr << "Error in combination metric specifier: " << cmd << endl; exit(1); }
+    EvaluationMetric* component = EvaluationMetric::Instance(cwpairs[0]);
+    if (!component) { cerr << "Error in combination metric specifier: " << cmd << endl; exit(1); }
+    metrics.push_back(component);
+    coeffs.push_back(atof(cwpairs[1].c_str()));
+    offsets.push_back(total_size);
+    total_size += component->SufficientStatisticsVectorSize();
+    cerr << (i > 0 ? " + " : "( ") << coeffs.back() << " * " << cwpairs[0];
+  }
+  cerr << " )\n";
+}
+
+// Segment evaluator for CombinationMetric: runs every component metric's
+// evaluator on the hypothesis and copies each result into that component's
+// slice of one combined statistics vector.
+//
+// `id` and `offsets` are held by reference, so the objects passed in must
+// outlive this evaluator.
+struct CombinationSegmentEvaluator : public SegmentEvaluator {
+  CombinationSegmentEvaluator(const string& id,
+                              const vector<vector<WordID> >& refs,
+                              const vector<EvaluationMetric*>& metrics,
+                              const vector<unsigned>& offsets,
+                              const unsigned ts) : id_(id), offsets_(offsets), total_size_(ts), component_evaluators_(metrics.size()) {
+    for (unsigned i = 0; i < metrics.size(); ++i)
+      component_evaluators_[i] = metrics[i]->CreateSegmentEvaluator(refs);
+  }
+  virtual void Evaluate(const std::vector<WordID>& hyp, SufficientStats* out) const {
+    out->id_ = id_;
+    // Start from all zeros: a component may fill fewer fields than its
+    // reserved slice, and *out may be reused by the caller, so a plain
+    // resize() would leave stale values in the unused slots.
+    out->fields.assign(total_size_, 0.0f);
+    for (unsigned i = 0; i < component_evaluators_.size(); ++i) {
+      SufficientStats t;
+      component_evaluators_[i]->Evaluate(hyp, &t);
+      for (unsigned j = 0; j < t.fields.size(); ++j) {
+        unsigned op = j + offsets_[i];
+        assert(op < out->fields.size());
+        out->fields[op] = t[j];
+      }
+    }
+  }
+  const string& id_;
+  const vector<unsigned>& offsets_;
+  const unsigned total_size_;
+  vector<boost::shared_ptr<SegmentEvaluator> > component_evaluators_;
+};
+
+// Creates an evaluator that runs all component metrics on the given
+// references and lays their statistics out according to `offsets`.
+boost::shared_ptr<SegmentEvaluator> CombinationMetric::CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const {
+  return boost::shared_ptr<SegmentEvaluator>(
+      new CombinationSegmentEvaluator(MetricId(), refs, metrics, offsets, total_size));
+}
+
+// Weighted sum of the component scores. Each component is scored on its own
+// slice [offsets[m], offsets[m+1]) of the combined vector; the last slice
+// runs to total_size.
+float CombinationMetric::ComputeScore(const SufficientStats& stats) const {
+  float weighted_sum = 0;
+  const unsigned n_metrics = metrics.size();
+  for (unsigned m = 0; m < n_metrics; ++m) {
+    const unsigned hi = (m + 1 < offsets.size()) ? offsets[m + 1] : total_size;
+    SufficientStats slice;
+    for (unsigned k = offsets[m]; k < hi; ++k)
+      slice.fields.push_back(stats[k]);
+    weighted_sum += metrics[m]->ComputeScore(slice) * coeffs[m];
+  }
+  return weighted_sum;
+}
+
+// Sum of the component metrics' vector sizes, accumulated in the constructor.
+unsigned CombinationMetric::SufficientStatisticsVectorSize() const {
+ return total_size;
+}
+
diff --git a/mteval/ns_comb.h b/mteval/ns_comb.h
new file mode 100644
index 00000000..140e7e6a
--- /dev/null
+++ b/mteval/ns_comb.h
@@ -0,0 +1,19 @@
+#ifndef _NS_COMB_H_
+#define _NS_COMB_H_
+
+#include "ns.h"
+
+// A metric whose score is a weighted sum of other metrics' scores. Its
+// statistics vector is the concatenation of the components' vectors.
+class CombinationMetric : public EvaluationMetric {
+ public:
+ // cmd is a specifier such as "COMB:IBM_BLEU=0.5;TER=0.5".
+ CombinationMetric(const std::string& cmd);
+ virtual boost::shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const;
+ virtual float ComputeScore(const SufficientStats& stats) const;
+ virtual unsigned SufficientStatisticsVectorSize() const;
+ private:
+ std::vector<EvaluationMetric*> metrics; // component metrics, obtained from Instance()
+ std::vector<float> coeffs; // weight of each component
+ std::vector<unsigned> offsets; // start of each component's slice
+ unsigned total_size; // length of the combined vector
+};
+
+#endif
diff --git a/mteval/ns_docscorer.cc b/mteval/ns_docscorer.cc
new file mode 100644
index 00000000..28a2fd09
--- /dev/null
+++ b/mteval/ns_docscorer.cc
@@ -0,0 +1,60 @@
+#include "ns_docscorer.h"
+
+#include <iostream>
+#include <cstring>
+
+#include "tdict.h"
+#include "filelib.h"
+#include "ns.h"
+
+using namespace std;
+
+DocumentScorer::~DocumentScorer() {}
+
+// Loads parallel reference files (one reference per line, line i of every
+// file belonging to segment i) and creates one SegmentEvaluator per segment
+// via metric->CreateSegmentEvaluator(). Any previously loaded evaluators are
+// discarded first.
+//
+// src_file must be empty (asserted); `verbose` is not used in this function.
+// NOTE(review): ifs[0] is accessed unconditionally, so ref_files is assumed
+// to be non-empty.
+// NOTE(review): lines are read into a fixed 64000-byte buffer; behavior for
+// longer lines depends on istream::getline's truncation handling.
+void DocumentScorer::Init(const EvaluationMetric* metric,
+ const vector<string>& ref_files,
+ const string& src_file,
+ bool verbose) {
+ scorers_.clear();
+ cerr << "Loading references (" << ref_files.size() << " files)\n";
+ assert(src_file.empty());
+ std::vector<ReadFile> ifs(ref_files.begin(),ref_files.end());
+ for (int i=0; i < ref_files.size(); ++i) ifs[i].Init(ref_files[i]);
+ char buf[64000];
+ bool expect_eof = false;
+ int line=0;
+ while (ifs[0].get()) {
+ vector<vector<WordID> > refs(ref_files.size());
+ for (int i=0; i < ref_files.size(); ++i) {
+ istream &in=ifs[i].get();
+ if (in.eof()) break;
+ in.getline(buf, 64000);
+ refs[i].clear();
+ if (strlen(buf) == 0) {
+ // An empty read at end-of-file must be seen on the first file first;
+ // once seen, no further reference lines are accepted.
+ if (in.eof()) {
+ if (!expect_eof) {
+ assert(i == 0);
+ expect_eof = true;
+ }
+ break;
+ }
+ } else {
+ TD::ConvertSentence(buf, &refs[i]);
+ assert(!refs[i].empty());
+ }
+ assert(!expect_eof);
+ }
+ if (!expect_eof) {
+ string src_line;
+ //if (srcrf) {
+ // getline(srcrf.get(), src_line);
+ // map<string,string> dummy;
+ // ProcessAndStripSGML(&src_line, &dummy);
+ //}
+ scorers_.push_back(metric->CreateSegmentEvaluator(refs));
+ ++line;
+ }
+ }
+ cerr << "Loaded reference translations for " << scorers_.size() << " sentences.\n";
+}
+
diff --git a/mteval/ns_docscorer.h b/mteval/ns_docscorer.h
new file mode 100644
index 00000000..170ac627
--- /dev/null
+++ b/mteval/ns_docscorer.h
@@ -0,0 +1,31 @@
+#ifndef _NS_DOC_SCORER_H_
+#define _NS_DOC_SCORER_H_
+
+#include <vector>
+#include <string>
+#include <boost/shared_ptr.hpp>
+
+struct EvaluationMetric;
+struct SegmentEvaluator;
+// Holds one SegmentEvaluator per segment of a reference set, indexed by
+// segment number.
+class DocumentScorer {
+ public:
+ ~DocumentScorer();
+ DocumentScorer() { }
+ // Convenience constructor: equivalent to default construction followed by
+ // Init() with the same arguments.
+ DocumentScorer(const EvaluationMetric* metric,
+ const std::vector<std::string>& ref_files,
+ const std::string& src_file = "",
+ bool verbose=false) {
+ Init(metric,ref_files,src_file,verbose);
+ }
+ // (Re)loads the references and builds the per-segment evaluators.
+ void Init(const EvaluationMetric* metric,
+ const std::vector<std::string>& ref_files,
+ const std::string& src_file = "",
+ bool verbose=false);
+
+ // Number of segments loaded.
+ int size() const { return scorers_.size(); }
+ // Evaluator for segment i; the index is not range-checked.
+ const SegmentEvaluator* operator[](size_t i) const { return scorers_[i].get(); }
+ private:
+ std::vector<boost::shared_ptr<SegmentEvaluator> > scorers_;
+};
+
+#endif
diff --git a/mteval/ns_ext.cc b/mteval/ns_ext.cc
new file mode 100644
index 00000000..956708af
--- /dev/null
+++ b/mteval/ns_ext.cc
@@ -0,0 +1,130 @@
+#include "ns_ext.h"
+
+#include <cstdio> // popen
+#include <cstdlib>
+#include <cstring>
+#include <unistd.h>
+#include <sstream>
+#include <iostream>
+#include <cassert>
+
+#include "stringlib.h"
+#include "tdict.h"
+
+using namespace std;
+
+// Client side of an external scoring process that is spoken to over a pair
+// of pipes using a line-based request/response protocol ("SCORE ||| ..." and
+// "EVAL ||| ..." requests).
+struct NScoreServer {
+ // Starts `cmd` as a child process and performs a test request.
+ NScoreServer(const std::string& cmd);
+ ~NScoreServer();
+
+ // Sends the statistics in an EVAL request and returns the parsed reply.
+ float ComputeScore(const std::vector<float>& fields);
+ // Sends references and hypothesis in a SCORE request; the numbers in the
+ // reply replace the contents of *fields.
+ void Evaluate(const std::vector<std::vector<WordID> >& refs, const std::vector<WordID>& hyp, std::vector<float>* fields);
+
+ private:
+ // Writes one request line and reads one response line.
+ void RequestResponse(const std::string& request, std::string* response);
+ int p2c[2]; // pipe: parent writes, child reads (child's stdin)
+ int c2p[2]; // pipe: child writes (child's stdout), parent reads
+};
+
+// Launches the external scorer `cmd` (split on whitespace, looked up via
+// PATH) with its stdin/stdout connected to this object through two pipes,
+// then sends one test SCORE request to confirm the scorer answers.
+// Exits the process if the pipes or the fork cannot be created.
+NScoreServer::NScoreServer(const string& cmd) {
+  cerr << "Invoking " << cmd << " ..." << endl;
+  if (pipe(p2c) < 0) { perror("pipe"); exit(1); }
+  if (pipe(c2p) < 0) { perror("pipe"); exit(1); }
+  pid_t cpid = fork();
+  if (cpid < 0) { perror("fork"); exit(1); }
+  if (cpid == 0) {  // child
+    close(p2c[1]);
+    close(c2p[0]);
+    dup2(p2c[0], 0);
+    close(p2c[0]);
+    dup2(c2p[1], 1);
+    close(c2p[1]);
+    cerr << "Exec'ing from child " << cmd << endl;
+    vector<string> vargs;
+    SplitOnWhitespace(cmd, &vargs);
+    if (vargs.empty()) {
+      cerr << "Empty command for external scorer\n";
+      _exit(1);
+    }
+    // argv[0] must be the program name; the remaining tokens are its
+    // arguments. (Starting the array at vargs[1] made the program treat its
+    // first real option as argv[0] and silently drop it.)
+    vector<char*> cargv;
+    for (unsigned i = 0; i < vargs.size(); ++i)
+      cargv.push_back(const_cast<char*>(vargs[i].c_str()));
+    cargv.push_back(NULL);
+    execvp(cargv[0], &cargv[0]);
+    // Only reached when exec failed: never fall through into the parent's
+    // code path from the child.
+    perror("execvp");
+    _exit(1);
+  } else {  // parent
+    close(c2p[1]);
+    close(p2c[0]);
+  }
+  string dummy;
+  RequestResponse("SCORE ||| Reference initialization string . ||| Testing initialization string .", &dummy);
+  assert(dummy.size() > 0);
+  cerr << "Connection established.\n";
+}
+
+// Closes this side of both pipes. Closing the write end gives the child
+// end-of-file on its stdin.
+// TODO: the child's pid is not retained by this class, so the child is not
+// waited for here.
+NScoreServer::~NScoreServer() {
+  close(p2c[1]);
+  close(c2p[0]);
+}
+
+// Asks the external scorer to turn statistics into a score: sends
+// "EVAL ||| f0 f1 ..." and parses the reply as a floating-point number.
+float NScoreServer::ComputeScore(const vector<float>& fields) {
+  ostringstream request;
+  request << "EVAL |||";
+  for (vector<float>::const_iterator f = fields.begin(); f != fields.end(); ++f)
+    request << ' ' << *f;
+  string reply;
+  RequestResponse(request.str(), &reply);
+  const double score = strtod(reply.c_str(), NULL);
+  return score;
+}
+
+// Asks the external scorer for the sufficient statistics of `hyp` against
+// `refs`. The request is "SCORE ||| ref1 ||| ref2 ... ||| hyp" with words
+// rendered through TD::Convert; every number in the reply is stored in
+// *fields (which is cleared first).
+void NScoreServer::Evaluate(const vector<vector<WordID> >& refs, const vector<WordID>& hyp, vector<float>* fields) {
+  ostringstream request;
+  request << "SCORE";
+  for (vector<vector<WordID> >::const_iterator ref = refs.begin(); ref != refs.end(); ++ref) {
+    request << " |||";
+    for (vector<WordID>::const_iterator w = ref->begin(); w != ref->end(); ++w)
+      request << ' ' << TD::Convert(*w);
+  }
+  request << " |||";
+  for (vector<WordID>::const_iterator w = hyp.begin(); w != hyp.end(); ++w)
+    request << ' ' << TD::Convert(*w);
+
+  string reply;
+  RequestResponse(request.str(), &reply);
+
+  fields->clear();
+  istringstream parsed(reply);
+  for (float v; parsed >> v; )
+    fields->push_back(v);
+}
+
+#define MAX_BUF 16000
+
+// Sends `request` plus a newline to the scorer and reads its reply up to the
+// terminating newline (or until MAX_BUF bytes have been read). The reply,
+// trimmed of spaces, tabs and newlines, is stored in *response.
+//
+// A write error, read error or end-of-file stops the exchange instead of
+// looping or indexing the buffer with a bogus length; if nothing could be
+// read, *response is left empty and a diagnostic is printed.
+void NScoreServer::RequestResponse(const string& request, string* response) {
+// cerr << "@SERVER: " << request << endl;
+  const string x = request + "\n";
+  size_t sent = 0;
+  while (sent < x.size()) {  // write() may accept fewer bytes than asked
+    const ssize_t w = write(p2c[1], x.data() + sent, x.size() - sent);
+    if (w <= 0) { perror("write"); break; }
+    sent += w;
+  }
+  char buf[MAX_BUF];
+  size_t n = 0;
+  while (n < MAX_BUF) {
+    const ssize_t r = read(c2p[0], &buf[n], MAX_BUF - n);
+    if (r <= 0) {  // 0: scorer closed its output, <0: error
+      if (r < 0) perror("read");
+      break;
+    }
+    n += r;
+    if (buf[n-1] == '\n') break;
+  }
+  if (n == 0) {
+    cerr << "Malformed response: " << endl;
+    response->clear();
+    return;
+  }
+  buf[n-1] = 0;  // replace the trailing newline (or last byte) with NUL
+  if (n < 2) {
+    cerr << "Malformed response: " << buf << endl;
+  }
+  *response = Trim(buf, " \t\n");
+// cerr << "@RESPONSE: '" << *response << "'\n";
+}
+
+// Delegates to the external scoring process; the numbers it returns replace
+// out->fields. The id of *out is not touched here.
+void ExternalMetric::ComputeSufficientStatistics(const std::vector<WordID>& hyp,
+ const std::vector<std::vector<WordID> >& refs,
+ SufficientStats* out) const {
+ eval_server->Evaluate(refs, hyp, &out->fields);
+}
+
+// Returns the score the external scoring process computes from `stats`.
+// (The result of the server call must be returned: flowing off the end of a
+// non-void function is undefined behavior.)
+float ExternalMetric::ComputeScore(const SufficientStats& stats) const {
+  return eval_server->ComputeScore(stats.fields);
+}
+
+// Registers the metric under metric_name and starts the external scorer
+// given by `command`; the NScoreServer is owned by this object.
+ExternalMetric::ExternalMetric(const string& metric_name, const std::string& command) :
+ EvaluationMetric(metric_name),
+ eval_server(new NScoreServer(command)) {}
+
+// Releases the NScoreServer allocated in the constructor.
+ExternalMetric::~ExternalMetric() {
+ delete eval_server;
+}
diff --git a/mteval/ns_ext.h b/mteval/ns_ext.h
new file mode 100644
index 00000000..78badb2e
--- /dev/null
+++ b/mteval/ns_ext.h
@@ -0,0 +1,21 @@
+#ifndef _NS_EXTERNAL_SCORER_H_
+#define _NS_EXTERNAL_SCORER_H_
+
+#include "ns.h"
+
+struct NScoreServer;
+// A metric whose statistics and scores are computed by an external process
+// (wrapped by NScoreServer) rather than in this program.
+class ExternalMetric : public EvaluationMetric {
+ public:
+ // metricid: id to register under; command: command line of the scorer.
+ ExternalMetric(const std::string& metricid, const std::string& command);
+ ~ExternalMetric();
+
+ virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
+ const std::vector<std::vector<WordID> >& refs,
+ SufficientStats* out) const;
+ virtual float ComputeScore(const SufficientStats& stats) const;
+
+ protected:
+ NScoreServer* eval_server; // owned; deleted in the destructor
+};
+
+#endif
diff --git a/mteval/ns_ter.cc b/mteval/ns_ter.cc
new file mode 100644
index 00000000..0e1008db
--- /dev/null
+++ b/mteval/ns_ter.cc
@@ -0,0 +1,492 @@
+#include "ns_ter.h"
+
+#include <cstdio>
+#include <cassert>
+#include <iostream>
+#include <limits>
+#include <tr1/unordered_map>
+#include <set>
+#include <boost/functional/hash.hpp>
+#include "tdict.h"
+
+// When true, the reference word count stored in the statistics is the
+// (integer) average length over all references instead of the length of the
+// best-scoring reference.
+static const bool ter_use_average_ref_len = true;
+// The shift search runs only when this is negative or the reference is
+// shorter than this many words; -1 therefore never skips the search.
+static const int ter_short_circuit_long_sentences = -1;
+
+// Positions of the individual counts inside the TER sufficient-statistics
+// vector.
+static const unsigned kINSERTIONS = 0;
+static const unsigned kDELETIONS = 1;
+static const unsigned kSUBSTITUTIONS = 2;
+static const unsigned kSHIFTS = 3;
+static const unsigned kREF_WORDCOUNT = 4;
+// One past the last index; used as the size of the statistics vector.
+static const unsigned kDUMMY_LAST_ENTRY = 5;
+
+using namespace std;
+using namespace std::tr1;
+
+// Always returns true: TER is reported as an error metric (presumably meaning
+// that lower scores are better - confirm against EvaluationMetric).
+bool TERMetric::IsErrorMetric() const {
+ return true;
+}
+
+namespace NewScorer {
+
+// Cost charged for each kind of edit operation. All four are 1, so the TER
+// edit cost is a plain count of operations.
+struct COSTS {
+ static const float substitution;
+ static const float deletion;
+ static const float insertion;
+ static const float shift;
+};
+const float COSTS::substitution = 1.0f;
+const float COSTS::deletion = 1.0f;
+const float COSTS::insertion = 1.0f;
+const float COSTS::shift = 1.0f;
+
+// Longest span, in words, that is considered for a single shift.
+static const int MAX_SHIFT_SIZE = 10;
+// Bound on how far (in hypothesis positions) a span may be moved.
+static const int MAX_SHIFT_DIST = 50;
+
+// A candidate block move packed into one unsigned int:
+//   bits  0-9   first position of the span   (begin)
+//   bits 10-19  last position of the span    (end)
+//   bits 20-30  target position              (moveto)
+// moveto is stored as sign-and-magnitude: a negative target -k is encoded as
+// k + 1024, and any stored value above 1024 decodes as negative.
+struct Shift {
+  unsigned int d_;
+
+  Shift() : d_(0) {}
+  Shift(int b, int e, int m) : d_(0) {
+    begin(b);
+    end(e);
+    moveto(m);
+  }
+
+  // --- readers ---
+  int begin() const {
+    return d_ & 0x3ff;
+  }
+  int end() const {
+    return (d_ >> 10) & 0x3ff;
+  }
+  int moveto() const {
+    const int stored = (d_ >> 20) & 0x7ff;
+    return (stored > 1024) ? -(stored - 1024) : stored;
+  }
+
+  // --- writers: clear the field, then OR in the new value ---
+  void begin(int b) {
+    d_ = (d_ & 0xfffffc00u) | (b & 0x3ff);
+  }
+  void end(int e) {
+    d_ = (d_ & 0xfff003ffu) | ((e & 0x3ff) << 10);
+  }
+  void moveto(int m) {
+    const int stored = (m < 0) ? (1024 - m) : m;
+    d_ = (d_ & 0xfffff) | ((stored & 0x7ff) << 20);
+  }
+};
+
+class TERScorerImpl {
+
+ public:
+ enum TransType { MATCH, SUBSTITUTION, INSERTION, DELETION };
+
+ // Binds the scorer to `ref` (stored by reference, not copied) and records
+ // the set of distinct words that occur in it.
+ explicit TERScorerImpl(const vector<WordID>& ref) : ref_(ref) {
+   rwexists_.insert(ref.begin(), ref.end());
+ }
+
+ // Scores `hyp` against the reference: returns the total edit cost including
+ // shifts and writes the individual operation counts to the out-parameters.
+ // Thin wrapper around CalculateAllShifts.
+ float Calculate(const vector<WordID>& hyp, int* subs, int* ins, int* dels, int* shifts) const {
+ return CalculateAllShifts(hyp, subs, ins, dels, shifts);
+ }
+
+ // Number of words in the reference this scorer was built with.
+ inline int GetRefLength() const {
+ return ref_.size();
+ }
+
+ private:
+ // The reference sentence. Held by reference, so the vector passed to the
+ // constructor must outlive this object.
+ const vector<WordID>& ref_;
+ // Distinct words of ref_, filled by the constructor.
+ set<WordID> rwexists_;
+
+ // Maps a word sequence to the set of start positions in ref_ at which it
+ // occurs (see BuildWordMatches).
+ typedef unordered_map<vector<WordID>, set<int>, boost::hash<vector<WordID> > > NgramToIntsMap;
+ // Rebuilt by CalculateAllShifts for every hypothesis; mutable because the
+ // scoring methods are const.
+ mutable NgramToIntsMap nmap_;
+
+ // Word-level edit distance between `hyp` and `ref` by dynamic programming.
+ //
+ // Returns the minimum total cost and stores in `path` the sequence of
+ // operations, ordered from the start of the sentences to the end. Ties are
+ // resolved in favour of match/substitution, then insertion, then deletion,
+ // because a later candidate only replaces the current one when it is
+ // strictly cheaper.
+ static float MinimumEditDistance(
+     const vector<WordID>& hyp,
+     const vector<WordID>& ref,
+     vector<TransType>* path) {
+   const int hlen = hyp.size();
+   const int rlen = ref.size();
+   vector<vector<float> > cost(hlen + 1, vector<float>(rlen + 1, 0));
+   vector<vector<TransType> > back(hlen + 1, vector<TransType>(rlen + 1, MATCH));
+
+   // Borders: aligning a prefix with the empty string costs its length.
+   for (int h = 0; h <= hlen; ++h)
+     cost[h][0] = h;
+   for (int r = 0; r <= rlen; ++r)
+     cost[0][r] = r;
+
+   for (int h = 1; h <= hlen; ++h) {
+     for (int r = 1; r <= rlen; ++r) {
+       const bool same_word = (ref[r - 1] == hyp[h - 1]);
+       const float diagonal = cost[h - 1][r - 1];
+       float best = same_word ? diagonal : diagonal + COSTS::substitution;
+       TransType op = same_word ? MATCH : SUBSTITUTION;
+
+       const float via_insertion = cost[h - 1][r] + COSTS::insertion;
+       if (best > via_insertion) {
+         best = via_insertion;
+         op = INSERTION;
+       }
+       const float via_deletion = cost[h][r - 1] + COSTS::deletion;
+       if (best > via_deletion) {
+         best = via_deletion;
+         op = DELETION;
+       }
+
+       cost[h][r] = best;
+       back[h][r] = op;
+     }
+   }
+
+   // Walk the back-pointers from the bottom-right corner, then reverse so the
+   // path reads left to right.
+   path->clear();
+   int h = hlen;
+   int r = rlen;
+   while (h > 0 || r > 0) {
+     if (r == 0) {
+       // Reference exhausted: the remaining hypothesis words are insertions.
+       --h;
+       path->push_back(INSERTION);
+       continue;
+     }
+     if (h == 0) {
+       // Hypothesis exhausted: the remaining reference words are deletions.
+       --r;
+       path->push_back(DELETION);
+       continue;
+     }
+     const TransType op = back[h][r];
+     path->push_back(op);
+     if (op == INSERTION) {
+       --h;
+     } else if (op == DELETION) {
+       --r;
+     } else {  // MATCH or SUBSTITUTION consume one word on each side
+       --h;
+       --r;
+     }
+   }
+   reverse(path->begin(), path->end());
+   return cost[hlen][rlen];
+ }
+
+ // Fills `nmap` with every reference n-gram (up to MAX_SHIFT_SIZE words) made
+ // only of words that also occur in `hyp`, mapped to the reference positions
+ // where that n-gram starts. Any previous content of `nmap` is discarded.
+ void BuildWordMatches(const vector<WordID>& hyp, NgramToIntsMap* nmap) const {
+   nmap->clear();
+
+   // Words present in both the hypothesis and the reference.
+   set<WordID> shared;
+   for (vector<WordID>::const_iterator w = hyp.begin(); w != hyp.end(); ++w) {
+     if (rwexists_.count(*w))
+       shared.insert(*w);
+   }
+
+   const int rlen = static_cast<int>(ref_.size());
+   for (int start = 0; start < rlen; ++start) {
+     if (!shared.count(ref_[start])) continue;
+     const int limit = min(MAX_SHIFT_SIZE, rlen - start);
+     vector<WordID> ngram;
+     // Extend the n-gram until a word that the hypothesis lacks is reached.
+     for (int len = 0; len < limit && shared.count(ref_[start + len]); ++len) {
+       ngram.push_back(ref_[start + len]);
+       (*nmap)[ngram].insert(start);
+     }
+   }
+ }
+
+ // Writes to `out` a copy of `in` in which the span in[start..end]
+ // (inclusive) has been moved. `out` always ends up with the same number of
+ // words as `in`; this is asserted at the end.
+ //
+ // moveto selects the new location:
+ //   moveto == -1           the span becomes the front of the sentence
+ //   moveto <  start        the span is placed right after in[moveto]
+ //   moveto >  end          the span is placed right after in[moveto]
+ //   start <= moveto <= end the span is pushed right by (moveto - start)
+ //                          words, as far as the sentence length allows
+ static void PerformShift(const vector<WordID>& in,
+ int start, int end, int moveto, vector<WordID>* out) {
+ // cerr << "ps: " << start << " " << end << " " << moveto << endl;
+ out->clear();
+ if (moveto == -1) {
+ // span, then everything that preceded it, then everything that followed it
+ for (int i = start; i <= end; ++i)
+ out->push_back(in[i]);
+ for (int i = 0; i < start; ++i)
+ out->push_back(in[i]);
+ for (int i = end+1; i < in.size(); ++i)
+ out->push_back(in[i]);
+ } else if (moveto < start) {
+ // in[0..moveto], span, in[moveto+1..start-1], in[end+1..]
+ for (int i = 0; i <= moveto; ++i)
+ out->push_back(in[i]);
+ for (int i = start; i <= end; ++i)
+ out->push_back(in[i]);
+ for (int i = moveto+1; i < start; ++i)
+ out->push_back(in[i]);
+ for (int i = end+1; i < in.size(); ++i)
+ out->push_back(in[i]);
+ } else if (moveto > end) {
+ // in[0..start-1], in[end+1..moveto], span, in[moveto+1..]
+ for (int i = 0; i < start; ++i)
+ out->push_back(in[i]);
+ for (int i = end+1; i <= moveto; ++i)
+ out->push_back(in[i]);
+ for (int i = start; i <= end; ++i)
+ out->push_back(in[i]);
+ for (int i = moveto+1; i < in.size(); ++i)
+ out->push_back(in[i]);
+ } else {
+ // Target lies inside the span: emit the prefix, then the (moveto - start)
+ // words that follow the span (fewer if the sentence ends first), then the
+ // span, then whatever remains.
+ for (int i = 0; i < start; ++i)
+ out->push_back(in[i]);
+ for (int i = end+1; (i < in.size()) && (i <= end + (moveto - start)); ++i)
+ out->push_back(in[i]);
+ for (int i = start; i <= end; ++i)
+ out->push_back(in[i]);
+ for (int i = (end + (moveto - start))+1; i < in.size(); ++i)
+ out->push_back(in[i]);
+ }
+ // Dump the arguments before the assertion fires so a failure can be diagnosed.
+ if (out->size() != in.size()) {
+ cerr << "ps: " << start << " " << end << " " << moveto << endl;
+ cerr << "in=" << TD::GetString(in) << endl;
+ cerr << "out=" << TD::GetString(*out) << endl;
+ }
+ assert(out->size() == in.size());
+ // cerr << "ps: " << TD::GetString(*out) << endl;
+ }
+
+ // Enumerates candidate shifts of spans of `hyp` and appends them to
+ // `shifts`, bucketed by span length: (*shifts)[k] receives spans that cover
+ // k + 1 hypothesis words.
+ //
+ //   ralign    for each reference position, the hypothesis position it is
+ //             aligned to
+ //   herr      per hypothesis position, true if that word is part of an error
+ //   rerr      per reference position, true if that word is part of an error
+ //   min_size  length of the shortest span to try
+ //   shifts    must already hold enough buckets; it is indexed with
+ //             end - start, which stays below MAX_SHIFT_SIZE
+ //
+ // Uses nmap_, so BuildWordMatches must have been run for this hypothesis.
+ // The Shift objects store a reference position in moveto.
+ void GetAllPossibleShifts(const vector<WordID>& hyp,
+ const vector<int>& ralign,
+ const vector<bool>& herr,
+ const vector<bool>& rerr,
+ const int min_size,
+ vector<vector<Shift> >* shifts) const {
+ for (int start = 0; start < hyp.size(); ++start) {
+ // Quick filter on the first word alone: it must occur somewhere in the
+ // reference whose aligned hypothesis position differs from `start` and is
+ // within MAX_SHIFT_DIST of it.
+ vector<WordID> cp(1, hyp[start]);
+ NgramToIntsMap::iterator niter = nmap_.find(cp);
+ if (niter == nmap_.end()) continue;
+ bool ok = false;
+ int moveto;
+ for (set<int>::iterator i = niter->second.begin(); i != niter->second.end(); ++i) {
+ moveto = *i;
+ int rm = ralign[moveto];
+ ok = (start != rm &&
+ (rm - start) < MAX_SHIFT_DIST &&
+ (start - rm - 1) < MAX_SHIFT_DIST);
+ if (ok) break;
+ }
+ if (!ok) continue;
+ // Grow the span one word at a time, keeping cp equal to the span's words.
+ // NOTE(review): cp restarts empty and receives only hyp[end], so it matches
+ // hyp[start..end] only when min_size == 1 (the value passed by the caller
+ // in this file).
+ cp.clear();
+ for (int end = start + min_size - 1;
+ ok && end < hyp.size() && end < (start + MAX_SHIFT_SIZE); ++end) {
+ cp.push_back(hyp[end]);
+ vector<Shift>& sshifts = (*shifts)[end - start];
+ // ok is re-established below; if it stays false the span is not extended.
+ ok = false;
+ NgramToIntsMap::iterator niter = nmap_.find(cp);
+ if (niter == nmap_.end()) break;
+ // A span without any hypothesis error yields no shift itself, but a longer
+ // span starting here still might, so keep extending.
+ bool any_herr = false;
+ for (int i = start; i <= end && !any_herr; ++i)
+ any_herr = herr[i];
+ if (!any_herr) {
+ ok = true;
+ continue;
+ }
+ for (set<int>::iterator mi = niter->second.begin();
+ mi != niter->second.end(); ++mi) {
+ int moveto = *mi;
+ int rm = ralign[moveto];
+ // Skip targets aligned to `start`, aligned inside the span, or further
+ // away than MAX_SHIFT_DIST.
+ if (! ((rm != start) &&
+ ((rm < start) || (rm > end)) &&
+ (rm - start <= MAX_SHIFT_DIST) &&
+ ((start - rm - 1) <= MAX_SHIFT_DIST))) continue;
+ ok = true;
+ // Only reference spans that contain at least one error are used as targets.
+ bool any_rerr = false;
+ for (int i = 0; (i <= end - start) && (!any_rerr); ++i)
+ any_rerr = rerr[moveto+i];
+ if (!any_rerr) continue;
+ // Emit one shift per reference offset in the matched span, skipping
+ // offsets aligned to `start` and, after the first offset, offsets that
+ // share the first offset's alignment.
+ for (int roff = 0; roff <= (end - start); ++roff) {
+ int rmr = ralign[moveto+roff];
+ if ((start != rmr) && ((roff == 0) || (rmr != ralign[moveto])))
+ sshifts.push_back(Shift(start, end, moveto + roff));
+ }
+ }
+ }
+ }
+ }
+
+ // Searches for the single shift of `cur` that most reduces its cost against
+ // ref_ once COSTS::shift has been charged for the move.
+ //
+ //   cur       hypothesis as currently reordered
+ //   hyp       not used by this function
+ //   curerr    edit distance of `cur` to ref_
+ //   path      edit path of `cur` to ref_ (from MinimumEditDistance)
+ //   new_hyp   receives the shifted hypothesis when a shift is accepted
+ //   newerr    set to curerr on entry, then to the edit distance of new_hyp
+ //   new_path  receives the edit path of new_hyp
+ //
+ // Returns true if a shift was accepted; otherwise new_hyp and new_path are
+ // left untouched.
+ bool CalculateBestShift(const vector<WordID>& cur,
+ const vector<WordID>& hyp,
+ float curerr,
+ const vector<TransType>& path,
+ vector<WordID>* new_hyp,
+ float* newerr,
+ vector<TransType>* new_path) const {
+ // Derive from the edit path: herr (one flag per hypothesis word), rerr (one
+ // flag per reference word) and ralign (reference position -> hypothesis
+ // position). hpos is the last hypothesis position consumed, so a deletion
+ // at the very start aligns to -1.
+ vector<bool> herr, rerr;
+ vector<int> ralign;
+ int hpos = -1;
+ for (int i = 0; i < path.size(); ++i) {
+ switch (path[i]) {
+ case MATCH:
+ ++hpos;
+ herr.push_back(false);
+ rerr.push_back(false);
+ ralign.push_back(hpos);
+ break;
+ case SUBSTITUTION:
+ ++hpos;
+ herr.push_back(true);
+ rerr.push_back(true);
+ ralign.push_back(hpos);
+ break;
+ case INSERTION:
+ ++hpos;
+ herr.push_back(true);
+ break;
+ case DELETION:
+ rerr.push_back(true);
+ ralign.push_back(hpos);
+ break;
+ }
+ }
+#if 0
+ cerr << "RALIGN: ";
+ for (int i = 0; i < rerr.size(); ++i)
+ cerr << ralign[i] << " ";
+ cerr << endl;
+ cerr << "RERR: ";
+ for (int i = 0; i < rerr.size(); ++i)
+ cerr << (bool)rerr[i] << " ";
+ cerr << endl;
+ cerr << "HERR: ";
+ for (int i = 0; i < herr.size(); ++i)
+ cerr << (bool)herr[i] << " ";
+ cerr << endl;
+#endif
+
+ // shifts[i] holds the candidate spans of i + 1 words.
+ vector<vector<Shift> > shifts(MAX_SHIFT_SIZE + 1);
+ GetAllPossibleShifts(cur, ralign, herr, rerr, 1, &shifts);
+ // Stays 0 until a shift has been accepted, then equals COSTS::shift.
+ float cur_best_shift_cost = 0;
+ *newerr = curerr;
+ vector<TransType> cur_best_path;
+ vector<WordID> cur_best_hyp;
+
+ bool res = false;
+ // Longest spans first.
+ for (int i = shifts.size() - 1; i >=0; --i) {
+ // curfix: improvement achieved so far. maxfix: bound used for spans of
+ // i + 1 words (two edits per word minus the shift cost). Shorter spans
+ // have smaller bounds, so the search stops once the bound is exceeded.
+ float curfix = curerr - (cur_best_shift_cost + *newerr);
+ float maxfix = 2.0f * (1 + i) - COSTS::shift;
+ if ((curfix > maxfix) || ((cur_best_shift_cost == 0) && (curfix == maxfix))) break;
+ for (int j = 0; j < shifts[i].size(); ++j) {
+ const Shift& s = shifts[i][j];
+ curfix = curerr - (cur_best_shift_cost + *newerr);
+ maxfix = 2.0f * (1 + i) - COSTS::shift; // TODO remove?
+ if ((curfix > maxfix) || ((cur_best_shift_cost == 0) && (curfix == maxfix))) continue;
+ vector<WordID> shifted(cur.size());
+ // s.moveto() is a reference position; ralign turns it into the hypothesis
+ // position that PerformShift expects.
+ PerformShift(cur, s.begin(), s.end(), ralign[s.moveto()], &shifted);
+ vector<TransType> try_path;
+ float try_cost = MinimumEditDistance(shifted, ref_, &try_path);
+ // Accept when strictly better than the best so far, or when it breaks
+ // even and nothing has been accepted yet.
+ float gain = (*newerr + cur_best_shift_cost) - (try_cost + COSTS::shift);
+ if (gain > 0.0f || ((cur_best_shift_cost == 0.0f) && (gain == 0.0f))) {
+ *newerr = try_cost;
+ cur_best_shift_cost = COSTS::shift;
+ new_path->swap(try_path);
+ new_hyp->swap(shifted);
+ res = true;
+ // cerr << "Found better shift " << s.begin() << "..." << s.end() << " moveto " << s.moveto() << endl;
+ }
+ }
+ }
+
+ return res;
+ }
+
+ // Counts the substitutions, insertions and deletions along an edit path.
+ // Matches are not counted. All three outputs are reset to zero first.
+ static void GetPathStats(const vector<TransType>& path, int* subs, int* ins, int* dels) {
+   *dels = 0;
+   *ins = 0;
+   *subs = 0;
+   for (vector<TransType>::const_iterator op = path.begin(); op != path.end(); ++op) {
+     if (*op == SUBSTITUTION) {
+       ++(*subs);
+     } else if (*op == INSERTION) {
+       ++(*ins);
+     } else if (*op == DELETION) {
+       ++(*dels);
+     }
+   }
+ }
+
+ // Full TER computation for one hypothesis: starts from the plain edit
+ // distance and keeps applying the best available shift until no shift
+ // helps. Returns the final edit distance plus the accumulated shift cost,
+ // and reports the operation counts through the out-parameters.
+ float CalculateAllShifts(const vector<WordID>& hyp,
+                          int* subs, int* ins, int* dels, int* shifts) const {
+   BuildWordMatches(hyp, &nmap_);
+
+   vector<TransType> best_path;
+   float edit_cost = MinimumEditDistance(hyp, ref_, &best_path);
+   vector<WordID> working(hyp);
+   float shift_cost = 0;
+   *shifts = 0;
+
+   // The shift search can be disabled for long references through
+   // ter_short_circuit_long_sentences (a negative value always searches).
+   const bool search_shifts = ter_short_circuit_long_sentences < 0 ||
+                              ref_.size() < ter_short_circuit_long_sentences;
+   for (bool improved = search_shifts; improved; ) {
+     vector<WordID> candidate;
+     vector<TransType> candidate_path;
+     float candidate_cost;
+     improved = CalculateBestShift(working, hyp, edit_cost, best_path,
+                                   &candidate, &candidate_cost, &candidate_path);
+     if (improved) {
+       shift_cost += COSTS::shift;
+       ++(*shifts);
+       edit_cost = candidate_cost;
+       best_path.swap(candidate_path);
+       working.swap(candidate);
+     }
+   }
+
+   GetPathStats(best_path, subs, ins, dels);
+   return edit_cost + shift_cost;
+ }
+};
+
+#if 0
+void TERScore::ScoreDetails(std::string* details) const {
+ char buf[200];
+ sprintf(buf, "TER = %.2f, %3d|%3d|%3d|%3d (len=%d)",
+ ComputeScore() * 100.0f,
+ stats[kINSERTIONS],
+ stats[kDELETIONS],
+ stats[kSUBSTITUTIONS],
+ stats[kSHIFTS],
+ stats[kREF_WORDCOUNT]);
+ *details = buf;
+}
+#endif
+
+} // namespace NewScorer
+
+// Computes the TER sufficient statistics of `hyp` against `refs`.
+//
+// Every reference is scored separately and the counts of the lowest-cost one
+// are stored in out->fields (insertions, deletions, substitutions, shifts and
+// reference word count, at the k* indices). On ties the earliest reference
+// wins. The stored word count is the integer average reference length when
+// ter_use_average_ref_len is set, otherwise the length of the winning
+// reference.
+//
+// With no references at all, every field is set to zero; the average length
+// is not computed, because it would divide by zero.
+void TERMetric::ComputeSufficientStatistics(const vector<WordID>& hyp,
+                                            const vector<vector<WordID> >& refs,
+                                            SufficientStats* out) const {
+  out->fields.resize(kDUMMY_LAST_ENTRY);
+  if (refs.empty()) {
+    // resize() keeps existing elements, so clear them explicitly.
+    for (unsigned k = 0; k < kDUMMY_LAST_ENTRY; ++k)
+      out->fields[k] = 0;
+    return;
+  }
+
+  unsigned avg_len = 0;
+  for (unsigned i = 0; i < refs.size(); ++i)
+    avg_len += refs[i].size();
+  avg_len /= refs.size();
+
+  float best_score = numeric_limits<float>::max();
+  for (unsigned i = 0; i < refs.size(); ++i) {
+    int subs, ins, dels, shifts;
+    NewScorer::TERScorerImpl ter(refs[i]);
+    const float score = ter.Calculate(hyp, &subs, &ins, &dels, &shifts);
+    // cerr << "Component TER cost: " << score << endl;
+    if (score < best_score) {
+      out->fields[kINSERTIONS] = ins;
+      out->fields[kDELETIONS] = dels;
+      out->fields[kSUBSTITUTIONS] = subs;
+      out->fields[kSHIFTS] = shifts;
+      if (ter_use_average_ref_len) {
+        out->fields[kREF_WORDCOUNT] = avg_len;
+      } else {
+        out->fields[kREF_WORDCOUNT] = refs[i].size();
+      }
+
+      best_score = score;
+    }
+  }
+}
+
+// Number of fields in the TER statistics vector (kDUMMY_LAST_ENTRY).
+unsigned TERMetric::SufficientStatisticsVectorSize() const {
+ return kDUMMY_LAST_ENTRY;
+}
+
+// TER score: total number of edits (insertions, deletions, substitutions and
+// shifts) divided by the reference word count.
+float TERMetric::ComputeScore(const SufficientStats& stats) const {
+  const float ref_words = static_cast<float>(stats[kREF_WORDCOUNT]);
+  const float total_edits = static_cast<float>(
+      stats[kINSERTIONS] + stats[kDELETIONS] + stats[kSUBSTITUTIONS] + stats[kSHIFTS]);
+  return total_edits / ref_words;
+}
+
+// Formats the TER score (as a percentage) together with the insertion,
+// deletion, substitution and shift counts and the reference length.
+//
+// snprintf bounds the write to the size of the buffer: the statistics are
+// floating-point values, and six large values printed with %f could exceed
+// 200 characters, which an unbounded sprintf would write past the array.
+string TERMetric::DetailedScore(const SufficientStats& stats) const {
+  char buf[200];
+  snprintf(buf, sizeof(buf), "TER = %.2f, %3.f|%3.f|%3.f|%3.f (len=%3.f)",
+           ComputeScore(stats) * 100.0f,
+           stats[kINSERTIONS],
+           stats[kDELETIONS],
+           stats[kSUBSTITUTIONS],
+           stats[kSHIFTS],
+           stats[kREF_WORDCOUNT]);
+  return buf;
+}
+
diff --git a/mteval/ns_ter.h b/mteval/ns_ter.h
new file mode 100644
index 00000000..c5c25413
--- /dev/null
+++ b/mteval/ns_ter.h
@@ -0,0 +1,21 @@
+#ifndef _NS_TER_H_
+#define _NS_TER_H_
+
+#include "ns.h"
+
+// Evaluation metric registered under the name "TER". Its score is the number
+// of edits (insertions, deletions, substitutions, shifts) divided by the
+// reference word count.
+class TERMetric : public EvaluationMetric {
+ // The constructor is protected, so instances are created by the friend
+ // class (presumably through EvaluationMetric::Instance - confirm in ns.cc).
+ friend class EvaluationMetric;
+ protected:
+ TERMetric() : EvaluationMetric("TER") {}
+
+ public:
+ // Always true for TER.
+ virtual bool IsErrorMetric() const;
+ // Number of fields ComputeSufficientStatistics produces.
+ virtual unsigned SufficientStatisticsVectorSize() const;
+ // Human-readable summary: score as a percentage plus the individual counts.
+ virtual std::string DetailedScore(const SufficientStats& stats) const;
+ // Scores hyp against each reference and stores the counts of the
+ // lowest-cost one in out->fields.
+ virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
+ const std::vector<std::vector<WordID> >& refs,
+ SufficientStats* out) const;
+ // Edits divided by reference word count.
+ virtual float ComputeScore(const SufficientStats& stats) const;
+};
+
+#endif
diff --git a/mteval/scorer_test.cc b/mteval/scorer_test.cc
index a07a8c4b..73159557 100644
--- a/mteval/scorer_test.cc
+++ b/mteval/scorer_test.cc
@@ -3,9 +3,11 @@
#include <valarray>
#include <gtest/gtest.h>
+#include "ns.h"
#include "tdict.h"
#include "scorer.h"
#include "aer_scorer.h"
+#include "kernel_string_subseq.h"
using namespace std;
@@ -175,6 +177,52 @@ TEST_F(ScorerTest, AERTest) {
EXPECT_EQ(d2, details);
}
+// Prints ssk<4> values for lambda = 0.1 ... 0.9. Nothing is asserted; the
+// numbers go to cerr for manual inspection.
+//   f   average over the four refs0 entries against hyp1
+//   f2  average over the four refs1 entries against hyp2
+//   f3  average over the eight crossed pairs (refs0 with hyp2, refs1 with
+//       hyp1), printed as "bad"
+// The value after "f=" is the mean of f and f2 minus f3.
+TEST_F(ScorerTest, Kernel) {
+ for (int i = 1; i < 10; ++i) {
+ const float l = (i / 10.0);
+ float f = ssk<4>(refs0[0], hyp1, l) +
+ ssk<4>(refs0[1], hyp1, l) +
+ ssk<4>(refs0[2], hyp1, l) +
+ ssk<4>(refs0[3], hyp1, l);
+ float f2= ssk<4>(refs1[0], hyp2, l) +
+ ssk<4>(refs1[1], hyp2, l) +
+ ssk<4>(refs1[2], hyp2, l) +
+ ssk<4>(refs1[3], hyp2, l);
+ f /= 4;
+ f2 /= 4;
+ // Crossed pairs: each hypothesis against the other segment's references.
+ float f3= ssk<4>(refs0[0], hyp2, l) +
+ ssk<4>(refs0[1], hyp2, l) +
+ ssk<4>(refs0[2], hyp2, l) +
+ ssk<4>(refs0[3], hyp2, l);
+ float f4= ssk<4>(refs1[0], hyp1, l) +
+ ssk<4>(refs1[1], hyp1, l) +
+ ssk<4>(refs1[2], hyp1, l) +
+ ssk<4>(refs1[3], hyp1, l);
+ f3 += f4;
+ f3 /= 8;
+ cerr << "LAMBDA=" << l << "\t" << f << " " << f2 << "\tf=" << ((f + f2)/2 - f3) << " (bad=" << f3 << ")" << endl;
+ }
+}
+
+// Smoke test of the new scoring API: evaluates two segments with a combined
+// BLEU/TER metric, adds their statistics and prints the encoded statistics
+// and the resulting score. Nothing is asserted.
+TEST_F(ScorerTest, NewScoreAPI) {
+  //EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU");
+  //EvaluationMetric* metric = EvaluationMetric::Instance("METEOR");
+  EvaluationMetric* comb_metric = EvaluationMetric::Instance("COMB:IBM_BLEU=0.5;TER=-0.5");
+
+  // One evaluator per reference set.
+  boost::shared_ptr<SegmentEvaluator> eval_refs0 = comb_metric->CreateSegmentEvaluator(refs0);
+  boost::shared_ptr<SegmentEvaluator> eval_refs1 = comb_metric->CreateSegmentEvaluator(refs1);
+
+  SufficientStats total;
+  eval_refs0->Evaluate(hyp1, &total);
+  SufficientStats second;
+  eval_refs1->Evaluate(hyp2, &second);
+  total += second;
+
+  string encoded;
+  total.Encode(&encoded);
+  cerr << "SS: " << encoded << endl;
+  cerr << comb_metric->ComputeScore(total) << endl;
+  //SufficientStats statse("IBM_BLEU 53 32 18 11 65 63 61 59 65 72");
+  //cerr << metric->ComputeScore(statse) << endl;
+}
+
int main(int argc, char **argv) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();