summary | refs | log | tree | commit | diff
path: root/mteval
diff options
context:
space:
mode:
Diffstat (limited to 'mteval')
-rw-r--r-- mteval/Jamfile 8
-rw-r--r-- mteval/Makefile.am 37
-rw-r--r-- mteval/mbr_kbest.cc 38
-rw-r--r-- mteval/meteor_jar.cc.in 3
-rw-r--r-- mteval/ns.cc 42
-rw-r--r-- mteval/ns_docscorer.cc 26
-rw-r--r-- mteval/ns_docscorer.h 12
-rw-r--r-- mteval/ns_ssk.cc 32
-rw-r--r-- mteval/ns_ssk.h 22
-rw-r--r-- mteval/scorer_test.cc 2
10 files changed, 179 insertions, 43 deletions
diff --git a/mteval/Jamfile b/mteval/Jamfile
deleted file mode 100644
index 3ed2c2cc..00000000
--- a/mteval/Jamfile
+++ /dev/null
@@ -1,8 +0,0 @@
-import testing ;
-
-lib mteval : ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc ns_cer.cc ..//utils : <include>. : : <include>. <library>..//z ;
-exe fast_score : fast_score.cc mteval ..//utils ..//boost_program_options ;
-exe mbr_kbest : mbr_kbest.cc mteval ..//utils ..//boost_program_options ;
-alias programs : fast_score mbr_kbest ;
-
-unit-test scorer_test : scorer_test.cc mteval ..//utils ..//z ..//boost_unit_test_framework : <testing.arg>$(TOP)/mteval/test_data ;
diff --git a/mteval/Makefile.am b/mteval/Makefile.am
index 22550c99..681e798e 100644
--- a/mteval/Makefile.am
+++ b/mteval/Makefile.am
@@ -8,15 +8,42 @@ TESTS = scorer_test
noinst_LIBRARIES = libmteval.a
-libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc ns_cer.cc
+EXTRA_DIST = test_data
+
+libmteval_a_SOURCES = \
+ aer_scorer.h \
+ comb_scorer.h \
+ external_scorer.h \
+ ns.h \
+ ns_cer.h \
+ ns_comb.h \
+ ns_docscorer.h \
+ ns_ext.h \
+ ns_ssk.h \
+ ns_ter.h \
+ scorer.h \
+ ter.h \
+ aer_scorer.cc \
+ comb_scorer.cc \
+ external_scorer.cc \
+ meteor_jar.cc \
+ ns.cc \
+ ns_cer.cc \
+ ns_comb.cc \
+ ns_docscorer.cc \
+ ns_ext.cc \
+ ns_ssk.cc \
+ ns_ter.cc \
+ scorer.cc \
+ ter.cc
fast_score_SOURCES = fast_score.cc
-fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz
+fast_score_LDADD = libmteval.a ../utils/libutils.a
mbr_kbest_SOURCES = mbr_kbest.cc
-mbr_kbest_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz
+mbr_kbest_LDADD = libmteval.a ../utils/libutils.a
scorer_test_SOURCES = scorer_test.cc
-scorer_test_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz
+scorer_test_LDADD = libmteval.a ../utils/libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS)
-AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils
+AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/mteval/test_data\" -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/utils
diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc
index 2bd31566..2519bc01 100644
--- a/mteval/mbr_kbest.cc
+++ b/mteval/mbr_kbest.cc
@@ -1,7 +1,9 @@
#include <iostream>
#include <vector>
+#include <tr1/unordered_map>
#include <boost/program_options.hpp>
+#include <boost/functional/hash.hpp>
#include "prob.h"
#include "tdict.h"
@@ -10,6 +12,7 @@
#include "stringlib.h"
using namespace std;
+using namespace std::tr1;
namespace po = boost::program_options;
@@ -31,27 +34,33 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
}
}
+struct ScoreComparer {  // comparison functor over (sentence, score) pairs; ReadKBestList passes it to sort()
+ bool operator()(const pair<vector<WordID>, prob_t>& a, const pair<vector<WordID>, prob_t>& b) const {
+ return a.second > b.second;  // '>' puts the pair with the larger score first (descending order)
+ }
+};
+
struct LossComparer {
bool operator()(const pair<vector<WordID>, prob_t>& a, const pair<vector<WordID>, prob_t>& b) const {
return a.second < b.second;
}
};
-bool ReadKBestList(istream* in, string* sent_id, vector<pair<vector<WordID>, prob_t> >* list) {
+bool ReadKBestList(const double mbr_scale, istream* in, string* sent_id, vector<pair<vector<WordID>, prob_t> >* list) {
static string cache_id;
static pair<vector<WordID>, prob_t> cache_pair;
list->clear();
string cur_id;
+ unordered_map<vector<WordID>, unsigned, boost::hash<vector<WordID> > > sent2id;
if (cache_pair.first.size() > 0) {
list->push_back(cache_pair);
+ sent2id[cache_pair.first] = 0;
cur_id = cache_id;
cache_pair.first.clear();
}
string line;
string tstr;
- while(*in) {
- getline(*in, line);
- if (line.empty()) continue;
+ while(getline(*in, line)) {
size_t p1 = line.find(" ||| ");
if (p1 == string::npos) { cerr << "Bad format: " << line << endl; abort(); }
size_t p2 = line.find(" ||| ", p1 + 4);
@@ -59,16 +68,25 @@ bool ReadKBestList(istream* in, string* sent_id, vector<pair<vector<WordID>, pro
size_t p3 = line.rfind(" ||| ");
cache_id = line.substr(0, p1);
tstr = line.substr(p1 + 5, p2 - p1 - 5);
- double val = strtod(line.substr(p3 + 5).c_str(), NULL);
+ double val = strtod(line.substr(p3 + 5).c_str(), NULL) * mbr_scale;
TD::ConvertSentence(tstr, &cache_pair.first);
cache_pair.second.logeq(val);
if (cur_id.empty()) cur_id = cache_id;
if (cur_id == cache_id) {
- list->push_back(cache_pair);
+ unordered_map<vector<WordID>, unsigned, boost::hash<vector<WordID> > >::iterator it =
+ sent2id.find(cache_pair.first);
+ if (it == sent2id.end()) {
+ sent2id.insert(make_pair(cache_pair.first, unsigned(list->size())));
+ list->push_back(cache_pair);
+ } else {
+ (*list)[it->second].second += cache_pair.second;
+ // cerr << "Cruch: " << line << "\n newp=" << (*list)[it->second].second << endl;
+ }
*sent_id = cur_id;
cache_pair.first.clear();
} else { break; }
}
+ sort(list->begin(), list->end(), ScoreComparer());
return !list->empty();
}
@@ -87,14 +105,14 @@ int main(int argc, char** argv) {
vector<pair<vector<WordID>, prob_t> > list;
ReadFile rf(file);
string sent_id;
- while(ReadKBestList(rf.stream(), &sent_id, &list)) {
+ while(ReadKBestList(mbr_scale, rf.stream(), &sent_id, &list)) {
vector<prob_t> joints(list.size());
- const prob_t max_score = pow(list.front().second, mbr_scale);
+ const prob_t max_score = list.front().second;
prob_t marginal = prob_t::Zero();
for (int i = 0 ; i < list.size(); ++i) {
- const prob_t joint = pow(list[i].second, mbr_scale) / max_score;
+ const prob_t joint = list[i].second / max_score;
joints[i] = joint;
- // cerr << "list[" << i << "] joint=" << log(joint) << endl;
+ //cerr << "list[" << i << "] joint=" << log(joint) << endl;
marginal += joint;
}
int mbr_idx = -1;
diff --git a/mteval/meteor_jar.cc.in b/mteval/meteor_jar.cc.in
new file mode 100644
index 00000000..fe45a72a
--- /dev/null
+++ b/mteval/meteor_jar.cc.in
@@ -0,0 +1,3 @@
+// Location of the METEOR jar; ns.cc declares this extern and checks that the file exists. The value is presumably a placeholder filled in by the build configuration -- TODO confirm.
+const char* meteor_jar_path = "@METEOR_JAR@";
+
diff --git a/mteval/ns.cc b/mteval/ns.cc
index 3af7cc63..b64d4798 100644
--- a/mteval/ns.cc
+++ b/mteval/ns.cc
@@ -3,6 +3,7 @@
#include "ns_ext.h"
#include "ns_comb.h"
#include "ns_cer.h"
+#include "ns_ssk.h"
#include <cstdio>
#include <cassert>
@@ -12,12 +13,15 @@
#include <sstream>
#include "tdict.h"
+#include "filelib.h"
#include "stringlib.h"
using namespace std;
map<string, EvaluationMetric*> EvaluationMetric::instances_;
+extern const char* meteor_jar_path;
+
SegmentEvaluator::~SegmentEvaluator() {}
EvaluationMetric::~EvaluationMetric() {}
@@ -57,7 +61,7 @@ string EvaluationMetric::DetailedScore(const SufficientStats& stats) const {
return os.str();
}
-enum BleuType { IBM, Koehn, NIST };
+enum BleuType { IBM, Koehn, NIST, QCRI };
template <unsigned int N = 4u, BleuType BrevityType = IBM>
struct BleuSegmentEvaluator : public SegmentEvaluator {
BleuSegmentEvaluator(const vector<vector<WordID> >& refs, const EvaluationMetric* em) : evaluation_metric(em) {
@@ -87,7 +91,7 @@ struct BleuSegmentEvaluator : public SegmentEvaluator {
float& ref_len = out->fields[2*N + 1];
hyp_len = hyp.size();
ref_len = lengths_[0];
- if (lengths_.size() > 1 && BrevityType == IBM) {
+ if (lengths_.size() > 1 && (BrevityType == IBM || BrevityType == QCRI)) {
float bestd = 2000000;
float hl = hyp.size();
float bl = -1;
@@ -182,7 +186,7 @@ struct BleuSegmentEvaluator : public SegmentEvaluator {
template <unsigned int N = 4u, BleuType BrevityType = IBM>
struct BleuMetric : public EvaluationMetric {
- BleuMetric() : EvaluationMetric(BrevityType == IBM ? "IBM_BLEU" : (BrevityType == Koehn ? "KOEHN_BLEU" : "NIST_BLEU")) {}
+ BleuMetric() : EvaluationMetric(BrevityType == IBM ? "IBM_BLEU" : (BrevityType == Koehn ? "KOEHN_BLEU" : (BrevityType == NIST ? "NIST_BLEU" : "QCRI_BLEU"))) {}
unsigned SufficientStatisticsVectorSize() const { return N*2 + 2; }
boost::shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const vector<vector<WordID> >& refs) const {
return boost::shared_ptr<SegmentEvaluator>(new BleuSegmentEvaluator<N,BrevityType>(refs, this));
@@ -190,26 +194,37 @@ struct BleuMetric : public EvaluationMetric {
float ComputeBreakdown(const SufficientStats& stats, float* bp, vector<float>* out) const {
if (out) { out->clear(); }
float log_bleu = 0;
+ float log_bleu_adj = 0; // for QCRI
int count = 0;
+ float alpha = BrevityType == QCRI ? 1 : 0.01;
for (int i = 0; i < N; ++i) {
if (stats.fields[i+N] > 0) {
float cor_count = stats.fields[i]; // correct_ngram_hit_counts[i];
// smooth bleu
- if (!cor_count) { cor_count = 0.01; }
+ if (!cor_count) { cor_count = alpha; }
float lprec = log(cor_count) - log(stats.fields[i+N]); // log(hyp_ngram_counts[i]);
if (out) out->push_back(exp(lprec));
log_bleu += lprec;
+ if (BrevityType == QCRI)
+ log_bleu_adj += log(alpha) - log(stats.fields[i+N] + alpha);
++count;
}
}
log_bleu /= count;
+ log_bleu_adj /= count;
float lbp = 0.0;
const float& hyp_len = stats.fields[2*N];
const float& ref_len = stats.fields[2*N + 1];
- if (hyp_len < ref_len)
- lbp = (hyp_len - ref_len) / hyp_len;
+ if (hyp_len < ref_len) {
+ if (BrevityType == QCRI)
+ lbp = (hyp_len - ref_len - alpha) / hyp_len;
+ else
+ lbp = (hyp_len - ref_len) / hyp_len;
+ }
log_bleu += lbp;
if (bp) *bp = exp(lbp);
+ if (BrevityType == QCRI)
+ return exp(log_bleu) - exp(lbp + log_bleu_adj);
return exp(log_bleu);
}
string DetailedScore(const SufficientStats& stats) const {
@@ -249,10 +264,23 @@ EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) {
m = new BleuMetric<4, NIST>;
} else if (metric_id == "KOEHN_BLEU") {
m = new BleuMetric<4, Koehn>;
+ } else if (metric_id == "QCRI_BLEU") {
+ m = new BleuMetric<4, QCRI>;
+ } else if (metric_id == "SSK") {
+ m = new SSKMetric;
} else if (metric_id == "TER") {
m = new TERMetric;
} else if (metric_id == "METEOR") {
- m = new ExternalMetric("METEOR", "java -Xmx1536m -jar /cab0/tools/meteor-1.3/meteor-1.3.jar - - -mira -lower -t tune -l en");
+#if HAVE_METEOR
+ if (!FileExists(meteor_jar_path)) {
+ cerr << meteor_jar_path << " not found!\n";
+ abort();
+ }
+ m = new ExternalMetric("METEOR", string("java -Xmx1536m -jar ") + meteor_jar_path + " - - -mira -lower -t tune -l en");
+#else
+ cerr << "cdec was not built with the --with-meteor option." << endl;
+ abort();
+#endif
} else if (metric_id.find("COMB:") == 0) {
m = new CombinationMetric(metric_id);
} else if (metric_id == "CER") {
diff --git a/mteval/ns_docscorer.cc b/mteval/ns_docscorer.cc
index 28a2fd09..83bd1a29 100644
--- a/mteval/ns_docscorer.cc
+++ b/mteval/ns_docscorer.cc
@@ -11,25 +11,29 @@ using namespace std;
DocumentScorer::~DocumentScorer() {}
+DocumentScorer::DocumentScorer() {}
+
void DocumentScorer::Init(const EvaluationMetric* metric,
const vector<string>& ref_files,
const string& src_file,
bool verbose) {
scorers_.clear();
- cerr << "Loading references (" << ref_files.size() << " files)\n";
+ static const WordID kDIV = TD::Convert("|||");
+ if (verbose) cerr << "Loading references (" << ref_files.size() << " files)\n";
assert(src_file.empty());
std::vector<ReadFile> ifs(ref_files.begin(),ref_files.end());
for (int i=0; i < ref_files.size(); ++i) ifs[i].Init(ref_files[i]);
char buf[64000];
bool expect_eof = false;
int line=0;
+ vector<WordID> tmp;
+ vector<vector<WordID> > refs;
while (ifs[0].get()) {
- vector<vector<WordID> > refs(ref_files.size());
+ refs.clear();
for (int i=0; i < ref_files.size(); ++i) {
istream &in=ifs[i].get();
if (in.eof()) break;
in.getline(buf, 64000);
- refs[i].clear();
if (strlen(buf) == 0) {
if (in.eof()) {
if (!expect_eof) {
@@ -38,9 +42,17 @@ void DocumentScorer::Init(const EvaluationMetric* metric,
}
break;
}
- } else {
- TD::ConvertSentence(buf, &refs[i]);
- assert(!refs[i].empty());
+ } else { // read a line from a reference file
+ tmp.clear();
+ TD::ConvertSentence(buf, &tmp);
+ unsigned last = 0;
+ for (unsigned j = 0; j < tmp.size(); ++j) {
+ if (tmp[j] == kDIV) {
+ refs.push_back(vector<WordID>(tmp.begin() + last, tmp.begin() + j));
+ last = j + 1;
+ }
+ }
+ refs.push_back(vector<WordID>(tmp.begin() + last, tmp.end()));
}
assert(!expect_eof);
}
@@ -55,6 +67,6 @@ void DocumentScorer::Init(const EvaluationMetric* metric,
++line;
}
}
- cerr << "Loaded reference translations for " << scorers_.size() << " sentences.\n";
+ if (verbose) cerr << "Loaded reference translations for " << scorers_.size() << " sentences.\n";
}
diff --git a/mteval/ns_docscorer.h b/mteval/ns_docscorer.h
index 170ac627..b3c28fc9 100644
--- a/mteval/ns_docscorer.h
+++ b/mteval/ns_docscorer.h
@@ -5,26 +5,28 @@
#include <string>
#include <boost/shared_ptr.hpp>
-struct EvaluationMetric;
+class EvaluationMetric;
struct SegmentEvaluator;
class DocumentScorer {
public:
~DocumentScorer();
- DocumentScorer() { }
+ DocumentScorer();
DocumentScorer(const EvaluationMetric* metric,
const std::vector<std::string>& ref_files,
const std::string& src_file = "",
bool verbose=false) {
Init(metric,ref_files,src_file,verbose);
}
+ DocumentScorer(const EvaluationMetric* metric,
+ const std::string& src_ref_composite_file);
+ int size() const { return scorers_.size(); }
+ const SegmentEvaluator* operator[](size_t i) const { return scorers_[i].get(); }
+ private:
void Init(const EvaluationMetric* metric,
const std::vector<std::string>& ref_files,
const std::string& src_file = "",
bool verbose=false);
- int size() const { return scorers_.size(); }
- const SegmentEvaluator* operator[](size_t i) const { return scorers_[i].get(); }
- private:
std::vector<boost::shared_ptr<SegmentEvaluator> > scorers_;
};
diff --git a/mteval/ns_ssk.cc b/mteval/ns_ssk.cc
new file mode 100644
index 00000000..c94e62ca
--- /dev/null
+++ b/mteval/ns_ssk.cc
@@ -0,0 +1,32 @@
+#include "ns_ssk.h"
+
+#include <vector>
+
+#include "kernel_string_subseq.h"
+#include "tdict.h"
+// Layout of SufficientStats::fields as used by SSKMetric in this file.
+static const unsigned kNUMFIELDS = 2;  // total number of fields
+static const unsigned kSIMILARITY = 0;  // index of the similarity value
+static const unsigned kCOUNT = 1;  // index of the count (set to 1 per scored hypothesis)
+// Returns the number of sufficient-statistics fields this metric uses (kNUMFIELDS).
+unsigned SSKMetric::SufficientStatisticsVectorSize() const {
+ return kNUMFIELDS;
+}
+// Fills `out` for one hypothesis: the highest ssk<4> score of `hyp` against any entry of `refs`, plus a count of 1.
+void SSKMetric::ComputeSufficientStatistics(const std::vector<WordID>& hyp,
+ const std::vector<std::vector<WordID> >& refs,
+ SufficientStats* out) const {
+ out->fields.resize(kNUMFIELDS);
+ out->fields[kCOUNT] = 1;  // one hypothesis scored
+ float bestsim = 0;  // remains 0 when refs is empty or no score exceeds 0
+ for (unsigned i = 0; i < refs.size(); ++i) {
+ float s = ssk<4>(hyp, refs[i], 0.8);  // presumably the subsequence string kernel from kernel_string_subseq.h (order 4, decay 0.8) -- TODO confirm
+ if (s > bestsim) bestsim = s;  // keep the best-matching reference
+ }
+ out->fields[kSIMILARITY] = bestsim;
+}
+// Returns the similarity field divided by the count field (presumably a per-hypothesis average once stats are summed -- TODO confirm).
+float SSKMetric::ComputeScore(const SufficientStats& stats) const {
+ return stats.fields[kSIMILARITY] / stats.fields[kCOUNT];  // NOTE(review): no guard against a zero count -- confirm callers never pass empty stats
+}
+
diff --git a/mteval/ns_ssk.h b/mteval/ns_ssk.h
new file mode 100644
index 00000000..0d418770
--- /dev/null
+++ b/mteval/ns_ssk.h
@@ -0,0 +1,22 @@
+#ifndef _NS_SSK_H_
+#define _NS_SSK_H_
+
+#include "ns.h"
+// Evaluation metric constructed under the name "SSK"; the three virtual overrides below are defined in ns_ssk.cc.
+class SSKMetric : public EvaluationMetric {
+ friend class EvaluationMetric;  // the constructor is protected; EvaluationMetric::Instance creates the object with `new SSKMetric`
+ private:  // NOTE(review): EditDistance has no definition in the ns_ssk.cc added by this patch and no caller is visible here -- confirm it is unused and can be dropped
+ unsigned EditDistance(const std::string& hyp,
+ const std::string& ref) const;
+ protected:
+ SSKMetric() : EvaluationMetric("SSK") {}  // passes the metric name "SSK" to the base class
+
+ public:
+ virtual unsigned SufficientStatisticsVectorSize() const;  // number of fields in the stats vector
+ virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,  // fills `out` from one hypothesis and its references
+ const std::vector<std::vector<WordID> >& refs,
+ SufficientStats* out) const;
+ virtual float ComputeScore(const SufficientStats& stats) const;  // turns the stats vector into a single score
+};
+
+#endif
diff --git a/mteval/scorer_test.cc b/mteval/scorer_test.cc
index 9b765d0f..da07f154 100644
--- a/mteval/scorer_test.cc
+++ b/mteval/scorer_test.cc
@@ -36,7 +36,7 @@ struct Stuff {
BOOST_FIXTURE_TEST_SUITE( s, Stuff );
BOOST_AUTO_TEST_CASE(TestCreateFromFiles) {
- std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data");
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
vector<string> files;
files.push_back(path + "/re.txt.0");
files.push_back(path + "/re.txt.1");