diff options
Diffstat (limited to 'mteval')
-rw-r--r-- | mteval/Jamfile | 8 | ||||
-rw-r--r-- | mteval/Makefile.am | 37 | ||||
-rw-r--r-- | mteval/mbr_kbest.cc | 38 | ||||
-rw-r--r-- | mteval/meteor_jar.cc.in | 3 | ||||
-rw-r--r-- | mteval/ns.cc | 42 | ||||
-rw-r--r-- | mteval/ns_docscorer.cc | 26 | ||||
-rw-r--r-- | mteval/ns_docscorer.h | 12 | ||||
-rw-r--r-- | mteval/ns_ssk.cc | 32 | ||||
-rw-r--r-- | mteval/ns_ssk.h | 22 | ||||
-rw-r--r-- | mteval/scorer_test.cc | 2 |
10 files changed, 179 insertions, 43 deletions
diff --git a/mteval/Jamfile b/mteval/Jamfile deleted file mode 100644 index 3ed2c2cc..00000000 --- a/mteval/Jamfile +++ /dev/null @@ -1,8 +0,0 @@ -import testing ; - -lib mteval : ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc ns_cer.cc ..//utils : <include>. : : <include>. <library>..//z ; -exe fast_score : fast_score.cc mteval ..//utils ..//boost_program_options ; -exe mbr_kbest : mbr_kbest.cc mteval ..//utils ..//boost_program_options ; -alias programs : fast_score mbr_kbest ; - -unit-test scorer_test : scorer_test.cc mteval ..//utils ..//z ..//boost_unit_test_framework : <testing.arg>$(TOP)/mteval/test_data ; diff --git a/mteval/Makefile.am b/mteval/Makefile.am index 22550c99..681e798e 100644 --- a/mteval/Makefile.am +++ b/mteval/Makefile.am @@ -8,15 +8,42 @@ TESTS = scorer_test noinst_LIBRARIES = libmteval.a -libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc ns_cer.cc +EXTRA_DIST = test_data + +libmteval_a_SOURCES = \ + aer_scorer.h \ + comb_scorer.h \ + external_scorer.h \ + ns.h \ + ns_cer.h \ + ns_comb.h \ + ns_docscorer.h \ + ns_ext.h \ + ns_ssk.h \ + ns_ter.h \ + scorer.h \ + ter.h \ + aer_scorer.cc \ + comb_scorer.cc \ + external_scorer.cc \ + meteor_jar.cc \ + ns.cc \ + ns_cer.cc \ + ns_comb.cc \ + ns_docscorer.cc \ + ns_ext.cc \ + ns_ssk.cc \ + ns_ter.cc \ + scorer.cc \ + ter.cc fast_score_SOURCES = fast_score.cc -fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz +fast_score_LDADD = libmteval.a ../utils/libutils.a mbr_kbest_SOURCES = mbr_kbest.cc -mbr_kbest_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz +mbr_kbest_LDADD = libmteval.a ../utils/libutils.a scorer_test_SOURCES = scorer_test.cc -scorer_test_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz +scorer_test_LDADD = libmteval.a ../utils/libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils +AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/mteval/test_data\" -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/utils diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc index 2bd31566..2519bc01 100644 --- a/mteval/mbr_kbest.cc +++ b/mteval/mbr_kbest.cc @@ -1,7 +1,9 @@ #include <iostream> #include <vector> +#include <tr1/unordered_map> #include <boost/program_options.hpp> +#include <boost/functional/hash.hpp> #include "prob.h" #include "tdict.h" @@ -10,6 +12,7 @@ #include "stringlib.h" using namespace std; +using namespace std::tr1; namespace po = boost::program_options; @@ -31,27 +34,33 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { } } +struct ScoreComparer { + bool operator()(const pair<vector<WordID>, prob_t>& a, const pair<vector<WordID>, prob_t>& b) const { + return a.second > b.second; + } +}; + struct LossComparer { bool operator()(const pair<vector<WordID>, prob_t>& a, const pair<vector<WordID>, prob_t>& b) const { return a.second < b.second; } }; -bool ReadKBestList(istream* in, string* sent_id, vector<pair<vector<WordID>, prob_t> >* list) { +bool ReadKBestList(const double mbr_scale, istream* in, string* sent_id, vector<pair<vector<WordID>, prob_t> >* list) { static string cache_id; static pair<vector<WordID>, prob_t> cache_pair; list->clear(); string cur_id; + unordered_map<vector<WordID>, unsigned, boost::hash<vector<WordID> > > sent2id; if (cache_pair.first.size() > 0) { list->push_back(cache_pair); + sent2id[cache_pair.first] = 0; cur_id = cache_id; cache_pair.first.clear(); } string line; string tstr; - while(*in) { - getline(*in, line); - if (line.empty()) continue; + while(getline(*in, line)) { size_t p1 = line.find(" ||| "); if (p1 == string::npos) { cerr << "Bad format: " << line << endl; abort(); } size_t p2 = line.find(" ||| ", p1 + 4); @@ -59,16 +68,25 @@ bool ReadKBestList(istream* in, string* sent_id, vector<pair<vector<WordID>, pro size_t p3 = line.rfind(" ||| "); cache_id = line.substr(0, p1); tstr = line.substr(p1 + 5, p2 - p1 - 5); - double val = strtod(line.substr(p3 + 5).c_str(), NULL); + double val = strtod(line.substr(p3 + 5).c_str(), NULL) * mbr_scale; TD::ConvertSentence(tstr, &cache_pair.first); cache_pair.second.logeq(val); if (cur_id.empty()) cur_id = cache_id; if (cur_id == cache_id) { - list->push_back(cache_pair); + unordered_map<vector<WordID>, unsigned, boost::hash<vector<WordID> > >::iterator it = + sent2id.find(cache_pair.first); + if (it == sent2id.end()) { + sent2id.insert(make_pair(cache_pair.first, unsigned(list->size()))); + list->push_back(cache_pair); + } else { + (*list)[it->second].second += cache_pair.second; + // cerr << "Cruch: " << line << "\n newp=" << (*list)[it->second].second << endl; + } *sent_id = cur_id; cache_pair.first.clear(); } else { break; } } + sort(list->begin(), list->end(), ScoreComparer()); return !list->empty(); } @@ -87,14 +105,14 @@ int main(int argc, char** argv) { vector<pair<vector<WordID>, prob_t> > list; ReadFile rf(file); string sent_id; - while(ReadKBestList(rf.stream(), &sent_id, &list)) { + while(ReadKBestList(mbr_scale, rf.stream(), &sent_id, &list)) { vector<prob_t> joints(list.size()); - const prob_t max_score = pow(list.front().second, mbr_scale); + const prob_t max_score = list.front().second; prob_t marginal = prob_t::Zero(); for (int i = 0 ; i < list.size(); ++i) { - const prob_t joint = pow(list[i].second, mbr_scale) / max_score; + const prob_t joint = list[i].second / max_score; joints[i] = joint; - // cerr << "list[" << i << "] joint=" << log(joint) << endl; + //cerr << "list[" << i << "] joint=" << log(joint) << endl; marginal += joint; } int mbr_idx = -1; diff --git a/mteval/meteor_jar.cc.in b/mteval/meteor_jar.cc.in new file mode 100644 index 00000000..fe45a72a --- /dev/null +++ b/mteval/meteor_jar.cc.in @@ -0,0 +1,3 @@ + +const char* meteor_jar_path = "@METEOR_JAR@"; + diff --git a/mteval/ns.cc b/mteval/ns.cc index 3af7cc63..b64d4798 100644 --- a/mteval/ns.cc +++ b/mteval/ns.cc @@ -3,6 +3,7 @@ #include "ns_ext.h" #include "ns_comb.h" #include "ns_cer.h" +#include "ns_ssk.h" #include <cstdio> #include <cassert> @@ -12,12 +13,15 @@ #include <sstream> #include "tdict.h" +#include "filelib.h" #include "stringlib.h" using namespace std; map<string, EvaluationMetric*> EvaluationMetric::instances_; +extern const char* meteor_jar_path; + SegmentEvaluator::~SegmentEvaluator() {} EvaluationMetric::~EvaluationMetric() {} @@ -57,7 +61,7 @@ string EvaluationMetric::DetailedScore(const SufficientStats& stats) const { return os.str(); } -enum BleuType { IBM, Koehn, NIST }; +enum BleuType { IBM, Koehn, NIST, QCRI }; template <unsigned int N = 4u, BleuType BrevityType = IBM> struct BleuSegmentEvaluator : public SegmentEvaluator { BleuSegmentEvaluator(const vector<vector<WordID> >& refs, const EvaluationMetric* em) : evaluation_metric(em) { @@ -87,7 +91,7 @@ struct BleuSegmentEvaluator : public SegmentEvaluator { float& ref_len = out->fields[2*N + 1]; hyp_len = hyp.size(); ref_len = lengths_[0]; - if (lengths_.size() > 1 && BrevityType == IBM) { + if (lengths_.size() > 1 && (BrevityType == IBM || BrevityType == QCRI)) { float bestd = 2000000; float hl = hyp.size(); float bl = -1; @@ -182,7 +186,7 @@ struct BleuSegmentEvaluator : public SegmentEvaluator { template <unsigned int N = 4u, BleuType BrevityType = IBM> struct BleuMetric : public EvaluationMetric { - BleuMetric() : EvaluationMetric(BrevityType == IBM ? "IBM_BLEU" : (BrevityType == Koehn ? "KOEHN_BLEU" : "NIST_BLEU")) {} + BleuMetric() : EvaluationMetric(BrevityType == IBM ? "IBM_BLEU" : (BrevityType == Koehn ? "KOEHN_BLEU" : (BrevityType == NIST ? "NIST_BLEU" : "QCRI_BLEU"))) {} unsigned SufficientStatisticsVectorSize() const { return N*2 + 2; } boost::shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const vector<vector<WordID> >& refs) const { return boost::shared_ptr<SegmentEvaluator>(new BleuSegmentEvaluator<N,BrevityType>(refs, this)); @@ -190,26 +194,37 @@ struct BleuMetric : public EvaluationMetric { float ComputeBreakdown(const SufficientStats& stats, float* bp, vector<float>* out) const { if (out) { out->clear(); } float log_bleu = 0; + float log_bleu_adj = 0; // for QCRI int count = 0; + float alpha = BrevityType == QCRI ? 1 : 0.01; for (int i = 0; i < N; ++i) { if (stats.fields[i+N] > 0) { float cor_count = stats.fields[i]; // correct_ngram_hit_counts[i]; // smooth bleu - if (!cor_count) { cor_count = 0.01; } + if (!cor_count) { cor_count = alpha; } float lprec = log(cor_count) - log(stats.fields[i+N]); // log(hyp_ngram_counts[i]); if (out) out->push_back(exp(lprec)); log_bleu += lprec; + if (BrevityType == QCRI) + log_bleu_adj += log(alpha) - log(stats.fields[i+N] + alpha); ++count; } } log_bleu /= count; + log_bleu_adj /= count; float lbp = 0.0; const float& hyp_len = stats.fields[2*N]; const float& ref_len = stats.fields[2*N + 1]; - if (hyp_len < ref_len) - lbp = (hyp_len - ref_len) / hyp_len; + if (hyp_len < ref_len) { + if (BrevityType == QCRI) + lbp = (hyp_len - ref_len - alpha) / hyp_len; + else + lbp = (hyp_len - ref_len) / hyp_len; + } log_bleu += lbp; if (bp) *bp = exp(lbp); + if (BrevityType == QCRI) + return exp(log_bleu) - exp(lbp + log_bleu_adj); return exp(log_bleu); } string DetailedScore(const SufficientStats& stats) const { @@ -249,10 +264,23 @@ EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) { m = new BleuMetric<4, NIST>; } else if (metric_id == "KOEHN_BLEU") { m = new BleuMetric<4, Koehn>; + } else if (metric_id == "QCRI_BLEU") { + m = new BleuMetric<4, QCRI>; + } else if (metric_id == "SSK") { + m = new SSKMetric; } else if (metric_id == "TER") { m = new TERMetric; } else if (metric_id == "METEOR") { - m = new ExternalMetric("METEOR", "java -Xmx1536m -jar /cab0/tools/meteor-1.3/meteor-1.3.jar - - -mira -lower -t tune -l en"); +#if HAVE_METEOR + if (!FileExists(meteor_jar_path)) { + cerr << meteor_jar_path << " not found!\n"; + abort(); + } + m = new ExternalMetric("METEOR", string("java -Xmx1536m -jar ") + meteor_jar_path + " - - -mira -lower -t tune -l en"); +#else + cerr << "cdec was not built with the --with-meteor option." << endl; + abort(); +#endif } else if (metric_id.find("COMB:") == 0) { m = new CombinationMetric(metric_id); } else if (metric_id == "CER") { diff --git a/mteval/ns_docscorer.cc b/mteval/ns_docscorer.cc index 28a2fd09..83bd1a29 100644 --- a/mteval/ns_docscorer.cc +++ b/mteval/ns_docscorer.cc @@ -11,25 +11,29 @@ using namespace std; DocumentScorer::~DocumentScorer() {} +DocumentScorer::DocumentScorer() {} + void DocumentScorer::Init(const EvaluationMetric* metric, const vector<string>& ref_files, const string& src_file, bool verbose) { scorers_.clear(); - cerr << "Loading references (" << ref_files.size() << " files)\n"; + static const WordID kDIV = TD::Convert("|||"); + if (verbose) cerr << "Loading references (" << ref_files.size() << " files)\n"; assert(src_file.empty()); std::vector<ReadFile> ifs(ref_files.begin(),ref_files.end()); for (int i=0; i < ref_files.size(); ++i) ifs[i].Init(ref_files[i]); char buf[64000]; bool expect_eof = false; int line=0; + vector<WordID> tmp; + vector<vector<WordID> > refs; while (ifs[0].get()) { - vector<vector<WordID> > refs(ref_files.size()); + refs.clear(); for (int i=0; i < ref_files.size(); ++i) { istream &in=ifs[i].get(); if (in.eof()) break; in.getline(buf, 64000); - refs[i].clear(); if (strlen(buf) == 0) { if (in.eof()) { if (!expect_eof) { @@ -38,9 +42,17 @@ void DocumentScorer::Init(const EvaluationMetric* metric, } break; } - } else { - TD::ConvertSentence(buf, &refs[i]); - assert(!refs[i].empty()); + } else { // read a line from a reference file + tmp.clear(); + TD::ConvertSentence(buf, &tmp); + unsigned last = 0; + for (unsigned j = 0; j < tmp.size(); ++j) { + if (tmp[j] == kDIV) { + refs.push_back(vector<WordID>(tmp.begin() + last, tmp.begin() + j)); + last = j + 1; + } + } + refs.push_back(vector<WordID>(tmp.begin() + last, tmp.end())); } assert(!expect_eof); } @@ -55,6 +67,6 @@ void DocumentScorer::Init(const EvaluationMetric* metric, ++line; } } - cerr << "Loaded reference translations for " << scorers_.size() << " sentences.\n"; + if (verbose) cerr << "Loaded reference translations for " << scorers_.size() << " sentences.\n"; } diff --git a/mteval/ns_docscorer.h b/mteval/ns_docscorer.h index 170ac627..b3c28fc9 100644 --- a/mteval/ns_docscorer.h +++ b/mteval/ns_docscorer.h @@ -5,26 +5,28 @@ #include <string> #include <boost/shared_ptr.hpp> -struct EvaluationMetric; +class EvaluationMetric; struct SegmentEvaluator; class DocumentScorer { public: ~DocumentScorer(); - DocumentScorer() { } + DocumentScorer(); DocumentScorer(const EvaluationMetric* metric, const std::vector<std::string>& ref_files, const std::string& src_file = "", bool verbose=false) { Init(metric,ref_files,src_file,verbose); } + DocumentScorer(const EvaluationMetric* metric, + const std::string& src_ref_composite_file); + int size() const { return scorers_.size(); } + const SegmentEvaluator* operator[](size_t i) const { return scorers_[i].get(); } + private: void Init(const EvaluationMetric* metric, const std::vector<std::string>& ref_files, const std::string& src_file = "", bool verbose=false); - int size() const { return scorers_.size(); } - const SegmentEvaluator* operator[](size_t i) const { return scorers_[i].get(); } - private: std::vector<boost::shared_ptr<SegmentEvaluator> > scorers_; }; diff --git a/mteval/ns_ssk.cc b/mteval/ns_ssk.cc new file mode 100644 index 00000000..c94e62ca --- /dev/null +++ b/mteval/ns_ssk.cc @@ -0,0 +1,32 @@ +#include "ns_ssk.h" + +#include <vector> + +#include "kernel_string_subseq.h" +#include "tdict.h" + +static const unsigned kNUMFIELDS = 2; +static const unsigned kSIMILARITY = 0; +static const unsigned kCOUNT = 1; + +unsigned SSKMetric::SufficientStatisticsVectorSize() const { + return kNUMFIELDS; +} + +void SSKMetric::ComputeSufficientStatistics(const std::vector<WordID>& hyp, + const std::vector<std::vector<WordID> >& refs, + SufficientStats* out) const { + out->fields.resize(kNUMFIELDS); + out->fields[kCOUNT] = 1; + float bestsim = 0; + for (unsigned i = 0; i < refs.size(); ++i) { + float s = ssk<4>(hyp, refs[i], 0.8); + if (s > bestsim) bestsim = s; + } + out->fields[kSIMILARITY] = bestsim; +} + +float SSKMetric::ComputeScore(const SufficientStats& stats) const { + return stats.fields[kSIMILARITY] / stats.fields[kCOUNT]; +} + diff --git a/mteval/ns_ssk.h b/mteval/ns_ssk.h new file mode 100644 index 00000000..0d418770 --- /dev/null +++ b/mteval/ns_ssk.h @@ -0,0 +1,22 @@ +#ifndef _NS_SSK_H_ +#define _NS_SSK_H_ + +#include "ns.h" + +class SSKMetric : public EvaluationMetric { + friend class EvaluationMetric; + private: + unsigned EditDistance(const std::string& hyp, + const std::string& ref) const; + protected: + SSKMetric() : EvaluationMetric("SSK") {} + + public: + virtual unsigned SufficientStatisticsVectorSize() const; + virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp, + const std::vector<std::vector<WordID> >& refs, + SufficientStats* out) const; + virtual float ComputeScore(const SufficientStats& stats) const; +}; + +#endif diff --git a/mteval/scorer_test.cc b/mteval/scorer_test.cc index 9b765d0f..da07f154 100644 --- a/mteval/scorer_test.cc +++ b/mteval/scorer_test.cc @@ -36,7 +36,7 @@ struct Stuff { BOOST_FIXTURE_TEST_SUITE( s, Stuff ); BOOST_AUTO_TEST_CASE(TestCreateFromFiles) { - std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); vector<string> files; files.push_back(path + "/re.txt.0"); files.push_back(path + "/re.txt.1"); |