From 80686d4e567bae579ea39e009826a2de92cd4ace Mon Sep 17 00:00:00 2001 From: redpony Date: Wed, 11 Aug 2010 02:37:10 +0000 Subject: major refactor, break bad circular deps git-svn-id: https://ws10smt.googlecode.com/svn/trunk@509 ec762483-ff6d-05da-a07a-a48fb63a330f --- mteval/Makefile.am | 23 ++ mteval/aer_scorer.cc | 135 ++++++++++ mteval/aer_scorer.h | 23 ++ mteval/comb_scorer.cc | 97 +++++++ mteval/comb_scorer.h | 17 ++ mteval/fast_score.cc | 72 ++++++ mteval/mbr_kbest.cc | 138 ++++++++++ mteval/scorer.cc | 630 ++++++++++++++++++++++++++++++++++++++++++++++ mteval/scorer.h | 110 ++++++++ mteval/scorer_test.cc | 182 ++++++++++++++ mteval/ter.cc | 535 +++++++++++++++++++++++++++++++++++++++ mteval/ter.h | 19 ++ mteval/test_data/re.txt.0 | 5 + mteval/test_data/re.txt.1 | 5 + mteval/test_data/re.txt.2 | 5 + mteval/test_data/re.txt.3 | 5 + 16 files changed, 2001 insertions(+) create mode 100644 mteval/Makefile.am create mode 100644 mteval/aer_scorer.cc create mode 100644 mteval/aer_scorer.h create mode 100644 mteval/comb_scorer.cc create mode 100644 mteval/comb_scorer.h create mode 100644 mteval/fast_score.cc create mode 100644 mteval/mbr_kbest.cc create mode 100644 mteval/scorer.cc create mode 100644 mteval/scorer.h create mode 100644 mteval/scorer_test.cc create mode 100644 mteval/ter.cc create mode 100644 mteval/ter.h create mode 100644 mteval/test_data/re.txt.0 create mode 100644 mteval/test_data/re.txt.1 create mode 100644 mteval/test_data/re.txt.2 create mode 100644 mteval/test_data/re.txt.3 (limited to 'mteval') diff --git a/mteval/Makefile.am b/mteval/Makefile.am new file mode 100644 index 00000000..7ae14045 --- /dev/null +++ b/mteval/Makefile.am @@ -0,0 +1,23 @@ +bin_PROGRAMS = \ + fast_score \ + mbr_kbest + +if HAVE_GTEST +noinst_PROGRAMS = \ + scorer_test +endif + +noinst_LIBRARIES = libmteval.a + +libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc + +fast_score_SOURCES = fast_score.cc +fast_score_LDADD = $(top_srcdir)/utils/libutils.a libmteval.a -lz + +mbr_kbest_SOURCES = mbr_kbest.cc +mbr_kbest_LDADD = $(top_srcdir)/utils/libutils.a libmteval.a -lz + +scorer_test_SOURCES = scorer_test.cc +scorer_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/utils/libutils.a libmteval.a -lz + +AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils diff --git a/mteval/aer_scorer.cc b/mteval/aer_scorer.cc new file mode 100644 index 00000000..edd4390f --- /dev/null +++ b/mteval/aer_scorer.cc @@ -0,0 +1,135 @@ +#include "aer_scorer.h" + +#include +#include +#include + +#include "tdict.h" +#include "alignment_pharaoh.h" + +using namespace std; + +class AERScore : public ScoreBase { + friend class AERScorer; + public: + AERScore() : num_matches(), num_predicted(), num_in_ref() {} + AERScore(int m, int p, int r) : + num_matches(m), num_predicted(p), num_in_ref(r) {} + virtual void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){} + virtual void PlusEquals(const Score& delta, const float scale) { + const AERScore& other = static_cast(delta); + num_matches += scale*other.num_matches; + num_predicted += scale*other.num_predicted; + num_in_ref += scale*other.num_in_ref; + } + virtual void PlusEquals(const Score& delta) { + const AERScore& other = static_cast(delta); + num_matches += other.num_matches; + num_predicted += other.num_predicted; + num_in_ref += other.num_in_ref; + } + + + virtual ScoreP GetZero() const { + return ScoreP(new AERScore); + } + virtual ScoreP GetOne() const { + return ScoreP(new AERScore); + } + virtual void Subtract(const Score& rhs, Score* out) const { + AERScore* res = static_cast(out); + const AERScore& other = static_cast(rhs); + res->num_matches = num_matches - other.num_matches; + res->num_predicted = num_predicted - other.num_predicted; + res->num_in_ref = num_in_ref - other.num_in_ref; + } + float Precision() const { + return static_cast(num_matches) / num_predicted; + } + float Recall() const { + return static_cast(num_matches) / num_in_ref; + } + float ComputePartialScore() const { return 0.0;} + virtual float ComputeScore() const { + const float prec = Precision(); + const float rec = Recall(); + const float f = (2.0 * prec * rec) / (rec + prec); + if (isnan(f)) return 1.0f; + return 1.0f - f; + } + virtual bool IsAdditiveIdentity() const { + return (num_matches == 0) && (num_predicted == 0) && (num_in_ref == 0); + } + virtual void ScoreDetails(std::string* out) const { + ostringstream os; + os << "AER=" << (ComputeScore() * 100.0) + << " F=" << (100 - ComputeScore() * 100.0) + << " P=" << (Precision() * 100.0) << " R=" << (Recall() * 100.0) + << " [" << num_matches << " " << num_predicted << " " << num_in_ref << "]"; + *out = os.str(); + } + virtual void Encode(std::string*out) const { + out->resize(sizeof(int) * 3); + *(int *)&(*out)[sizeof(int) * 0] = num_matches; + *(int *)&(*out)[sizeof(int) * 1] = num_predicted; + *(int *)&(*out)[sizeof(int) * 2] = num_in_ref; + } + private: + int num_matches; + int num_predicted; + int num_in_ref; +}; + +AERScorer::AERScorer(const vector >& refs, const string& src) : src_(src) { + if (refs.size() != 1) { + cerr << "AERScorer can only take a single reference!\n"; + abort(); + } + ref_ = AlignmentPharaoh::ReadPharaohAlignmentGrid(TD::GetString(refs.front())); +} + +static inline bool Safe(const Array2D& a, int i, int j) { + if (i >= 0 && j >= 0 && i < a.width() && j < a.height()) + return a(i,j); + else + return false; +} + +ScoreP AERScorer::ScoreCCandidate(const vector& shyp) const { + return ScoreP(); +} + +ScoreP AERScorer::ScoreCandidate(const vector& shyp) const { + boost::shared_ptr > hyp = + AlignmentPharaoh::ReadPharaohAlignmentGrid(TD::GetString(shyp)); + + int m = 0; + int r = 0; + int p = 0; + int i_len = ref_->width(); + int j_len = ref_->height(); + for (int i = 0; i < i_len; ++i) { + for (int j = 0; j < j_len; ++j) { + if ((*ref_)(i,j)) { + ++r; + if (Safe(*hyp, i, j)) ++m; + } + } + } + for (int i = 0; i < hyp->width(); ++i) + for (int j = 0; j < hyp->height(); ++j) + if ((*hyp)(i,j)) ++p; + + return ScoreP(new AERScore(m,p,r)); +} + +ScoreP AERScorer::ScoreFromString(const string& in) { + AERScore* res = new AERScore; + res->num_matches = *(const int *)&in[sizeof(int) * 0]; + res->num_predicted = *(const int *)&in[sizeof(int) * 1]; + res->num_in_ref = *(const int *)&in[sizeof(int) * 2]; + return ScoreP(res); +} + +const std::string* AERScorer::GetSource() const { return &src_; } + diff --git a/mteval/aer_scorer.h b/mteval/aer_scorer.h new file mode 100644 index 00000000..6d53d359 --- /dev/null +++ b/mteval/aer_scorer.h @@ -0,0 +1,23 @@ +#ifndef _AER_SCORER_ +#define _AER_SCORER_ + +#include + +#include "scorer.h" +#include "array2d.h" + +class AERScorer : public SentenceScorer { + public: + // when constructing alignment strings from a hypergraph, the source + // is necessary. + AERScorer(const std::vector >& refs, const std::string& src = ""); + ScoreP ScoreCandidate(const std::vector& hyp) const; + ScoreP ScoreCCandidate(const std::vector& hyp) const; + static ScoreP ScoreFromString(const std::string& in); + const std::string* GetSource() const; + private: + std::string src_; + boost::shared_ptr > ref_; +}; + +#endif diff --git a/mteval/comb_scorer.cc b/mteval/comb_scorer.cc new file mode 100644 index 00000000..9fc37868 --- /dev/null +++ b/mteval/comb_scorer.cc @@ -0,0 +1,97 @@ +#include "comb_scorer.h" + +#include + +using namespace std; + +class BLEUTERCombinationScore : public ScoreBase { + friend class BLEUTERCombinationScorer; + public: + ~BLEUTERCombinationScore(); + float ComputePartialScore() const { return 0.0;} + float ComputeScore() const { + return (bleu->ComputeScore() - ter->ComputeScore()) / 2.0f; + } + void ScoreDetails(string* details) const { + char buf[160]; + sprintf(buf, "Combi = %.2f, BLEU = %.2f, TER = %.2f", + ComputeScore()*100.0f, bleu->ComputeScore()*100.0f, ter->ComputeScore()*100.0f); + *details = buf; + } + void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){} + + void PlusEquals(const Score& delta, const float scale) { + bleu->PlusEquals(*static_cast(delta).bleu, scale); + ter->PlusEquals(*static_cast(delta).ter, scale); + } + void PlusEquals(const Score& delta) { + bleu->PlusEquals(*static_cast(delta).bleu); + ter->PlusEquals(*static_cast(delta).ter); + } + + + + ScoreP GetOne() const { + BLEUTERCombinationScore* res = new BLEUTERCombinationScore; + res->bleu = bleu->GetOne(); + res->ter = ter->GetOne(); + return ScoreP(res); + } + ScoreP GetZero() const { + BLEUTERCombinationScore* res = new BLEUTERCombinationScore; + res->bleu = bleu->GetZero(); + res->ter = ter->GetZero(); + return ScoreP(res); + } + void Subtract(const Score& rhs, Score* res) const { + bleu->Subtract(*static_cast(rhs).bleu, + static_cast(res)->bleu.get()); + ter->Subtract(*static_cast(rhs).ter, + static_cast(res)->ter.get()); + } + void Encode(std::string* out) const { + string bs, ts; + bleu->Encode(&bs); + ter->Encode(&ts); + out->clear(); + (*out) += static_cast(bs.size()); + (*out) += bs; + (*out) += ts; + } + bool IsAdditiveIdentity() const { + return bleu->IsAdditiveIdentity() && ter->IsAdditiveIdentity(); + } + private: + ScoreP bleu; + ScoreP ter; +}; + +BLEUTERCombinationScore::~BLEUTERCombinationScore() { +} + +BLEUTERCombinationScorer::BLEUTERCombinationScorer(const vector >& refs) { + bleu_ = SentenceScorer::CreateSentenceScorer(IBM_BLEU, refs); + ter_ = SentenceScorer::CreateSentenceScorer(TER, refs); +} + +BLEUTERCombinationScorer::~BLEUTERCombinationScorer() { +} + +ScoreP BLEUTERCombinationScorer::ScoreCCandidate(const vector& hyp) const { + return ScoreP(); +} + +ScoreP BLEUTERCombinationScorer::ScoreCandidate(const std::vector& hyp) const { + BLEUTERCombinationScore* res = new BLEUTERCombinationScore; + res->bleu = bleu_->ScoreCandidate(hyp); + res->ter = ter_->ScoreCandidate(hyp); + return ScoreP(res); +} + +ScoreP BLEUTERCombinationScorer::ScoreFromString(const std::string& in) { + int bss = in[0]; + BLEUTERCombinationScore* r = new BLEUTERCombinationScore; + r->bleu = SentenceScorer::CreateScoreFromString(IBM_BLEU, in.substr(1, bss)); + r->ter = SentenceScorer::CreateScoreFromString(TER, in.substr(1 + bss)); + return ScoreP(r); +} diff --git a/mteval/comb_scorer.h b/mteval/comb_scorer.h new file mode 100644 index 00000000..346be576 --- /dev/null +++ b/mteval/comb_scorer.h @@ -0,0 +1,17 @@ +#ifndef _COMB_SCORER_ +#define _COMB_SCORER_ + +#include "scorer.h" + +class BLEUTERCombinationScorer : public SentenceScorer { + public: + BLEUTERCombinationScorer(const std::vector >& refs); + ~BLEUTERCombinationScorer(); + ScoreP ScoreCandidate(const std::vector& hyp) const; + ScoreP ScoreCCandidate(const std::vector& hyp) const; + static ScoreP ScoreFromString(const std::string& in); + private: + ScorerP bleu_,ter_; +}; + +#endif diff --git a/mteval/fast_score.cc b/mteval/fast_score.cc new file mode 100644 index 00000000..5ee264a6 --- /dev/null +++ b/mteval/fast_score.cc @@ -0,0 +1,72 @@ +#include +#include + +#include +#include + +#include "filelib.h" +#include "tdict.h" +#include "scorer.h" + +using namespace std; +namespace po = boost::program_options; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("reference,r",po::value >(), "[REQD] Reference translation(s) (tokenized text file)") + ("loss_function,l",po::value()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)") + ("in_file,i", po::value()->default_value("-"), "Input file") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = false; + if (!conf->count("reference")) { + cerr << "Please specify one or more references using -r -r ...\n"; + flag = true; + } + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +int main(int argc, char** argv) { + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + const string loss_function = conf["loss_function"].as(); + ScoreType type = ScoreTypeFromString(loss_function); + DocScorer ds(type, conf["reference"].as >(), ""); + cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl; + + ReadFile rf(conf["in_file"].as()); + ScoreP acc; + istream& in = *rf.stream(); + int lc = 0; + while(in) { + string line; + getline(in, line); + if (line.empty() && !in) break; + vector sent; + TD::ConvertSentence(line, &sent); + ScoreP sentscore = ds[lc]->ScoreCandidate(sent); + if (!acc) { acc = sentscore->GetZero(); } + acc->PlusEquals(*sentscore); + ++lc; + } + assert(lc > 0); + if (lc > ds.size()) { + cerr << "Too many (" << lc << ") translations in input, expected " << ds.size() << endl; + return 1; + } + if (lc != ds.size()) + cerr << "Fewer sentences in hyp (" << lc << ") than refs (" + << ds.size() << "): scoring partial set!\n"; + float score = acc->ComputeScore(); + string details; + acc->ScoreDetails(&details); + cerr << details << endl; + cout << score << endl; + return 0; +} diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc new file mode 100644 index 00000000..2867b36b --- /dev/null +++ b/mteval/mbr_kbest.cc @@ -0,0 +1,138 @@ +#include +#include + +#include + +#include "prob.h" +#include "tdict.h" +#include "scorer.h" +#include "filelib.h" +#include "stringlib.h" + +using namespace std; + +namespace po = boost::program_options; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("scale,a",po::value()->default_value(1.0), "Posterior scaling factor (alpha)") + ("loss_function,l",po::value()->default_value("bleu"), "Loss function") + ("input,i",po::value()->default_value("-"), "File to read k-best lists from") + ("output_list,L", "Show reranked list as output") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = false; + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +struct LossComparer { + bool operator()(const pair, double>& a, const pair, double>& b) const { + return a.second < b.second; + } +}; + +bool ReadKBestList(istream* in, string* sent_id, vector, prob_t> >* list) { + static string cache_id; + static pair, prob_t> cache_pair; + list->clear(); + string cur_id; + if (cache_pair.first.size() > 0) { + list->push_back(cache_pair); + cur_id = cache_id; + cache_pair.first.clear(); + } + string line; + string tstr; + while(*in) { + getline(*in, line); + if (line.empty()) continue; + size_t p1 = line.find(" ||| "); + if (p1 == string::npos) { cerr << "Bad format: " << line << endl; abort(); } + size_t p2 = line.find(" ||| ", p1 + 4); + if (p2 == string::npos) { cerr << "Bad format: " << line << endl; abort(); } + size_t p3 = line.rfind(" ||| "); + cache_id = line.substr(0, p1); + tstr = line.substr(p1 + 5, p2 - p1 - 5); + double val = strtod(line.substr(p3 + 5).c_str(), NULL); + TD::ConvertSentence(tstr, &cache_pair.first); + cache_pair.second.logeq(val); + if (cur_id.empty()) cur_id = cache_id; + if (cur_id == cache_id) { + list->push_back(cache_pair); + *sent_id = cur_id; + cache_pair.first.clear(); + } else { break; } + } + return !list->empty(); +} + +int main(int argc, char** argv) { + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + const string metric = conf["loss_function"].as(); + const bool output_list = conf.count("output_list") > 0; + const string file = conf["input"].as(); + const double mbr_scale = conf["scale"].as(); + cerr << "Posterior scaling factor (alpha) = " << mbr_scale << endl; + + ScoreType type = ScoreTypeFromString(metric); + vector, prob_t> > list; + ReadFile rf(file); + string sent_id; + while(ReadKBestList(rf.stream(), &sent_id, &list)) { + vector joints(list.size()); + const prob_t max_score = pow(list.front().second, mbr_scale); + prob_t marginal = prob_t::Zero(); + for (int i = 0 ; i < list.size(); ++i) { + const prob_t joint = pow(list[i].second, mbr_scale) / max_score; + joints[i] = joint; + // cerr << "list[" << i << "] joint=" << log(joint) << endl; + marginal += joint; + } + int mbr_idx = -1; + vector mbr_scores(output_list ? list.size() : 0); + double mbr_loss = numeric_limits::max(); + for (int i = 0 ; i < list.size(); ++i) { + vector > refs(1, list[i].first); + //cerr << i << ": " << list[i].second <<"\t" << TD::GetString(list[i].first) << endl; + ScorerP scorer = SentenceScorer::CreateSentenceScorer(type, refs); + double wl_acc = 0; + for (int j = 0; j < list.size(); ++j) { + if (i != j) { + ScoreP s = scorer->ScoreCandidate(list[j].first); + double loss = 1.0 - s->ComputeScore(); + if (type == TER || type == AER) loss = 1.0 - loss; + double weighted_loss = loss * (joints[j] / marginal); + wl_acc += weighted_loss; + if ((!output_list) && wl_acc > mbr_loss) break; + } + } + if (output_list) mbr_scores[i] = wl_acc; + if (wl_acc < mbr_loss) { + mbr_loss = wl_acc; + mbr_idx = i; + } + } + // cerr << "ML translation: " << TD::GetString(list[0].first) << endl; + cerr << "MBR Best idx: " << mbr_idx << endl; + if (output_list) { + for (int i = 0; i < list.size(); ++i) + list[i].second.logeq(mbr_scores[i]); + sort(list.begin(), list.end(), LossComparer()); + for (int i = 0; i < list.size(); ++i) + cout << sent_id << " ||| " + << TD::GetString(list[i].first) << " ||| " + << log(list[i].second) << endl; + } else { + cout << TD::GetString(list[mbr_idx].first) << endl; + } + } + return 0; +} + diff --git a/mteval/scorer.cc b/mteval/scorer.cc new file mode 100644 index 00000000..04eeaa93 --- /dev/null +++ b/mteval/scorer.cc @@ -0,0 +1,630 @@ +#include "scorer.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "filelib.h" +#include "ter.h" +#include "aer_scorer.h" +#include "comb_scorer.h" +#include "tdict.h" +#include "stringlib.h" + +using boost::shared_ptr; +using namespace std; + +void Score::TimesEquals(float scale) { + cerr<<"UNIMPLEMENTED except for BLEU (for MIRA): Score::TimesEquals"<=0 && st + float operator()(float sum,S const& ref) const { + return sum+ref.size(); + } +}; + +template +float avg_reflength(vector refs) { + unsigned n=refs.size(); + return n?accumulate(refs.begin(),refs.end(),0.,length_accum())/n:0.; +} + + +float SentenceScorer::ComputeRefLength(const Sentence &hyp) const { + return hyp.size(); // reasonable default? :) +} + +const std::string* SentenceScorer::GetSource() const { return NULL; } + +class SERScore : public ScoreBase { + friend class SERScorer; + public: + SERScore() : correct(0), total(0) {} + float ComputePartialScore() const { return 0.0;} + float ComputeScore() const { + return static_cast(correct) / static_cast(total); + } + void ScoreDetails(string* details) const { + ostringstream os; + os << "SER= " << ComputeScore() << " (" << correct << '/' << total << ')'; + *details = os.str(); + } + void PlusPartialEquals(const Score& /* delta */, int /* oracle_e_cover */, int /* oracle_f_cover */, int /* src_len */){} + + void PlusEquals(const Score& delta, const float scale) { + correct += scale*static_cast(delta).correct; + total += scale*static_cast(delta).total; + } + void PlusEquals(const Score& delta) { + correct += static_cast(delta).correct; + total += static_cast(delta).total; + } + ScoreP GetZero() const { return ScoreP(new SERScore); } + ScoreP GetOne() const { return ScoreP(new SERScore); } + void Subtract(const Score& rhs, Score* res) const { + SERScore* r = static_cast(res); + r->correct = correct - static_cast(rhs).correct; + r->total = total - static_cast(rhs).total; + } + void Encode(string* out) const { + assert(!"not implemented"); + } + bool IsAdditiveIdentity() const { + return (total == 0 && correct == 0); // correct is always 0 <= n <= total + } + private: + int correct, total; +}; + +std::string SentenceScorer::verbose_desc() const { + return desc+",ref0={ "+TD::GetString(refs[0])+" }"; +} + +class SERScorer : public SentenceScorer { + public: + SERScorer(const vector >& references) : SentenceScorer("SERScorer",references),refs_(references) {} + ScoreP ScoreCCandidate(const vector& /* hyp */) const { + return ScoreP(); + } + ScoreP ScoreCandidate(const vector& hyp) const { + SERScore* res = new SERScore; + res->total = 1; + for (int i = 0; i < refs_.size(); ++i) + if (refs_[i] == hyp) res->correct = 1; + return ScoreP(res); + } + static ScoreP ScoreFromString(const string& data) { + assert(!"Not implemented"); + } + private: + vector > refs_; +}; + +class BLEUScore : public ScoreBase { + friend class BLEUScorerBase; + public: + BLEUScore(int n) : correct_ngram_hit_counts(float(0),n), hyp_ngram_counts(float(0),n) { + ref_len = 0; + hyp_len = 0; } + BLEUScore(int n, int k) : correct_ngram_hit_counts(float(k),n), hyp_ngram_counts(float(k),n) { + ref_len = k; + hyp_len = k; } + float ComputeScore() const; + float ComputePartialScore() const; + void ScoreDetails(string* details) const; + void TimesEquals(float scale); + void PlusEquals(const Score& delta); + void PlusEquals(const Score& delta, const float scale); + void PlusPartialEquals(const Score& delta, int oracle_e_cover, int oracle_f_cover, int src_len); + ScoreP GetZero() const; + ScoreP GetOne() const; + void Subtract(const Score& rhs, Score* res) const; + void Encode(string* out) const; + bool IsAdditiveIdentity() const { + if (fabs(ref_len) > 0.1f || hyp_len != 0) return false; + for (int i = 0; i < correct_ngram_hit_counts.size(); ++i) + if (hyp_ngram_counts[i] != 0 || + correct_ngram_hit_counts[i] != 0) return false; + return true; + } + private: + int N() const { + return hyp_ngram_counts.size(); + } + float ComputeScore(vector* precs, float* bp) const; + float ComputePartialScore(vector* prec, float* bp) const; + valarray correct_ngram_hit_counts; + valarray hyp_ngram_counts; + float ref_len; + float hyp_len; +}; + +class BLEUScorerBase : public SentenceScorer { + public: + BLEUScorerBase(const vector >& references, + int n + ); + ScoreP ScoreCandidate(const vector& hyp) const; + ScoreP ScoreCCandidate(const vector& hyp) const; + static ScoreP ScoreFromString(const string& in); + + virtual float ComputeRefLength(const vector& hyp) const = 0; + private: + struct NGramCompare { + int operator() (const vector& a, const vector& b) { + size_t as = a.size(); + size_t bs = b.size(); + const size_t s = (as < bs ? as : bs); + for (size_t i = 0; i < s; ++i) { + int d = a[i] - b[i]; + if (d < 0) return true; + if (d > 0) return false; + } + return as < bs; + } + }; + typedef map, pair, NGramCompare> NGramCountMap; + void CountRef(const vector& ref) { + NGramCountMap tc; + vector ngram(n_); + int s = ref.size(); + for (int j=0; j& p = ngrams_[i->first]; + if (p.first < i->second.first) + p = i->second; + } + } + + void ComputeNgramStats(const vector& sent, + valarray* correct, + valarray* hyp, + bool clip_counts) + const { + assert(correct->size() == n_); + assert(hyp->size() == n_); + vector ngram(n_); + (*correct) *= 0; + (*hyp) *= 0; + int s = sent.size(); + for (int j=0; j& p = ngrams_[ngram]; + if(clip_counts){ + if (p.second < p.first) { + ++p.second; + (*correct)[i-1]++; + }} + else { + ++p.second; + (*correct)[i-1]++; + } + // if the 1 gram isn't found, don't try to match don't need to match any 2- 3- .. grams: + if (!p.first) { + for (; i<=k; ++i) + (*hyp)[i-1]++; + } else { + (*hyp)[i-1]++; + } + } + } + } + + mutable NGramCountMap ngrams_; + int n_; + vector lengths_; +}; + +ScoreP BLEUScorerBase::ScoreFromString(const string& in) { + istringstream is(in); + int n; + is >> n; + BLEUScore* r = new BLEUScore(n); + is >> r->ref_len >> r->hyp_len; + + for (int i = 0; i < n; ++i) { + is >> r->correct_ngram_hit_counts[i]; + is >> r->hyp_ngram_counts[i]; + } + return ScoreP(r); +} + +class IBM_BLEUScorer : public BLEUScorerBase { + public: + IBM_BLEUScorer(const vector >& references, + int n=4) : BLEUScorerBase(references, n), lengths_(references.size()) { + for (int i=0; i < references.size(); ++i) + lengths_[i] = references[i].size(); + } + float ComputeRefLength(const vector& hyp) const { + if (lengths_.size() == 1) return lengths_[0]; + int bestd = 2000000; + int hl = hyp.size(); + int bl = -1; + for (vector::const_iterator ci = lengths_.begin(); ci != lengths_.end(); ++ci) { + int cl = *ci; + if (abs(cl - hl) < bestd) { + bestd = abs(cl - hl); + bl = cl; + } + } + return bl; + } + private: + vector lengths_; +}; + +class NIST_BLEUScorer : public BLEUScorerBase { + public: + NIST_BLEUScorer(const vector >& references, + int n=4) : BLEUScorerBase(references, n), + shortest_(references[0].size()) { + for (int i=1; i < references.size(); ++i) + if (references[i].size() < shortest_) + shortest_ = references[i].size(); + } + float ComputeRefLength(const vector& /* hyp */) const { + return shortest_; + } + private: + float shortest_; +}; + +class Koehn_BLEUScorer : public BLEUScorerBase { + public: + Koehn_BLEUScorer(const vector >& references, + int n=4) : BLEUScorerBase(references, n), + avg_(0) { + for (int i=0; i < references.size(); ++i) + avg_ += references[i].size(); + avg_ /= references.size(); + } + float ComputeRefLength(const vector& /* hyp */) const { + return avg_; + } + private: + float avg_; +}; + +ScorerP SentenceScorer::CreateSentenceScorer(const ScoreType type, + const vector >& refs, + const string& src) +{ + SentenceScorer *r=0; + switch (type) { + case IBM_BLEU: r = new IBM_BLEUScorer(refs, 4);break; + case IBM_BLEU_3 : r = new IBM_BLEUScorer(refs,3);break; + case NIST_BLEU: r = new NIST_BLEUScorer(refs, 4);break; + case Koehn_BLEU: r = new Koehn_BLEUScorer(refs, 4);break; + case AER: r = new AERScorer(refs, src);break; + case TER: r = new TERScorer(refs);break; + case SER: r = new SERScorer(refs);break; + case BLEU_minus_TER_over_2: r = new BLEUTERCombinationScorer(refs);break; + default: + assert(!"Not implemented!"); + } + return ScorerP(r); +} + +ScoreP SentenceScorer::GetOne() const { + Sentence s; + return ScoreCCandidate(s)->GetOne(); +} + +ScoreP SentenceScorer::GetZero() const { + Sentence s; + return ScoreCCandidate(s)->GetZero(); +} + +ScoreP Score::GetOne(ScoreType type) { + std::vector refs; + return SentenceScorer::CreateSentenceScorer(type,refs)->GetOne(); +} + +ScoreP Score::GetZero(ScoreType type) { + std::vector refs; + return SentenceScorer::CreateSentenceScorer(type,refs)->GetZero(); +} + + +ScoreP SentenceScorer::CreateScoreFromString(const ScoreType type, const string& in) { + switch (type) { + case IBM_BLEU: + case IBM_BLEU_3: + case NIST_BLEU: + case Koehn_BLEU: + return BLEUScorerBase::ScoreFromString(in); + case TER: + return TERScorer::ScoreFromString(in); + case AER: + return AERScorer::ScoreFromString(in); + case SER: + return SERScorer::ScoreFromString(in); + case BLEU_minus_TER_over_2: + return BLEUTERCombinationScorer::ScoreFromString(in); + default: + assert(!"Not implemented!"); + } +} + +void BLEUScore::ScoreDetails(string* details) const { + char buf[2000]; + vector precs(max(N(),4)); + float bp; + float bleu = ComputeScore(&precs, &bp); + for (int i=N();i<4;++i) + precs[i]=0.; + char *bufn; + bufn=buf+sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)", + bleu*100.0, + precs[0]*100.0, + precs[1]*100.0, + precs[2]*100.0, + precs[3]*100.0, + bp); + *details = buf; +} + +float BLEUScore::ComputeScore(vector* precs, float* bp) const { + float log_bleu = 0; + if (precs) precs->clear(); + int count = 0; + for (int i = 0; i < N(); ++i) { + if (hyp_ngram_counts[i] > 0) { + float lprec = log(correct_ngram_hit_counts[i]) - log(hyp_ngram_counts[i]); + if (precs) precs->push_back(exp(lprec)); + log_bleu += lprec; + ++count; + } + } + log_bleu /= static_cast(count); + float lbp = 0.0; + if (hyp_len < ref_len) + lbp = (hyp_len - ref_len) / hyp_len; + log_bleu += lbp; + if (bp) *bp = exp(lbp); + return exp(log_bleu); +} + + +//comptue scaled score for oracle retrieval +float BLEUScore::ComputePartialScore(vector* precs, float* bp) const { + // cerr << "Then here " << endl; + float log_bleu = 0; + if (precs) precs->clear(); + int count = 0; + for (int i = 0; i < N(); ++i) { + // cerr << "In CPS " << hyp_ngram_counts[i] << " " << correct_ngram_hit_counts[i] << endl; + if (hyp_ngram_counts[i] > 0) { + float lprec = log(correct_ngram_hit_counts[i]) - log(hyp_ngram_counts[i]); + if (precs) precs->push_back(exp(lprec)); + log_bleu += lprec; + ++count; + } + } + log_bleu /= static_cast(count); + float lbp = 0.0; + if (hyp_len < ref_len) + lbp = (hyp_len - ref_len) / hyp_len; + log_bleu += lbp; + if (bp) *bp = exp(lbp); + return exp(log_bleu); +} + +float BLEUScore::ComputePartialScore() const { + // cerr << "In here first " << endl; + return ComputePartialScore(NULL, NULL); +} + +float BLEUScore::ComputeScore() const { + return ComputeScore(NULL, NULL); +} + +void BLEUScore::Subtract(const Score& rhs, Score* res) const { + const BLEUScore& d = static_cast(rhs); + BLEUScore* o = static_cast(res); + o->ref_len = ref_len - d.ref_len; + o->hyp_len = hyp_len - d.hyp_len; + o->correct_ngram_hit_counts = correct_ngram_hit_counts - d.correct_ngram_hit_counts; + o->hyp_ngram_counts = hyp_ngram_counts - d.hyp_ngram_counts; +} + +void BLEUScore::PlusEquals(const Score& delta) { + const BLEUScore& d = static_cast(delta); + correct_ngram_hit_counts += d.correct_ngram_hit_counts; + hyp_ngram_counts += d.hyp_ngram_counts; + ref_len += d.ref_len; + hyp_len += d.hyp_len; +} + +void BLEUScore::TimesEquals(float scale) { + correct_ngram_hit_counts *= scale; + hyp_ngram_counts *= scale; + ref_len *= scale; + hyp_len *= scale; +} + +void BLEUScore::PlusEquals(const Score& delta, const float scale) { + const BLEUScore& d = static_cast(delta); + correct_ngram_hit_counts = correct_ngram_hit_counts + (d.correct_ngram_hit_counts * scale); + hyp_ngram_counts = hyp_ngram_counts + (d.hyp_ngram_counts * scale); + ref_len = ref_len + (d.ref_len * scale); + hyp_len = hyp_len + (d.hyp_len * scale); +} + +void BLEUScore::PlusPartialEquals(const Score& delta, int oracle_e_cover, int oracle_f_cover, int src_len){ + const BLEUScore& d = static_cast(delta); + correct_ngram_hit_counts += d.correct_ngram_hit_counts; + hyp_ngram_counts += d.hyp_ngram_counts; + //scale the reference length according to the size of the input sentence covered by this rule + + ref_len *= (float)oracle_f_cover / src_len; + ref_len += d.ref_len; + + hyp_len = oracle_e_cover; + hyp_len += d.hyp_len; +} + + +ScoreP BLEUScore::GetZero() const { + return ScoreP(new BLEUScore(N())); +} + +ScoreP BLEUScore::GetOne() const { + return ScoreP(new BLEUScore(N(),1)); +} + + +void BLEUScore::Encode(string* out) const { + ostringstream os; + const int n = correct_ngram_hit_counts.size(); + os << n << ' ' << ref_len << ' ' << hyp_len; + for (int i = 0; i < n; ++i) + os << ' ' << correct_ngram_hit_counts[i] << ' ' << hyp_ngram_counts[i]; + *out = os.str(); +} + +BLEUScorerBase::BLEUScorerBase(const vector >& references, + int n) : SentenceScorer("BLEU"+boost::lexical_cast(n),references),n_(n) { + for (vector >::const_iterator ci = references.begin(); + ci != references.end(); ++ci) { + lengths_.push_back(ci->size()); + CountRef(*ci); + } +} + +ScoreP BLEUScorerBase::ScoreCandidate(const vector& hyp) const { + BLEUScore* bs = new BLEUScore(n_); + for (NGramCountMap::iterator i=ngrams_.begin(); i != ngrams_.end(); ++i) + i->second.second = 0; + ComputeNgramStats(hyp, &bs->correct_ngram_hit_counts, &bs->hyp_ngram_counts, true); + bs->ref_len = ComputeRefLength(hyp); + bs->hyp_len = hyp.size(); + return ScoreP(bs); +} + +ScoreP BLEUScorerBase::ScoreCCandidate(const vector& hyp) const { + BLEUScore* bs = new BLEUScore(n_); + for (NGramCountMap::iterator i=ngrams_.begin(); i != ngrams_.end(); ++i) + i->second.second = 0; + bool clip = false; + ComputeNgramStats(hyp, &bs->correct_ngram_hit_counts, &bs->hyp_ngram_counts,clip); + bs->ref_len = ComputeRefLength(hyp); + bs->hyp_len = hyp.size(); + return ScoreP(bs); +} + + +DocScorer::~DocScorer() { +} + +void DocScorer::Init( + const ScoreType type, + const vector& ref_files, + const string& src_file, bool verbose) { + scorers_.clear(); + // TODO stop using valarray, start using ReadFile + cerr << "Loading references (" << ref_files.size() << " files)\n"; + ReadFile srcrf; + if (type == AER && src_file.size() > 0) { + cerr << " (source=" << src_file << ")\n"; + srcrf.Init(src_file); + } + std::vector ifs(ref_files.begin(),ref_files.end()); + for (int i=0; i < ref_files.size(); ++i) ifs[i].Init(ref_files[i]); + char buf[64000]; + bool expect_eof = false; + int line=0; + while (ifs[0].get()) { + vector > refs(ref_files.size()); + for (int i=0; i < ref_files.size(); ++i) { + istream &in=ifs[i].get(); + if (in.eof()) break; + in.getline(buf, 64000); + refs[i].clear(); + if (strlen(buf) == 0) { + if (in.eof()) { + if (!expect_eof) { + assert(i == 0); + expect_eof = true; + } + break; + } + } else { + TD::ConvertSentence(buf, &refs[i]); + assert(!refs[i].empty()); + } + assert(!expect_eof); + } + if (!expect_eof) { + string src_line; + if (srcrf) { + getline(srcrf.get(), src_line); + map dummy; + ProcessAndStripSGML(&src_line, &dummy); + } + scorers_.push_back(ScorerP(SentenceScorer::CreateSentenceScorer(type, refs, src_line))); + if (verbose) + cerr<<"doc_scorer["<verbose_desc()< +#include +#include +//TODO: use intrusive shared_ptr in Score (because there are many of them on ErrorSurfaces) +#include "wordid.h" +#include "intrusive_refcount.hpp" + +class Score; +class SentenceScorer; +typedef boost::intrusive_ptr ScoreP; +typedef boost::shared_ptr ScorerP; + +class ViterbiEnvelope; +class ErrorSurface; +class Hypergraph; // needed for alignment + +//TODO: BLEU N (N separate arg, not part of enum)? +enum ScoreType { IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, BLEU_minus_TER_over_2, SER, AER, IBM_BLEU_3 }; +ScoreType ScoreTypeFromString(const std::string& st); +std::string StringFromScoreType(ScoreType st); + +class Score : public boost::intrusive_refcount { + public: + virtual ~Score(); + virtual float ComputeScore() const = 0; + virtual float ComputePartialScore() const =0; + virtual void ScoreDetails(std::string* details) const = 0; + std::string ScoreDetails() { + std::string d; + ScoreDetails(&d); + return d; + } + virtual void TimesEquals(float scale); // only for bleu; for mira oracle + /// same as rhs.TimesEquals(scale);PlusEquals(rhs) except doesn't modify rhs. + virtual void PlusEquals(const Score& rhs, const float scale) = 0; + virtual void PlusEquals(const Score& rhs) = 0; + virtual void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len) = 0; + virtual void Subtract(const Score& rhs, Score *res) const = 0; + virtual ScoreP GetZero() const = 0; + virtual ScoreP GetOne() const = 0; + virtual bool IsAdditiveIdentity() const = 0; // returns true if adding this delta + // to another score results in no score change + // under any circumstances + virtual void Encode(std::string* out) const = 0; + static ScoreP GetZero(ScoreType type); + static ScoreP GetOne(ScoreType type); + virtual ScoreP Clone() const = 0; +protected: + Score() { } // we define these explicitly because refcount is noncopyable + Score(Score const&) { } +}; + +//TODO: make sure default copy ctors for score types do what we want. +template +struct ScoreBase : public Score { + ScoreP Clone() const { + return ScoreP(new Derived(dynamic_cast(*this))); + } +}; + +class SentenceScorer { + public: + typedef std::vector Sentence; + typedef std::vector Sentences; + std::string desc; + Sentences refs; + SentenceScorer(std::string desc="SentenceScorer_unknown", Sentences const& refs=Sentences()) : desc(desc),refs(refs) { } + std::string verbose_desc() const; + virtual float ComputeRefLength(const Sentence& hyp) const; // default: avg of refs.length + virtual ~SentenceScorer(); + virtual ScoreP GetOne() const; + virtual ScoreP GetZero() const; + virtual ScoreP ScoreCandidate(const Sentence& hyp) const = 0; + virtual ScoreP ScoreCCandidate(const Sentence& hyp) const =0; + virtual const std::string* GetSource() const; + static ScoreP CreateScoreFromString(const ScoreType type, const std::string& in); + static ScorerP CreateSentenceScorer(const ScoreType type, + const std::vector& refs, + const std::string& src = ""); +}; + +//TODO: should be able to GetOne GetZero without supplying sentence (just type) +class DocScorer { + public: + ~DocScorer(); + DocScorer() { } + void Init(const ScoreType type, + const std::vector& ref_files, + const std::string& src_file = "", + bool verbose=false + ); + DocScorer(const ScoreType type, + const std::vector& ref_files, + const std::string& src_file = "", + bool verbose=false + ) + { + Init(type,ref_files,src_file,verbose); + } + + int size() const { return scorers_.size(); } + ScorerP operator[](size_t i) const { return scorers_[i]; } + private: + std::vector scorers_; +}; + + +#endif diff --git a/mteval/scorer_test.cc b/mteval/scorer_test.cc new file mode 100644 index 00000000..a07a8c4b --- /dev/null +++ b/mteval/scorer_test.cc @@ -0,0 +1,182 @@ +#include +#include +#include +#include + +#include "tdict.h" +#include "scorer.h" +#include "aer_scorer.h" + +using namespace std; + +class ScorerTest : public testing::Test { + protected: + virtual void SetUp() { + refs0.resize(4); + refs1.resize(4); + TD::ConvertSentence("export of high-tech products in guangdong in first two months this year reached 3.76 billion us dollars", &refs0[0]); + TD::ConvertSentence("guangdong's export of new high technology products amounts to us $ 3.76 billion in first two months of this year", &refs0[1]); + TD::ConvertSentence("guangdong exports us $ 3.76 billion worth of high technology products in the first two months of this year", &refs0[2]); + TD::ConvertSentence("in the first 2 months this year , the export volume of new hi-tech products in guangdong province reached 3.76 billion us dollars .", &refs0[3]); + TD::ConvertSentence("xinhua news agency , guangzhou , march 16 ( reporter chen ji ) the latest statistics show that from january through february this year , the export of high-tech products in guangdong province reached 3.76 billion us dollars , up 34.8 \% over the same period last year and accounted for 25.5 \% of the total export in the province .", &refs1[0]); + TD::ConvertSentence("xinhua news agency , guangzhou , march 16 ( reporter : chen ji ) -- latest statistic indicates that guangdong's export of new high technology products amounts to us $ 3.76 billion , up 34.8 \% over corresponding period and accounts for 25.5 \% of the total exports of the province .", &refs1[1]); + TD::ConvertSentence("xinhua news agency report of march 16 from guangzhou ( by staff reporter chen ji ) - latest statistics indicate guangdong province exported us $ 3.76 billion worth of high technology products , up 34.8 percent from the same period last year , which account for 25.5 percent of the total exports of the province .", &refs1[2]); + TD::ConvertSentence("guangdong , march 16 , ( xinhua ) -- ( chen ji reports ) as the newest statistics shows , in january and feberuary this year , the export volume of new hi-tech products in guangdong province reached 3.76 billion us dollars , up 34.8 \% than last year , making up 25.5 \% of the province's total .", &refs1[3]); + TD::ConvertSentence("one guangdong province will next export us $ 3.76 high-tech product two months first this year 3.76 billion us dollars", &hyp1); + TD::ConvertSentence("xinhua news agency , guangzhou , 16th of march ( reporter chen ) -- latest statistics suggest that guangdong exports new advanced technology product totals $ 3.76 million , 34.8 percent last corresponding period and accounts for 25.5 percent of the total export province .", &hyp2); + } + + virtual void TearDown() { } + + vector > refs0; + vector > refs1; + vector hyp1; + vector hyp2; +}; + +TEST_F(ScorerTest, TestCreateFromFiles) { + vector files; + files.push_back("test_data/re.txt.0"); + files.push_back("test_data/re.txt.1"); + files.push_back("test_data/re.txt.2"); + files.push_back("test_data/re.txt.3"); + DocScorer ds(IBM_BLEU, files); +} + +TEST_F(ScorerTest, TestBLEUScorer) { + ScorerP s1 = SentenceScorer::CreateSentenceScorer(IBM_BLEU, refs0); + ScorerP s2 = SentenceScorer::CreateSentenceScorer(IBM_BLEU, refs1); + ScoreP b1 = s1->ScoreCandidate(hyp1); + EXPECT_FLOAT_EQ(0.23185077, b1->ComputeScore()); + ScoreP b2 = s2->ScoreCandidate(hyp2); + EXPECT_FLOAT_EQ(0.38101241, b2->ComputeScore()); + b1->PlusEquals(*b2); + EXPECT_FLOAT_EQ(0.348854, b1->ComputeScore()); + EXPECT_FALSE(b1->IsAdditiveIdentity()); + string details; + b1->ScoreDetails(&details); + EXPECT_EQ("BLEU = 34.89, 81.5|50.8|29.5|18.6 (brev=0.898)", details); + cerr << details << endl; + string enc; + b1->Encode(&enc); + ScoreP b3 = SentenceScorer::CreateScoreFromString(IBM_BLEU, enc); + details.clear(); + cerr << "Encoded BLEU score size: " << enc.size() << endl; + b3->ScoreDetails(&details); + cerr << details << endl; + EXPECT_FALSE(b3->IsAdditiveIdentity()); + EXPECT_EQ("BLEU = 34.89, 81.5|50.8|29.5|18.6 (brev=0.898)", details); + ScoreP bz = b3->GetZero(); + EXPECT_TRUE(bz->IsAdditiveIdentity()); +} + +TEST_F(ScorerTest, TestTERScorer) { + ScorerP s1 = SentenceScorer::CreateSentenceScorer(TER, refs0); + ScorerP s2 = SentenceScorer::CreateSentenceScorer(TER, refs1); + string details; + ScoreP t1 = s1->ScoreCandidate(hyp1); + t1->ScoreDetails(&details); + cerr << "DETAILS: " << details << endl; + cerr << t1->ComputeScore() << endl; + ScoreP t2 = s2->ScoreCandidate(hyp2); + t2->ScoreDetails(&details); + cerr << "DETAILS: " << details << endl; + cerr << t2->ComputeScore() << endl; + t1->PlusEquals(*t2); + cerr << t1->ComputeScore() << endl; + t1->ScoreDetails(&details); + cerr << "DETAILS: " << details << endl; + EXPECT_EQ("TER = 44.16, 4| 8| 16| 6 (len=77)", details); + string enc; + t1->Encode(&enc); + ScoreP t3 = SentenceScorer::CreateScoreFromString(TER, enc); + details.clear(); + t3->ScoreDetails(&details); + EXPECT_EQ("TER = 44.16, 4| 8| 16| 6 (len=77)", details); + EXPECT_FALSE(t3->IsAdditiveIdentity()); + ScoreP tz = t3->GetZero(); + EXPECT_TRUE(tz->IsAdditiveIdentity()); +} + +TEST_F(ScorerTest, TestTERScorerSimple) { + vector > ref(1); + TD::ConvertSentence("1 2 3 A B", &ref[0]); + vector hyp; + TD::ConvertSentence("A B 1 2 3", &hyp); + ScorerP s1 = SentenceScorer::CreateSentenceScorer(TER, ref); + string details; + ScoreP t1 = s1->ScoreCandidate(hyp); + t1->ScoreDetails(&details); + cerr << "DETAILS: " << details << endl; +} + +TEST_F(ScorerTest, TestSERScorerSimple) { + vector > ref(1); + TD::ConvertSentence("A B C D", &ref[0]); + vector hyp1; + TD::ConvertSentence("A B C", &hyp1); + vector hyp2; + TD::ConvertSentence("A B C D", &hyp2); + ScorerP s1 = SentenceScorer::CreateSentenceScorer(SER, ref); + string details; + ScoreP t1 = s1->ScoreCandidate(hyp1); + t1->ScoreDetails(&details); + cerr << "DETAILS: " << details << endl; + ScoreP t2 = s1->ScoreCandidate(hyp2); + t2->ScoreDetails(&details); + cerr << "DETAILS: " << details << endl; + t2->PlusEquals(*t1); + t2->ScoreDetails(&details); + cerr << "DETAILS: " << details << endl; +} + +TEST_F(ScorerTest, TestCombiScorer) { + ScorerP s1 = SentenceScorer::CreateSentenceScorer(BLEU_minus_TER_over_2, refs0); + string details; + ScoreP t1 = s1->ScoreCandidate(hyp1); + t1->ScoreDetails(&details); + cerr << "DETAILS: " << details << endl; + cerr << t1->ComputeScore() << endl; + string enc; + t1->Encode(&enc); + ScoreP t2 = SentenceScorer::CreateScoreFromString(BLEU_minus_TER_over_2, enc); + details.clear(); + t2->ScoreDetails(&details); + cerr << "DETAILS: " << details << endl; + ScoreP cz = t2->GetZero(); + EXPECT_FALSE(t2->IsAdditiveIdentity()); + EXPECT_TRUE(cz->IsAdditiveIdentity()); + cz->PlusEquals(*t2); + EXPECT_FALSE(cz->IsAdditiveIdentity()); + string d2; + cz->ScoreDetails(&d2); + EXPECT_EQ(d2, details); +} + +TEST_F(ScorerTest, AERTest) { + vector > refs0(1); + TD::ConvertSentence("0-0 2-1 1-2 3-3", &refs0[0]); + + vector hyp; + TD::ConvertSentence("0-0 1-1", &hyp); + AERScorer* as = new AERScorer(refs0); + ScoreP x = as->ScoreCandidate(hyp); + string details; + x->ScoreDetails(&details); + cerr << details << endl; + string enc; + x->Encode(&enc); + delete as; + cerr << "ENC size: " << enc.size() << endl; + ScoreP y = SentenceScorer::CreateScoreFromString(AER, enc); + string d2; + y->ScoreDetails(&d2); + cerr << d2 << endl; + EXPECT_EQ(d2, details); +} + +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + diff --git a/mteval/ter.cc b/mteval/ter.cc new file mode 100644 index 00000000..cacc5b00 --- /dev/null +++ b/mteval/ter.cc @@ -0,0 +1,535 @@ +#include "ter.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "tdict.h" + +const bool ter_use_average_ref_len = true; +const int ter_short_circuit_long_sentences = -1; + +using namespace std; +using namespace std::tr1; + +struct COSTS { + static const float substitution; + static const float deletion; + static const float insertion; + static const float shift; +}; +const float COSTS::substitution = 1.0f; +const float COSTS::deletion = 1.0f; +const float COSTS::insertion = 1.0f; +const float COSTS::shift = 1.0f; + +static const int MAX_SHIFT_SIZE = 10; +static const int MAX_SHIFT_DIST = 50; + +struct Shift { + unsigned int d_; + Shift() : d_() {} + Shift(int b, int e, int m) : d_() { + begin(b); + end(e); + moveto(m); + } + inline int begin() const { + return d_ & 0x3ff; + } + inline int end() const { + return (d_ >> 10) & 0x3ff; + } + inline int moveto() const { + int m = (d_ >> 20) & 0x7ff; + if (m > 1024) { m -= 1024; m *= -1; } + return m; + } + inline void begin(int b) { + d_ &= 0xfffffc00u; + d_ |= (b & 0x3ff); + } + inline void end(int e) { + d_ &= 0xfff003ffu; + d_ |= (e & 0x3ff) << 10; + } + inline void moveto(int m) { + bool neg = (m < 0); + if (neg) { m *= -1; m += 1024; } + d_ &= 0xfffff; + d_ |= (m & 0x7ff) << 20; + } +}; + +class TERScorerImpl { + + public: + enum TransType { MATCH, SUBSTITUTION, INSERTION, DELETION }; + + explicit TERScorerImpl(const vector& ref) : ref_(ref) { + for (int i = 0; i < ref.size(); ++i) + rwexists_.insert(ref[i]); + } + + float Calculate(const vector& hyp, int* subs, int* ins, int* dels, int* shifts) const { + return CalculateAllShifts(hyp, subs, ins, dels, shifts); + } + + inline int GetRefLength() const { + return ref_.size(); + } + + private: + vector ref_; + set rwexists_; + + typedef unordered_map, set, boost::hash > > NgramToIntsMap; + mutable NgramToIntsMap nmap_; + + static float MinimumEditDistance( + const vector& hyp, + const vector& ref, + vector* path) { + vector > bmat(hyp.size() + 1, vector(ref.size() + 1, MATCH)); + vector > cmat(hyp.size() + 1, vector(ref.size() + 1, 0)); + for (int i = 0; i <= hyp.size(); ++i) + cmat[i][0] = i; + for (int j = 0; j <= ref.size(); ++j) + cmat[0][j] = j; + for (int i = 1; i <= hyp.size(); ++i) { + const WordID& hw = hyp[i-1]; + for (int j = 1; j <= ref.size(); ++j) { + const WordID& rw = ref[j-1]; + float& cur_c = cmat[i][j]; + TransType& cur_b = bmat[i][j]; + + if (rw == hw) { + cur_c = cmat[i-1][j-1]; + cur_b = MATCH; + } else { + cur_c = cmat[i-1][j-1] + COSTS::substitution; + cur_b = SUBSTITUTION; + } + float cwoi = cmat[i-1][j]; + if (cur_c > cwoi + COSTS::insertion) { + cur_c = cwoi + COSTS::insertion; + cur_b = INSERTION; + } + float cwod = cmat[i][j-1]; + if (cur_c > cwod + COSTS::deletion) { + cur_c = cwod + COSTS::deletion; + cur_b = DELETION; + } + } + } + + // trace back along the best path and record the transition types + path->clear(); + int i = hyp.size(); + int j = ref.size(); + while (i > 0 || j > 0) { + if (j == 0) { + --i; + path->push_back(INSERTION); + } else if (i == 0) { + --j; + path->push_back(DELETION); + } else { + TransType t = bmat[i][j]; + path->push_back(t); + switch (t) { + case SUBSTITUTION: + case MATCH: + --i; --j; break; + case INSERTION: + --i; break; + case DELETION: + --j; break; + } + } + } + reverse(path->begin(), path->end()); + return cmat[hyp.size()][ref.size()]; + } + + void BuildWordMatches(const vector& hyp, NgramToIntsMap* nmap) const { + nmap->clear(); + set exists_both; + for (int i = 0; i < hyp.size(); ++i) + if (rwexists_.find(hyp[i]) != rwexists_.end()) + exists_both.insert(hyp[i]); + for (int start=0; start cp; + int mlen = min(MAX_SHIFT_SIZE, static_cast(ref_.size() - start)); + for (int len=0; len& in, + int start, int end, int moveto, vector* out) { + // cerr << "ps: " << start << " " << end << " " << moveto << endl; + out->clear(); + if (moveto == -1) { + for (int i = start; i <= end; ++i) + out->push_back(in[i]); + for (int i = 0; i < start; ++i) + out->push_back(in[i]); + for (int i = end+1; i < in.size(); ++i) + out->push_back(in[i]); + } else if (moveto < start) { + for (int i = 0; i <= moveto; ++i) + out->push_back(in[i]); + for (int i = start; i <= end; ++i) + out->push_back(in[i]); + for (int i = moveto+1; i < start; ++i) + out->push_back(in[i]); + for (int i = end+1; i < in.size(); ++i) + out->push_back(in[i]); + } else if (moveto > end) { + for (int i = 0; i < start; ++i) + out->push_back(in[i]); + for (int i = end+1; i <= moveto; ++i) + out->push_back(in[i]); + for (int i = start; i <= end; ++i) + out->push_back(in[i]); + for (int i = moveto+1; i < in.size(); ++i) + out->push_back(in[i]); + } else { + for (int i = 0; i < start; ++i) + out->push_back(in[i]); + for (int i = end+1; (i < in.size()) && (i <= end + (moveto - start)); ++i) + out->push_back(in[i]); + for (int i = start; i <= end; ++i) + out->push_back(in[i]); + for (int i = (end + (moveto - start))+1; i < in.size(); ++i) + out->push_back(in[i]); + } + if (out->size() != in.size()) { + cerr << "ps: " << start << " " << end << " " << moveto << endl; + cerr << "in=" << TD::GetString(in) << endl; + cerr << "out=" << TD::GetString(*out) << endl; + } + assert(out->size() == in.size()); + // cerr << "ps: " << TD::GetString(*out) << endl; + } + + void GetAllPossibleShifts(const vector& hyp, + const vector& ralign, + const vector& herr, + const vector& rerr, + const int min_size, + vector >* shifts) const { + for (int start = 0; start < hyp.size(); ++start) { + vector cp(1, hyp[start]); + NgramToIntsMap::iterator niter = nmap_.find(cp); + if (niter == nmap_.end()) continue; + bool ok = false; + int moveto; + for (set::iterator i = niter->second.begin(); i != niter->second.end(); ++i) { + moveto = *i; + int rm = ralign[moveto]; + ok = (start != rm && + (rm - start) < MAX_SHIFT_DIST && + (start - rm - 1) < MAX_SHIFT_DIST); + if (ok) break; + } + if (!ok) continue; + cp.clear(); + for (int end = start + min_size - 1; + ok && end < hyp.size() && end < (start + MAX_SHIFT_SIZE); ++end) { + cp.push_back(hyp[end]); + vector& sshifts = (*shifts)[end - start]; + ok = false; + NgramToIntsMap::iterator niter = nmap_.find(cp); + if (niter == nmap_.end()) break; + bool any_herr = false; + for (int i = start; i <= end && !any_herr; ++i) + any_herr = herr[i]; + if (!any_herr) { + ok = true; + continue; + } + for (set::iterator mi = niter->second.begin(); + mi != niter->second.end(); ++mi) { + int moveto = *mi; + int rm = ralign[moveto]; + if (! ((rm != start) && + ((rm < start) || (rm > end)) && + (rm - start <= MAX_SHIFT_DIST) && + ((start - rm - 1) <= MAX_SHIFT_DIST))) continue; + ok = true; + bool any_rerr = false; + for (int i = 0; (i <= end - start) && (!any_rerr); ++i) + any_rerr = rerr[moveto+i]; + if (!any_rerr) continue; + for (int roff = 0; roff <= (end - start); ++roff) { + int rmr = ralign[moveto+roff]; + if ((start != rmr) && ((roff == 0) || (rmr != ralign[moveto]))) + sshifts.push_back(Shift(start, end, moveto + roff)); + } + } + } + } + } + + bool CalculateBestShift(const vector& cur, + const vector& hyp, + float curerr, + const vector& path, + vector* new_hyp, + float* newerr, + vector* new_path) const { + vector herr, rerr; + vector ralign; + int hpos = -1; + for (int i = 0; i < path.size(); ++i) { + switch (path[i]) { + case MATCH: + ++hpos; + herr.push_back(false); + rerr.push_back(false); + ralign.push_back(hpos); + break; + case SUBSTITUTION: + ++hpos; + herr.push_back(true); + rerr.push_back(true); + ralign.push_back(hpos); + break; + case INSERTION: + ++hpos; + herr.push_back(true); + break; + case DELETION: + rerr.push_back(true); + ralign.push_back(hpos); + break; + } + } +#if 0 + cerr << "RALIGN: "; + for (int i = 0; i < rerr.size(); ++i) + cerr << ralign[i] << " "; + cerr << endl; + cerr << "RERR: "; + for (int i = 0; i < rerr.size(); ++i) + cerr << (bool)rerr[i] << " "; + cerr << endl; + cerr << "HERR: "; + for (int i = 0; i < herr.size(); ++i) + cerr << (bool)herr[i] << " "; + cerr << endl; +#endif + + vector > shifts(MAX_SHIFT_SIZE + 1); + GetAllPossibleShifts(cur, ralign, herr, rerr, 1, &shifts); + float cur_best_shift_cost = 0; + *newerr = curerr; + vector cur_best_path; + vector cur_best_hyp; + + bool res = false; + for (int i = shifts.size() - 1; i >=0; --i) { + float curfix = curerr - (cur_best_shift_cost + *newerr); + float maxfix = 2.0f * (1 + i) - COSTS::shift; + if ((curfix > maxfix) || ((cur_best_shift_cost == 0) && (curfix == maxfix))) break; + for (int j = 0; j < shifts[i].size(); ++j) { + const Shift& s = shifts[i][j]; + curfix = curerr - (cur_best_shift_cost + *newerr); + maxfix = 2.0f * (1 + i) - COSTS::shift; // TODO remove? + if ((curfix > maxfix) || ((cur_best_shift_cost == 0) && (curfix == maxfix))) continue; + vector shifted(cur.size()); + PerformShift(cur, s.begin(), s.end(), ralign[s.moveto()], &shifted); + vector try_path; + float try_cost = MinimumEditDistance(shifted, ref_, &try_path); + float gain = (*newerr + cur_best_shift_cost) - (try_cost + COSTS::shift); + if (gain > 0.0f || ((cur_best_shift_cost == 0.0f) && (gain == 0.0f))) { + *newerr = try_cost; + cur_best_shift_cost = COSTS::shift; + new_path->swap(try_path); + new_hyp->swap(shifted); + res = true; + // cerr << "Found better shift " << s.begin() << "..." << s.end() << " moveto " << s.moveto() << endl; + } + } + } + + return res; + } + + static void GetPathStats(const vector& path, int* subs, int* ins, int* dels) { + *subs = *ins = *dels = 0; + for (int i = 0; i < path.size(); ++i) { + switch (path[i]) { + case SUBSTITUTION: + ++(*subs); + case MATCH: + break; + case INSERTION: + ++(*ins); break; + case DELETION: + ++(*dels); break; + } + } + } + + float CalculateAllShifts(const vector& hyp, + int* subs, int* ins, int* dels, int* shifts) const { + BuildWordMatches(hyp, &nmap_); + vector path; + float med_cost = MinimumEditDistance(hyp, ref_, &path); + float edits = 0; + vector cur = hyp; + *shifts = 0; + if (ter_short_circuit_long_sentences < 0 || + ref_.size() < ter_short_circuit_long_sentences) { + while (true) { + vector new_hyp; + vector new_path; + float new_med_cost; + if (!CalculateBestShift(cur, hyp, med_cost, path, &new_hyp, &new_med_cost, &new_path)) + break; + edits += COSTS::shift; + ++(*shifts); + med_cost = new_med_cost; + path.swap(new_path); + cur.swap(new_hyp); + } + } + GetPathStats(path, subs, ins, dels); + return med_cost + edits; + } +}; + +class TERScore : public ScoreBase { + friend class TERScorer; + + public: + static const unsigned kINSERTIONS = 0; + static const unsigned kDELETIONS = 1; + static const unsigned kSUBSTITUTIONS = 2; + static const unsigned kSHIFTS = 3; + static const unsigned kREF_WORDCOUNT = 4; + static const unsigned kDUMMY_LAST_ENTRY = 5; + + TERScore() : stats(0,kDUMMY_LAST_ENTRY) {} + float ComputePartialScore() const { return 0.0;} + float ComputeScore() const { + float edits = static_cast(stats[kINSERTIONS] + stats[kDELETIONS] + stats[kSUBSTITUTIONS] + stats[kSHIFTS]); + return edits / static_cast(stats[kREF_WORDCOUNT]); + } + void ScoreDetails(string* details) const; + void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){} + void PlusEquals(const Score& delta, const float scale) { + if (scale==1) + stats += static_cast(delta).stats; + if (scale==-1) + stats -= static_cast(delta).stats; + throw std::runtime_error("TERScore::PlusEquals with scale != +-1"); + } + void PlusEquals(const Score& delta) { + stats += static_cast(delta).stats; + } + + ScoreP GetZero() const { + return ScoreP(new TERScore); + } + ScoreP GetOne() const { + return ScoreP(new TERScore); + } + void Subtract(const Score& rhs, Score* res) const { + static_cast(res)->stats = stats - static_cast(rhs).stats; + } + void Encode(std::string* out) const { + ostringstream os; + os << stats[kINSERTIONS] << ' ' + << stats[kDELETIONS] << ' ' + << stats[kSUBSTITUTIONS] << ' ' + << stats[kSHIFTS] << ' ' + << stats[kREF_WORDCOUNT]; + *out = os.str(); + } + bool IsAdditiveIdentity() const { + for (int i = 0; i < kDUMMY_LAST_ENTRY; ++i) + if (stats[i] != 0) return false; + return true; + } + private: + valarray stats; +}; + +ScoreP TERScorer::ScoreFromString(const std::string& data) { + istringstream is(data); + TERScore* r = new TERScore; + is >> r->stats[TERScore::kINSERTIONS] + >> r->stats[TERScore::kDELETIONS] + >> r->stats[TERScore::kSUBSTITUTIONS] + >> r->stats[TERScore::kSHIFTS] + >> r->stats[TERScore::kREF_WORDCOUNT]; + return ScoreP(r); +} + +void TERScore::ScoreDetails(std::string* details) const { + char buf[200]; + sprintf(buf, "TER = %.2f, %3d|%3d|%3d|%3d (len=%d)", + ComputeScore() * 100.0f, + stats[kINSERTIONS], + stats[kDELETIONS], + stats[kSUBSTITUTIONS], + stats[kSHIFTS], + stats[kREF_WORDCOUNT]); + *details = buf; +} + +TERScorer::~TERScorer() { + for (vector::iterator i = impl_.begin(); i != impl_.end(); ++i) + delete *i; +} + +TERScorer::TERScorer(const vector >& refs) : impl_(refs.size()) { + for (int i = 0; i < refs.size(); ++i) + impl_[i] = new TERScorerImpl(refs[i]); +} + +ScoreP TERScorer::ScoreCCandidate(const vector& hyp) const { + return ScoreP(); +} + +ScoreP TERScorer::ScoreCandidate(const std::vector& hyp) const { + float best_score = numeric_limits::max(); + TERScore* res = new TERScore; + int avg_len = 0; + for (int i = 0; i < impl_.size(); ++i) + avg_len += impl_[i]->GetRefLength(); + avg_len /= impl_.size(); + for (int i = 0; i < impl_.size(); ++i) { + int subs, ins, dels, shifts; + float score = impl_[i]->Calculate(hyp, &subs, &ins, &dels, &shifts); + // cerr << "Component TER cost: " << score << endl; + if (score < best_score) { + res->stats[TERScore::kINSERTIONS] = ins; + res->stats[TERScore::kDELETIONS] = dels; + res->stats[TERScore::kSUBSTITUTIONS] = subs; + res->stats[TERScore::kSHIFTS] = shifts; + if (ter_use_average_ref_len) { + res->stats[TERScore::kREF_WORDCOUNT] = avg_len; + } else { + res->stats[TERScore::kREF_WORDCOUNT] = impl_[i]->GetRefLength(); + } + + best_score = score; + } + } + return ScoreP(res); +} diff --git a/mteval/ter.h b/mteval/ter.h new file mode 100644 index 00000000..43314791 --- /dev/null +++ b/mteval/ter.h @@ -0,0 +1,19 @@ +#ifndef _TER_H_ +#define _TER_H_ + +#include "scorer.h" + +class TERScorerImpl; + +class TERScorer : public SentenceScorer { + public: + TERScorer(const std::vector >& references); + ~TERScorer(); + ScoreP ScoreCandidate(const std::vector& hyp) const; + ScoreP ScoreCCandidate(const std::vector& hyp) const; + static ScoreP ScoreFromString(const std::string& data); + private: + std::vector impl_; +}; + +#endif diff --git a/mteval/test_data/re.txt.0 b/mteval/test_data/re.txt.0 new file mode 100644 index 00000000..86eff087 --- /dev/null +++ b/mteval/test_data/re.txt.0 @@ -0,0 +1,5 @@ +erdogan states turkey to reject any pressures to urge it to recognize cyprus +ankara 12 - 1 ( afp ) - turkish prime minister recep tayyip erdogan announced today , wednesday , that ankara will reject any pressure by the european union to urge it to recognize cyprus . this comes two weeks before the summit of european union state and government heads who will decide whether or nor membership negotiations with ankara should be opened . +erdogan told " ntv " television station that " the european union cannot address us by imposing new conditions on us with regard to cyprus . +we will discuss this dossier in the course of membership negotiations . " +he added " let me be clear , i cannot sidestep turkey , this is something we cannot accept . " diff --git a/mteval/test_data/re.txt.1 b/mteval/test_data/re.txt.1 new file mode 100644 index 00000000..2140f198 --- /dev/null +++ b/mteval/test_data/re.txt.1 @@ -0,0 +1,5 @@ +erdogan confirms turkey will resist any pressure to recognize cyprus +ankara 12 - 1 ( afp ) - the turkish head of government , recep tayyip erdogan , announced today ( wednesday ) that ankara would resist any pressure the european union might exercise in order to force it into recognizing cyprus . this comes two weeks before a summit of european union heads of state and government , who will decide whether or not to open membership negotiations with ankara . +erdogan said to the ntv television channel : " the european union cannot engage with us through imposing new conditions on us with regard to cyprus . +we shall discuss this issue in the course of the membership negotiations . " +he added : " let me be clear - i cannot confine turkey . this is something we do not accept . " diff --git a/mteval/test_data/re.txt.2 b/mteval/test_data/re.txt.2 new file mode 100644 index 00000000..94e46286 --- /dev/null +++ b/mteval/test_data/re.txt.2 @@ -0,0 +1,5 @@ +erdogan confirms that turkey will reject any pressures to encourage it to recognize cyprus +ankara , 12 / 1 ( afp ) - the turkish prime minister recep tayyip erdogan declared today , wednesday , that ankara will reject any pressures that the european union may apply on it to encourage to recognize cyprus . this comes two weeks before a summit of the heads of countries and governments of the european union , who will decide on whether or not to start negotiations on joining with ankara . +erdogan told the ntv television station that " it is not possible for the european union to talk to us by imposing new conditions on us regarding cyprus . +we shall discuss this dossier during the negotiations on joining . " +and he added , " let me be clear . turkey's arm should not be twisted ; this is something we cannot accept . " diff --git a/mteval/test_data/re.txt.3 b/mteval/test_data/re.txt.3 new file mode 100644 index 00000000..f87c3308 --- /dev/null +++ b/mteval/test_data/re.txt.3 @@ -0,0 +1,5 @@ +erdogan stresses that turkey will reject all pressures to force it to recognize cyprus +ankara 12 - 1 ( afp ) - turkish prime minister recep tayyip erdogan announced today , wednesday , that ankara would refuse all pressures applied on it by the european union to force it to recognize cyprus . that came two weeks before the summit of the presidents and prime ministers of the european union , who would decide on whether to open negotiations on joining with ankara or not . +erdogan said to " ntv " tv station that the " european union can not communicate with us by imposing on us new conditions related to cyprus . +we will discuss this file during the negotiations on joining . " +he added , " let me be clear . turkey's arm should not be twisted . this is unacceptable to us . " -- cgit v1.2.3