From e4c5e87db2139aa0f8655b063da7d8b5199cb46d Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 20 Dec 2011 18:34:14 -0500 Subject: migrate fast_score to the new API --- mteval/Makefile.am | 2 +- mteval/fast_score.cc | 40 +++++++++++++++++++++++----------------- mteval/ns.cc | 5 +++-- mteval/ns_ter.cc | 12 ++++++++++++ mteval/ns_ter.h | 1 + pro-train/dist-pro.pl | 2 +- vest/dist-vest.pl | 2 +- 7 files changed, 42 insertions(+), 22 deletions(-) diff --git a/mteval/Makefile.am b/mteval/Makefile.am index 6679d949..e7126675 100644 --- a/mteval/Makefile.am +++ b/mteval/Makefile.am @@ -10,7 +10,7 @@ endif noinst_LIBRARIES = libmteval.a -libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc +libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc fast_score_SOURCES = fast_score.cc fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz diff --git a/mteval/fast_score.cc b/mteval/fast_score.cc index 5ee264a6..a271ccc5 100644 --- a/mteval/fast_score.cc +++ b/mteval/fast_score.cc @@ -4,9 +4,11 @@ #include #include +#include "stringlib.h" #include "filelib.h" #include "tdict.h" -#include "scorer.h" +#include "ns.h" +#include "ns_docscorer.h" using namespace std; namespace po = boost::program_options; @@ -14,8 +16,8 @@ namespace po = boost::program_options; void InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() - ("reference,r",po::value >(), "[REQD] Reference translation(s) (tokenized text file)") - ("loss_function,l",po::value()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)") + ("reference,r",po::value >(), "[1 or more required] Reference translation(s) in tokenized text files") + ("evaluation_metric,m",po::value()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)") ("in_file,i", po::value()->default_value("-"), "Input file") ("help,h", "Help"); po::options_description dcmdline_options; @@ -35,24 +37,29 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { int main(int argc, char** argv) { po::variables_map conf; InitCommandLine(argc, argv, &conf); - const string loss_function = conf["loss_function"].as(); - ScoreType type = ScoreTypeFromString(loss_function); - DocScorer ds(type, conf["reference"].as >(), ""); + string loss_function = UppercaseString(conf["evaluation_metric"].as()); + if (loss_function == "COMBI") { + cerr << "WARNING: 'combi' metric is no longer supported, switching to 'COMB:TER=-0.5;IBM_BLEU=0.5'\n"; + loss_function = "COMB:TER=-0.5;IBM_BLEU=0.5"; + } else if (loss_function == "BLEU") { + cerr << "WARNING: 'BLEU' is ambiguous, assuming 'IBM_BLEU'\n"; + loss_function = "IBM_BLEU"; + } + EvaluationMetric* metric = EvaluationMetric::Instance(loss_function); + DocumentScorer ds(metric, conf["reference"].as >()); cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl; ReadFile rf(conf["in_file"].as()); - ScoreP acc; + SufficientStats acc; istream& in = *rf.stream(); int lc = 0; - while(in) { - string line; - getline(in, line); - if (line.empty() && !in) break; + string line; + while(getline(in, line)) { vector sent; TD::ConvertSentence(line, &sent); - ScoreP sentscore = ds[lc]->ScoreCandidate(sent); - if (!acc) { acc = sentscore->GetZero(); } - acc->PlusEquals(*sentscore); + SufficientStats t; + ds[lc]->Evaluate(sent, &t); + acc += t; ++lc; } assert(lc > 0); @@ -63,9 +70,8 @@ int main(int argc, char** argv) { if (lc != ds.size()) cerr << "Fewer sentences in hyp (" << lc << ") than refs (" << ds.size() << "): scoring partial set!\n"; - float score = acc->ComputeScore(); - string details; - acc->ScoreDetails(&details); + float score = metric->ComputeScore(acc); + const string details = metric->DetailedScore(acc); cerr << details << endl; cout << score << endl; return 0; diff --git a/mteval/ns.cc b/mteval/ns.cc index 6139757d..1018319d 100644 --- a/mteval/ns.cc +++ b/mteval/ns.cc @@ -173,7 +173,7 @@ struct BleuSegmentEvaluator : public SegmentEvaluator { template struct BleuMetric : public EvaluationMetric { - BleuMetric() : EvaluationMetric("IBM_BLEU") {} + BleuMetric() : EvaluationMetric(BrevityType == IBM ? "IBM_BLEU" : (BrevityType == Koehn ? "KOEHN_BLEU" : "NIST_BLEU")) {} unsigned SufficientStatisticsVectorSize() const { return N*2 + 2; } shared_ptr CreateSegmentEvaluator(const vector >& refs) const { return shared_ptr(new BleuSegmentEvaluator(refs, this)); @@ -208,7 +208,8 @@ struct BleuMetric : public EvaluationMetric { vector precs(N); float bp; float bleu = ComputeBreakdown(stats, &bp, &precs); - sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)", + sprintf(buf, "%s = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)", + MetricId().c_str(), bleu*100.0, precs[0]*100.0, precs[1]*100.0, diff --git a/mteval/ns_ter.cc b/mteval/ns_ter.cc index 8c969e58..f75acf1d 100644 --- a/mteval/ns_ter.cc +++ b/mteval/ns_ter.cc @@ -473,3 +473,15 @@ float TERMetric::ComputeScore(const SufficientStats& stats) const { return edits / static_cast(stats[kREF_WORDCOUNT]); } +string TERMetric::DetailedScore(const SufficientStats& stats) const { + char buf[200]; + sprintf(buf, "TER = %.2f, %3.f|%3.f|%3.f|%3.f (len=%3.f)", + ComputeScore(stats) * 100.0f, + stats[kINSERTIONS], + stats[kDELETIONS], + stats[kSUBSTITUTIONS], + stats[kSHIFTS], + stats[kREF_WORDCOUNT]); + return buf; +} + diff --git a/mteval/ns_ter.h b/mteval/ns_ter.h index 6c020cfa..3190fc1b 100644 --- a/mteval/ns_ter.h +++ b/mteval/ns_ter.h @@ -10,6 +10,7 @@ class TERMetric : public EvaluationMetric { public: virtual unsigned SufficientStatisticsVectorSize() const; + virtual std::string DetailedScore(const SufficientStats& stats) const; virtual void ComputeSufficientStatistics(const std::vector& hyp, const std::vector >& refs, SufficientStats* out) const; diff --git a/pro-train/dist-pro.pl b/pro-train/dist-pro.pl index 5db053de..ba9cdc06 100755 --- a/pro-train/dist-pro.pl +++ b/pro-train/dist-pro.pl @@ -288,7 +288,7 @@ while (1){ $retries++; } die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest); - my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -l $metric"); + my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric"); chomp $dec_score; print STDERR "DECODER SCORE: $dec_score\n"; diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 11e791c1..c382a972 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -308,7 +308,7 @@ while (1){ $retries++; } die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest); - my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -l $metric"); + my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric"); chomp $dec_score; print STDERR "DECODER SCORE: $dec_score\n"; -- cgit v1.2.3