From e4c5e87db2139aa0f8655b063da7d8b5199cb46d Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 20 Dec 2011 18:34:14 -0500 Subject: migrate fast_score to the new API --- mteval/fast_score.cc | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) (limited to 'mteval/fast_score.cc') diff --git a/mteval/fast_score.cc b/mteval/fast_score.cc index 5ee264a6..a271ccc5 100644 --- a/mteval/fast_score.cc +++ b/mteval/fast_score.cc @@ -4,9 +4,11 @@ #include #include +#include "stringlib.h" #include "filelib.h" #include "tdict.h" -#include "scorer.h" +#include "ns.h" +#include "ns_docscorer.h" using namespace std; namespace po = boost::program_options; @@ -14,8 +16,8 @@ namespace po = boost::program_options; void InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() - ("reference,r",po::value >(), "[REQD] Reference translation(s) (tokenized text file)") - ("loss_function,l",po::value()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)") + ("reference,r",po::value >(), "[1 or more required] Reference translation(s) in tokenized text files") + ("evaluation_metric,m",po::value()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)") ("in_file,i", po::value()->default_value("-"), "Input file") ("help,h", "Help"); po::options_description dcmdline_options; @@ -35,24 +37,29 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { int main(int argc, char** argv) { po::variables_map conf; InitCommandLine(argc, argv, &conf); - const string loss_function = conf["loss_function"].as(); - ScoreType type = ScoreTypeFromString(loss_function); - DocScorer ds(type, conf["reference"].as >(), ""); + string loss_function = UppercaseString(conf["evaluation_metric"].as()); + if (loss_function == "COMBI") { + cerr << "WARNING: 'combi' metric is no longer supported, switching to 'COMB:TER=-0.5;IBM_BLEU=0.5'\n"; + loss_function = "COMB:TER=-0.5;IBM_BLEU=0.5"; + } else if (loss_function == "BLEU") { + cerr << "WARNING: 'BLEU' is ambiguous, assuming 'IBM_BLEU'\n"; + loss_function = "IBM_BLEU"; + } + EvaluationMetric* metric = EvaluationMetric::Instance(loss_function); + DocumentScorer ds(metric, conf["reference"].as >()); cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl; ReadFile rf(conf["in_file"].as()); - ScoreP acc; + SufficientStats acc; istream& in = *rf.stream(); int lc = 0; - while(in) { - string line; - getline(in, line); - if (line.empty() && !in) break; + string line; + while(getline(in, line)) { vector sent; TD::ConvertSentence(line, &sent); - ScoreP sentscore = ds[lc]->ScoreCandidate(sent); - if (!acc) { acc = sentscore->GetZero(); } - acc->PlusEquals(*sentscore); + SufficientStats t; + ds[lc]->Evaluate(sent, &t); + acc += t; ++lc; } assert(lc > 0); @@ -63,9 +70,8 @@ int main(int argc, char** argv) { if (lc != ds.size()) cerr << "Fewer sentences in hyp (" << lc << ") than refs (" << ds.size() << "): scoring partial set!\n"; - float score = acc->ComputeScore(); - string details; - acc->ScoreDetails(&details); + float score = metric->ComputeScore(acc); + const string details = metric->DetailedScore(acc); cerr << details << endl; cout << score << endl; return 0; -- cgit v1.2.3