From 481a120564fdb73c8c6833e2102acb533683261c Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 27 Jan 2012 02:31:00 -0500 Subject: migrate mert to the new scorer interface --- vest/mr_vest_reduce.cc | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) (limited to 'vest/mr_vest_reduce.cc') diff --git a/vest/mr_vest_reduce.cc b/vest/mr_vest_reduce.cc index 3df52020..dda61f88 100644 --- a/vest/mr_vest_reduce.cc +++ b/vest/mr_vest_reduce.cc @@ -10,6 +10,7 @@ #include "error_surface.h" #include "line_optimizer.h" #include "b64tools.h" +#include "stringlib.h" using namespace std; namespace po = boost::program_options; @@ -17,12 +18,12 @@ namespace po = boost::program_options; void InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() - ("loss_function,l",po::value<string>(), "Loss function being optimized") + ("evaluation_metric,m",po::value<string>(), "Evaluation metric (IBM_BLEU, etc.)") ("help,h", "Help"); po::options_description dcmdline_options; dcmdline_options.add(opts); po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - bool flag = conf->count("loss_function") == 0; + bool flag = conf->count("evaluation_metric") == 0; if (flag || conf->count("help")) { cerr << dcmdline_options << endl; exit(1); @@ -32,30 +33,27 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { int main(int argc, char** argv) { po::variables_map conf; InitCommandLine(argc, argv, &conf); - const string loss_function = conf["loss_function"].as<string>(); - ScoreType type = ScoreTypeFromString(loss_function); + const string evaluation_metric = conf["evaluation_metric"].as<string>(); LineOptimizer::ScoreType opt_type = LineOptimizer::MAXIMIZE_SCORE; - if (type == TER || type == AER) { + if (UppercaseString(evaluation_metric) == "TER") opt_type = LineOptimizer::MINIMIZE_SCORE; - } - string last_key; + EvaluationMetric* metric =
EvaluationMetric::Instance(evaluation_metric); + vector<ErrorSurface> esv; - while(cin) { - string line; - getline(cin, line); - if (line.empty()) continue; + string last_key, line, key, val; + while(getline(cin, line)) { size_t ks = line.find("\t"); assert(string::npos != ks); assert(ks > 2); - string key = line.substr(2, ks - 2); - string val = line.substr(ks + 1); + key = line.substr(2, ks - 2); + val = line.substr(ks + 1); if (key != last_key) { if (!last_key.empty()) { float score; - double x = LineOptimizer::LineOptimize(esv, opt_type, &score); + double x = LineOptimizer::LineOptimize(metric, esv, opt_type, &score); cout << last_key << "|" << x << "|" << score << endl; } - last_key = key; + last_key.swap(key); esv.clear(); } if (val.size() % 4 != 0) { @@ -68,13 +66,11 @@ int main(int argc, char** argv) { continue; } esv.push_back(ErrorSurface()); - esv.back().Deserialize(type, encoded); + esv.back().Deserialize(encoded); } if (!esv.empty()) { - // cerr << "ESV=" << esv.size() << endl; - // for (int i = 0; i < esv.size(); ++i) { cerr << esv[i].size() << endl; } float score; - double x = LineOptimizer::LineOptimize(esv, opt_type, &score); + double x = LineOptimizer::LineOptimize(metric, esv, opt_type, &score); cout << last_key << "|" << x << "|" << score << endl; } return 0; -- cgit v1.2.3