From 89b662b51373f0f466d62a65d3f0a164d1d31b1c Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 27 Jan 2012 14:49:08 -0500 Subject: rename vest to dpmert (dynamic programming mert), rename variables and types to correspond to standard geometric concepts --- dpmert/mr_dpmert_reduce.cc | 77 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 dpmert/mr_dpmert_reduce.cc (limited to 'dpmert/mr_dpmert_reduce.cc') diff --git a/dpmert/mr_dpmert_reduce.cc b/dpmert/mr_dpmert_reduce.cc new file mode 100644 index 00000000..dda61f88 --- /dev/null +++ b/dpmert/mr_dpmert_reduce.cc @@ -0,0 +1,77 @@ +#include +#include +#include +#include + +#include +#include + +#include "sparse_vector.h" +#include "error_surface.h" +#include "line_optimizer.h" +#include "b64tools.h" +#include "stringlib.h" + +using namespace std; +namespace po = boost::program_options; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("evaluation_metric,m",po::value(), "Evaluation metric (IBM_BLEU, etc.)") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = conf->count("evaluation_metric") == 0; + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +int main(int argc, char** argv) { + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + const string evaluation_metric = conf["evaluation_metric"].as(); + LineOptimizer::ScoreType opt_type = LineOptimizer::MAXIMIZE_SCORE; + if (UppercaseString(evaluation_metric) == "TER") + opt_type = LineOptimizer::MINIMIZE_SCORE; + EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric); + + vector esv; + string last_key, line, key, val; + while(getline(cin, line)) { + size_t ks = line.find("\t"); + assert(string::npos != ks); + assert(ks > 2); + key = line.substr(2, ks - 2); + val = line.substr(ks + 1); + if (key != last_key) { + if (!last_key.empty()) { + float score; + double x = LineOptimizer::LineOptimize(metric, esv, opt_type, &score); + cout << last_key << "|" << x << "|" << score << endl; + } + last_key.swap(key); + esv.clear(); + } + if (val.size() % 4 != 0) { + cerr << "B64 encoding error 1! Skipping.\n"; + continue; + } + string encoded(val.size() / 4 * 3, '\0'); + if (!B64::b64decode(reinterpret_cast(&val[0]), val.size(), &encoded[0], encoded.size())) { + cerr << "B64 encoding error 2! Skipping.\n"; + continue; + } + esv.push_back(ErrorSurface()); + esv.back().Deserialize(encoded); + } + if (!esv.empty()) { + float score; + double x = LineOptimizer::LineOptimize(metric, esv, opt_type, &score); + cout << last_key << "|" << x << "|" << score << endl; + } + return 0; +} -- cgit v1.2.3 From 39dc17229f574f53ddbbc6d7515712e39514b059 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 3 Feb 2012 20:55:10 -0500 Subject: use interface properly --- dpmert/mr_dpmert_reduce.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'dpmert/mr_dpmert_reduce.cc') diff --git a/dpmert/mr_dpmert_reduce.cc b/dpmert/mr_dpmert_reduce.cc index dda61f88..31512a03 100644 --- a/dpmert/mr_dpmert_reduce.cc +++ b/dpmert/mr_dpmert_reduce.cc @@ -34,10 +34,10 @@ int main(int argc, char** argv) { po::variables_map conf; InitCommandLine(argc, argv, &conf); const string evaluation_metric = conf["evaluation_metric"].as(); + EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric); LineOptimizer::ScoreType opt_type = LineOptimizer::MAXIMIZE_SCORE; - if (UppercaseString(evaluation_metric) == "TER") + if (metric->IsErrorMetric()) opt_type = LineOptimizer::MINIMIZE_SCORE; - EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric); vector esv; string last_key, line, key, val; -- cgit v1.2.3