diff options
Diffstat (limited to 'vest/mr_vest_reduce.cc')
-rw-r--r-- | vest/mr_vest_reduce.cc | 80 |
1 files changed, 80 insertions, 0 deletions
diff --git a/vest/mr_vest_reduce.cc b/vest/mr_vest_reduce.cc new file mode 100644 index 00000000..c1347065 --- /dev/null +++ b/vest/mr_vest_reduce.cc @@ -0,0 +1,80 @@ +#include <sstream> +#include <iostream> +#include <fstream> +#include <vector> + +#include <boost/program_options.hpp> +#include <boost/program_options/variables_map.hpp> + +#include "sparse_vector.h" +#include "error_surface.h" +#include "line_optimizer.h" +#include "hg_io.h" + +using namespace std; +namespace po = boost::program_options; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("loss_function,l",po::value<string>()->default_value("ibm_bleu"), "Loss function being optimized") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = false; + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +int main(int argc, char** argv) { + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + const string loss_function = conf["loss_function"].as<string>(); + ScoreType type = ScoreTypeFromString(loss_function); + LineOptimizer::ScoreType opt_type = LineOptimizer::MAXIMIZE_SCORE; + if (type == TER) + opt_type = LineOptimizer::MINIMIZE_SCORE; + string last_key; + vector<ErrorSurface> esv; + while(cin) { + string line; + getline(cin, line); + if (line.empty()) continue; + size_t ks = line.find("\t"); + assert(string::npos != ks); + assert(ks > 2); + string key = line.substr(2, ks - 2); + string val = line.substr(ks + 1); + if (key != last_key) { + if (!last_key.empty()) { + float score; + double x = LineOptimizer::LineOptimize(esv, opt_type, &score); + cout << last_key << "|" << x << "|" << score << endl; + } + last_key = key; + esv.clear(); + } + if (val.size() % 4 != 0) { + cerr << "B64 encoding error 1! Skipping.\n"; + continue; + } + string encoded(val.size() / 4 * 3, '\0'); + if (!B64::b64decode(reinterpret_cast<const unsigned char*>(&val[0]), val.size(), &encoded[0], encoded.size())) { + cerr << "B64 encoding error 2! Skipping.\n"; + continue; + } + esv.push_back(ErrorSurface()); + esv.back().Deserialize(type, encoded); + } + if (!esv.empty()) { + cerr << "ESV=" << esv.size() << endl; + for (int i = 0; i < esv.size(); ++i) { cerr << esv[i].size() << endl; } + float score; + double x = LineOptimizer::LineOptimize(esv, opt_type, &score); + cout << last_key << "|" << x << "|" << score << endl; + } + return 0; +} |