From 1b8181bf0d6e9137e6b9ccdbe414aec37377a1a9 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 18 Nov 2012 13:35:42 -0500 Subject: major restructure of the training code --- dpmert/mr_dpmert_generate_mapper_input.cc | 81 ------------------------------- 1 file changed, 81 deletions(-) delete mode 100644 dpmert/mr_dpmert_generate_mapper_input.cc (limited to 'dpmert/mr_dpmert_generate_mapper_input.cc') diff --git a/dpmert/mr_dpmert_generate_mapper_input.cc b/dpmert/mr_dpmert_generate_mapper_input.cc deleted file mode 100644 index 199cd23a..00000000 --- a/dpmert/mr_dpmert_generate_mapper_input.cc +++ /dev/null @@ -1,81 +0,0 @@ -#include -#include - -#include -#include - -#include "filelib.h" -#include "weights.h" -#include "line_optimizer.h" - -using namespace std; -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("dev_set_size,s",po::value(),"[REQD] Development set size (# of parallel sentences)") - ("forest_repository,r",po::value(),"[REQD] Path to forest repository") - ("weights,w",po::value(),"[REQD] Current feature weights file") - ("optimize_feature,o",po::value >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)") - ("random_directions,d",po::value()->default_value(20),"Number of random directions to run the line optimizer in") - ("help,h", "Help"); - po::options_description dcmdline_options; - dcmdline_options.add(opts); - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - bool flag = false; - if (conf->count("dev_set_size") == 0) { - cerr << "Please specify the size of the development set using -d N\n"; - flag = true; - } - if (conf->count("weights") == 0) { - cerr << "Please specify the starting-point weights using -w \n"; - flag = true; - } - if (conf->count("forest_repository") == 0) { - cerr << "Please specify the forest repository location using -r \n"; - flag = true; - } - if (flag || conf->count("help")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -int main(int argc, char** argv) { - RandomNumberGenerator rng; - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - vector features; - SparseVector origin; - vector w; - Weights::InitFromFile(conf["weights"].as(), &w, &features); - Weights::InitSparseVector(w, &origin); - const string forest_repository = conf["forest_repository"].as(); - if (!DirectoryExists(forest_repository)) { - cerr << "Forest repository directory " << forest_repository << " not found!\n"; - return 1; - } - if (conf.count("optimize_feature") > 0) - features=conf["optimize_feature"].as >(); - vector > directions; - vector fids(features.size()); - for (unsigned i = 0; i < features.size(); ++i) - fids[i] = FD::Convert(features[i]); - LineOptimizer::CreateOptimizationDirections( - fids, - conf["random_directions"].as(), - &rng, - &directions); - unsigned dev_set_size = conf["dev_set_size"].as(); - for (unsigned i = 0; i < dev_set_size; ++i) { - for (unsigned j = 0; j < directions.size(); ++j) { - cout << forest_repository << '/' << i << ".json.gz " << i << ' '; - print(cout, origin, "=", ";"); - cout << ' '; - print(cout, directions[j], "=", ";"); - cout << endl; - } - } - return 0; -} -- cgit v1.2.3