diff options
author | Michael Denkowski <michael.j.denkowski@gmail.com> | 2012-12-22 16:01:23 -0500 |
---|---|---|
committer | Michael Denkowski <michael.j.denkowski@gmail.com> | 2012-12-22 16:01:23 -0500 |
commit | 597d89c11db53e91bc011eab70fd613bbe6453e8 (patch) | |
tree | 83c87c07d1ff6d3ee4e3b1626f7eddd49c61095b /dpmert/mr_dpmert_generate_mapper_input.cc | |
parent | 65e958ff2678a41c22be7171456a63f002ef370b (diff) | |
parent | 201af2acd394415a05072fbd53d42584875aa4b4 (diff) |
Merge branch 'master' of git://github.com/redpony/cdec
Diffstat (limited to 'dpmert/mr_dpmert_generate_mapper_input.cc')
-rw-r--r-- | dpmert/mr_dpmert_generate_mapper_input.cc | 81 |
1 files changed, 0 insertions, 81 deletions
```diff
diff --git a/dpmert/mr_dpmert_generate_mapper_input.cc b/dpmert/mr_dpmert_generate_mapper_input.cc
deleted file mode 100644
index 199cd23a..00000000
--- a/dpmert/mr_dpmert_generate_mapper_input.cc
+++ /dev/null
@@ -1,81 +0,0 @@
-#include <iostream>
-#include <vector>
-
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "filelib.h"
-#include "weights.h"
-#include "line_optimizer.h"
-
-using namespace std;
-namespace po = boost::program_options;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("dev_set_size,s",po::value<unsigned>(),"[REQD] Development set size (# of parallel sentences)")
-        ("forest_repository,r",po::value<string>(),"[REQD] Path to forest repository")
-        ("weights,w",po::value<string>(),"[REQD] Current feature weights file")
-        ("optimize_feature,o",po::value<vector<string> >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)")
-        ("random_directions,d",po::value<unsigned int>()->default_value(20),"Number of random directions to run the line optimizer in")
-        ("help,h", "Help");
-  po::options_description dcmdline_options;
-  dcmdline_options.add(opts);
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  bool flag = false;
-  if (conf->count("dev_set_size") == 0) {
-    cerr << "Please specify the size of the development set using -d N\n";
-    flag = true;
-  }
-  if (conf->count("weights") == 0) {
-    cerr << "Please specify the starting-point weights using -w <weightfile.txt>\n";
-    flag = true;
-  }
-  if (conf->count("forest_repository") == 0) {
-    cerr << "Please specify the forest repository location using -r <DIR>\n";
-    flag = true;
-  }
-  if (flag || conf->count("help")) {
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-int main(int argc, char** argv) {
-  RandomNumberGenerator<boost::mt19937> rng;
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-  vector<string> features;
-  SparseVector<weight_t> origin;
-  vector<weight_t> w;
-  Weights::InitFromFile(conf["weights"].as<string>(), &w, &features);
-  Weights::InitSparseVector(w, &origin);
-  const string forest_repository = conf["forest_repository"].as<string>();
-  if (!DirectoryExists(forest_repository)) {
-    cerr << "Forest repository directory " << forest_repository << " not found!\n";
-    return 1;
-  }
-  if (conf.count("optimize_feature") > 0)
-    features=conf["optimize_feature"].as<vector<string> >();
-  vector<SparseVector<weight_t> > directions;
-  vector<int> fids(features.size());
-  for (unsigned i = 0; i < features.size(); ++i)
-    fids[i] = FD::Convert(features[i]);
-  LineOptimizer::CreateOptimizationDirections(
-     fids,
-     conf["random_directions"].as<unsigned int>(),
-     &rng,
-     &directions);
-  unsigned dev_set_size = conf["dev_set_size"].as<unsigned>();
-  for (unsigned i = 0; i < dev_set_size; ++i) {
-    for (unsigned j = 0; j < directions.size(); ++j) {
-      cout << forest_repository << '/' << i << ".json.gz " << i << ' ';
-      print(cout, origin, "=", ";");
-      cout << ' ';
-      print(cout, directions[j], "=", ";");
-      cout << endl;
-    }
-  }
-  return 0;
-}
```