diff options
author | Kenneth Heafield <github@kheafield.com> | 2012-10-22 12:07:20 +0100 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2012-10-22 12:07:20 +0100 |
commit | 5f98fe5c4f2a2090eeb9d30c030305a70a8347d1 (patch) | |
tree | 9b6002f850e6dea1e3400c6b19bb31a9cdf3067f /rst_parser/rst_parse.cc | |
parent | cf9994131993b40be62e90e213b1e11e6b550143 (diff) | |
parent | 21825a09d97c2e0afd20512f306fb25fed55e529 (diff) |
Merge remote branch 'upstream/master'
Conflicts:
Jamroot
bjam
decoder/Jamfile
decoder/cdec.cc
dpmert/Jamfile
jam-files/sanity.jam
klm/lm/Jamfile
klm/util/Jamfile
mira/Jamfile
Diffstat (limited to 'rst_parser/rst_parse.cc')
-rw-r--r-- | rst_parser/rst_parse.cc | 111 |
1 files changed, 0 insertions, 111 deletions
diff --git a/rst_parser/rst_parse.cc b/rst_parser/rst_parse.cc deleted file mode 100644 index 9c42a8f4..00000000 --- a/rst_parser/rst_parse.cc +++ /dev/null @@ -1,111 +0,0 @@ -#include "arc_factored.h" - -#include <vector> -#include <iostream> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "timing_stats.h" -#include "arc_ff.h" -#include "dep_training.h" -#include "stringlib.h" -#include "filelib.h" -#include "tdict.h" -#include "weights.h" -#include "rst.h" -#include "global_ff.h" - -using namespace std; -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - string cfg_file; - opts.add_options() - ("input,i",po::value<string>()->default_value("-"), "File containing test data (jsent format)") - ("q_weights,q",po::value<string>(), "Arc-factored weights for proposal distribution (mandatory)") - ("p_weights,p",po::value<string>(), "Weights for target distribution (optional)") - ("samples,n",po::value<unsigned>()->default_value(1000), "Number of samples"); - po::options_description clo("Command line options"); - clo.add_options() - ("config,c", po::value<string>(&cfg_file), "Configuration file") - ("help,?", "Print this help message and exit"); - - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(dconfig_options).add(clo); - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (cfg_file.size() > 0) { - ReadFile rf(cfg_file); - po::store(po::parse_config_file(*rf.stream(), dconfig_options), *conf); - } - if (conf->count("help") || conf->count("q_weights") == 0) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - vector<weight_t> qweights, pweights; - Weights::InitFromFile(conf["q_weights"].as<string>(), &qweights); - if (conf.count("p_weights")) - Weights::InitFromFile(conf["p_weights"].as<string>(), &pweights); - const bool global = pweights.size() > 0; - ArcFeatureFunctions ffs; - GlobalFeatureFunctions gff; - ReadFile rf(conf["input"].as<string>()); - istream* in = rf.stream(); - TrainingInstance sent; - MT19937 rng; - int samples = conf["samples"].as<unsigned>(); - int totroot = 0, root_right = 0, tot = 0, cor = 0; - while(TrainingInstance::ReadInstance(in, &sent)) { - ffs.PrepareForInput(sent.ts); - if (global) gff.PrepareForInput(sent.ts); - ArcFactoredForest forest(sent.ts.pos.size()); - forest.ExtractFeatures(sent.ts, ffs); - forest.Reweight(qweights); - TreeSampler ts(forest); - double best_score = -numeric_limits<double>::infinity(); - EdgeSubset best_tree; - for (int n = 0; n < samples; ++n) { - EdgeSubset tree; - ts.SampleRandomSpanningTree(&tree, &rng); - SparseVector<double> qfeats, gfeats; - tree.ExtractFeatures(sent.ts, ffs, &qfeats); - double score = 0; - if (global) { - gff.Features(sent.ts, tree, &gfeats); - score = (qfeats + gfeats).dot(pweights); - } else { - score = qfeats.dot(qweights); - } - if (score > best_score) { - best_tree = tree; - best_score = score; - } - } - cerr << "BEST SCORE: " << best_score << endl; - cout << best_tree << endl; - const bool sent_has_ref = sent.tree.h_m_pairs.size() > 0; - if (sent_has_ref) { - map<pair<short,short>, bool> ref; - for (int i = 0; i < sent.tree.h_m_pairs.size(); ++i) - ref[sent.tree.h_m_pairs[i]] = true; - int ref_root = sent.tree.roots.front(); - if (ref_root == best_tree.roots.front()) { ++root_right; } - ++totroot; - for (int i = 0; i < best_tree.h_m_pairs.size(); ++i) { - if (ref[best_tree.h_m_pairs[i]]) { - ++cor; - } - ++tot; - } - } - } - cerr << "F = " << (double(cor + root_right) / (tot + totroot)) << endl; - return 0; -} - |