diff options
| author | Chris Dyer <cdyer@cab.ark.cs.cmu.edu> | 2012-10-02 00:19:43 -0400 | 
|---|---|---|
| committer | Chris Dyer <cdyer@cab.ark.cs.cmu.edu> | 2012-10-02 00:19:43 -0400 | 
| commit | e26434979adc33bd949566ba7bf02dff64e80a3e (patch) | |
| tree | d1c72495e3af6301bd28e7e66c42de0c7a944d1f /rst_parser/rst_parse.cc | |
| parent | 0870d4a1f5e14cc7daf553b180d599f09f6614a2 (diff) | |
cdec cleanup, remove bayesian stuff, parsing stuff
Diffstat (limited to 'rst_parser/rst_parse.cc')
| -rw-r--r-- | rst_parser/rst_parse.cc | 111 | 
1 files changed, 0 insertions, 111 deletions
| diff --git a/rst_parser/rst_parse.cc b/rst_parser/rst_parse.cc deleted file mode 100644 index 9c42a8f4..00000000 --- a/rst_parser/rst_parse.cc +++ /dev/null @@ -1,111 +0,0 @@ -#include "arc_factored.h" - -#include <vector> -#include <iostream> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "timing_stats.h" -#include "arc_ff.h" -#include "dep_training.h" -#include "stringlib.h" -#include "filelib.h" -#include "tdict.h" -#include "weights.h" -#include "rst.h" -#include "global_ff.h" - -using namespace std; -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { -  po::options_description opts("Configuration options"); -  string cfg_file; -  opts.add_options() -        ("input,i",po::value<string>()->default_value("-"), "File containing test data (jsent format)") -        ("q_weights,q",po::value<string>(), "Arc-factored weights for proposal distribution (mandatory)") -        ("p_weights,p",po::value<string>(), "Weights for target distribution (optional)") -        ("samples,n",po::value<unsigned>()->default_value(1000), "Number of samples"); -  po::options_description clo("Command line options"); -  clo.add_options() -        ("config,c", po::value<string>(&cfg_file), "Configuration file") -        ("help,?", "Print this help message and exit"); - -  po::options_description dconfig_options, dcmdline_options; -  dconfig_options.add(opts); -  dcmdline_options.add(dconfig_options).add(clo); -  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); -  if (cfg_file.size() > 0) { -    ReadFile rf(cfg_file); -    po::store(po::parse_config_file(*rf.stream(), dconfig_options), *conf); -  } -  if (conf->count("help") || conf->count("q_weights") == 0) { -    cerr << dcmdline_options << endl; -    exit(1); -  } -} - -int main(int argc, char** argv) { -  po::variables_map conf; -  InitCommandLine(argc, argv, &conf); -  vector<weight_t> qweights, pweights; -  Weights::InitFromFile(conf["q_weights"].as<string>(), &qweights); -  if (conf.count("p_weights")) -    Weights::InitFromFile(conf["p_weights"].as<string>(), &pweights); -  const bool global = pweights.size() > 0; -  ArcFeatureFunctions ffs; -  GlobalFeatureFunctions gff; -  ReadFile rf(conf["input"].as<string>()); -  istream* in = rf.stream(); -  TrainingInstance sent; -  MT19937 rng; -  int samples = conf["samples"].as<unsigned>(); -  int totroot = 0, root_right = 0, tot = 0, cor = 0; -  while(TrainingInstance::ReadInstance(in, &sent)) { -    ffs.PrepareForInput(sent.ts); -    if (global) gff.PrepareForInput(sent.ts); -    ArcFactoredForest forest(sent.ts.pos.size()); -    forest.ExtractFeatures(sent.ts, ffs); -    forest.Reweight(qweights); -    TreeSampler ts(forest); -    double best_score = -numeric_limits<double>::infinity(); -    EdgeSubset best_tree; -    for (int n = 0; n < samples; ++n) { -      EdgeSubset tree; -      ts.SampleRandomSpanningTree(&tree, &rng); -      SparseVector<double> qfeats, gfeats; -      tree.ExtractFeatures(sent.ts, ffs, &qfeats); -      double score = 0; -      if (global) { -        gff.Features(sent.ts, tree, &gfeats); -        score = (qfeats + gfeats).dot(pweights); -      } else { -        score = qfeats.dot(qweights); -      } -      if (score > best_score) { -        best_tree = tree; -        best_score = score; -      } -    } -    cerr << "BEST SCORE: " << best_score << endl; -    cout << best_tree << endl; -    const bool sent_has_ref = sent.tree.h_m_pairs.size() > 0; -    if (sent_has_ref) { -      map<pair<short,short>, bool> ref; -      for (int i = 0; i < sent.tree.h_m_pairs.size(); ++i) -        ref[sent.tree.h_m_pairs[i]] = true; -      int ref_root = sent.tree.roots.front(); -      if (ref_root == best_tree.roots.front()) { ++root_right; } -      ++totroot; -      for (int i = 0; i < best_tree.h_m_pairs.size(); ++i) { -        if (ref[best_tree.h_m_pairs[i]]) { -          ++cor; -        } -        ++tot; -      } -    } -  } -  cerr << "F = " << (double(cor + root_right) / (tot + totroot)) << endl; -  return 0; -} - | 
