diff options
| author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-01-27 13:19:27 -0500 | 
|---|---|---|
| committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-01-27 13:19:27 -0500 | 
| commit | 203c3c3357b9ed8cfe44932c2bf5ea19eba6238c (patch) | |
| tree | c446f8e8afbe194ef656b33cfc643f83633cf18c /vest | |
| parent | 481a120564fdb73c8c6833e2102acb533683261c (diff) | |
migration to new metric api for vest, clean up of unsupported/not functional code
Diffstat (limited to 'vest')
| -rwxr-xr-x | vest/dist-vest.pl | 22 | ||||
| -rw-r--r-- | vest/mbr_kbest.cc | 138 | ||||
| -rw-r--r-- | vest/mr_vest_generate_mapper_input.cc | 356 | ||||
| -rw-r--r-- | vest/mr_vest_map.cc | 16 | 
4 files changed, 67 insertions, 465 deletions
| diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 8cde748b..1ec8c6b1 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -65,8 +65,6 @@ my $oraclen=0;  my $oracleb=20;  my $bleu_weight=1;  my $use_make = 1;  # use make to parallelize line search -my $dirargs=''; -my $density_prune;  my $useqsub;  my $pass_suffix = '';  my $cpbin=1; @@ -75,7 +73,6 @@ Getopt::Long::Configure("no_auto_abbrev");  if (GetOptions(  	"decoder=s" => \$decoderOpt,  	"jobs=i" => \$jobs, -	"density-prune=f" => \$density_prune,  	"dont-clean" => \$disable_clean,  	"pass-suffix=s" => \$pass_suffix,  	"dry-run" => \$dryrun, @@ -87,15 +84,7 @@ if (GetOptions(  	"normalize=s" => \$normalize,  	"pmem=s" => \$pmem,          "cpbin!" => \$cpbin, -	"rand-directions=i" => \$rand_directions, -	"random_directions=i" => \$rand_directions, -        "bleu_weight=s" => \$bleu_weight, -        "no-primary!" => \$noprimary, -        "max-similarity=s" => \$maxsim, -        "oracle-directions=i" => \$oraclen, -        "n-oracle=i" => \$oraclen, -        "oracle-batch=i" => \$oracleb, -        "directions-args=s" => \$dirargs, +	"random-directions=i" => \$rand_directions,  	"ref-files=s" => \$refFiles,  	"metric=s" => \$metric,  	"source-file=s" => \$srcFile, @@ -107,10 +96,6 @@ if (GetOptions(  	exit;  } -if (defined $density_prune) { -  die "--density_prune n: n must be greater than 1.0\n" unless $density_prune > 1.0; -} -  if ($useqsub) {    $use_make = 0;    die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); @@ -328,10 +313,7 @@ while (1){  		print STDERR "\nGENERATE OPTIMIZATION STRATEGY (OPT-ITERATION $opt_iter/$optimization_iters)\n";  		print STDERR unchecked_output("date");  		$icc++; -		my $nop=$noprimary?"--no_primary":""; -		my $targs=$oraclen ? "--decoder_translations='$runFile.gz' ".get_comma_sep_refs('-references',$refFiles):""; -		my $bwargs=$bleu_weight!=1 ? "--bleu_weight=$bleu_weight":""; -		$cmd="$MAPINPUT -w $inweights -r $dir/hgs $bwargs -s $devSize -d $rand_directions --max_similarity=$maxsim --oracle_directions=$oraclen --oracle_batch=$oracleb $targs $dirargs > $dir/agenda.$im1-$opt_iter"; +		$cmd="$MAPINPUT -w $inweights -r $dir/hgs -s $devSize -d $rand_directions > $dir/agenda.$im1-$opt_iter";  		print STDERR "COMMAND:\n$cmd\n";  		check_call($cmd);  		check_call("mkdir -p $dir/splag.$im1"); diff --git a/vest/mbr_kbest.cc b/vest/mbr_kbest.cc deleted file mode 100644 index 2867b36b..00000000 --- a/vest/mbr_kbest.cc +++ /dev/null @@ -1,138 +0,0 @@ -#include <iostream> -#include <vector> - -#include <boost/program_options.hpp> - -#include "prob.h" -#include "tdict.h" -#include "scorer.h" -#include "filelib.h" -#include "stringlib.h" - -using namespace std; - -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { -  po::options_description opts("Configuration options"); -  opts.add_options() -        ("scale,a",po::value<double>()->default_value(1.0), "Posterior scaling factor (alpha)") -        ("loss_function,l",po::value<string>()->default_value("bleu"), "Loss function") -        ("input,i",po::value<string>()->default_value("-"), "File to read k-best lists from") -        ("output_list,L", "Show reranked list as output") -        ("help,h", "Help"); -  po::options_description dcmdline_options; -  dcmdline_options.add(opts); -  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); -  bool flag = false; -  if (flag || conf->count("help")) { -    cerr << dcmdline_options << endl; -    exit(1); -  } -} - -struct LossComparer { -  bool operator()(const pair<vector<WordID>, double>& a, const pair<vector<WordID>, double>& b) const { -    return a.second < b.second; -  } -}; - -bool ReadKBestList(istream* in, string* sent_id, vector<pair<vector<WordID>, prob_t> >* list) { -  static string cache_id; -  static pair<vector<WordID>, prob_t> cache_pair; -  list->clear(); -  string cur_id; -  if (cache_pair.first.size() > 0) { -    list->push_back(cache_pair); -    cur_id = cache_id; -    cache_pair.first.clear(); -  } -  string line; -  string tstr; -  while(*in) { -    getline(*in, line); -    if (line.empty()) continue; -    size_t p1 = line.find(" ||| "); -    if (p1 == string::npos) { cerr << "Bad format: " << line << endl; abort(); } -    size_t p2 = line.find(" ||| ", p1 + 4); -    if (p2 == string::npos) { cerr << "Bad format: " << line << endl; abort(); } -    size_t p3 = line.rfind(" ||| "); -    cache_id = line.substr(0, p1); -    tstr = line.substr(p1 + 5, p2 - p1 - 5); -    double val = strtod(line.substr(p3 + 5).c_str(), NULL); -    TD::ConvertSentence(tstr, &cache_pair.first); -    cache_pair.second.logeq(val); -    if (cur_id.empty()) cur_id = cache_id; -    if (cur_id == cache_id) { -      list->push_back(cache_pair); -      *sent_id = cur_id; -      cache_pair.first.clear(); -    } else { break; } -  } -  return !list->empty(); -} - -int main(int argc, char** argv) { -  po::variables_map conf; -  InitCommandLine(argc, argv, &conf); -  const string metric = conf["loss_function"].as<string>(); -  const bool output_list = conf.count("output_list") > 0; -  const string file = conf["input"].as<string>(); -  const double mbr_scale = conf["scale"].as<double>(); -  cerr << "Posterior scaling factor (alpha) = " << mbr_scale << endl; - -  ScoreType type = ScoreTypeFromString(metric); -  vector<pair<vector<WordID>, prob_t> > list; -  ReadFile rf(file); -  string sent_id; -  while(ReadKBestList(rf.stream(), &sent_id, &list)) { -    vector<prob_t> joints(list.size()); -    const prob_t max_score = pow(list.front().second, mbr_scale); -    prob_t marginal = prob_t::Zero(); -    for (int i = 0 ; i < list.size(); ++i) { -      const prob_t joint = pow(list[i].second, mbr_scale) / max_score; -      joints[i] = joint; -      // cerr << "list[" << i << "] joint=" << log(joint) << endl; -      marginal += joint; -    } -    int mbr_idx = -1; -    vector<double> mbr_scores(output_list ? list.size() : 0); -    double mbr_loss = numeric_limits<double>::max(); -    for (int i = 0 ; i < list.size(); ++i) { -      vector<vector<WordID> > refs(1, list[i].first); -      //cerr << i << ": " << list[i].second <<"\t" << TD::GetString(list[i].first) << endl; -      ScorerP scorer = SentenceScorer::CreateSentenceScorer(type, refs); -      double wl_acc = 0; -      for (int j = 0; j < list.size(); ++j) { -        if (i != j) { -          ScoreP s = scorer->ScoreCandidate(list[j].first); -          double loss = 1.0 - s->ComputeScore(); -          if (type == TER || type == AER) loss = 1.0 - loss; -          double weighted_loss = loss * (joints[j] / marginal); -          wl_acc += weighted_loss; -          if ((!output_list) && wl_acc > mbr_loss) break; -        } -      } -      if (output_list) mbr_scores[i] = wl_acc; -      if (wl_acc < mbr_loss) { -        mbr_loss = wl_acc; -        mbr_idx = i; -      } -    } -    // cerr << "ML translation: " << TD::GetString(list[0].first) << endl; -    cerr << "MBR Best idx: " << mbr_idx << endl; -    if (output_list) { -      for (int i = 0; i < list.size(); ++i) -        list[i].second.logeq(mbr_scores[i]); -      sort(list.begin(), list.end(), LossComparer()); -      for (int i = 0; i < list.size(); ++i) -        cout << sent_id << " ||| " -             << TD::GetString(list[i].first) << " ||| " -             << log(list[i].second) << endl; -    } else { -      cout << TD::GetString(list[mbr_idx].first) << endl; -    } -  } -  return 0; -} - diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc index 0c094fd5..59d4f24f 100644 --- a/vest/mr_vest_generate_mapper_input.cc +++ b/vest/mr_vest_generate_mapper_input.cc @@ -1,320 +1,78 @@ -//TODO: debug segfault when references supplied, null shared_ptr when oracle  #include <iostream>  #include <vector> -#include <sstream>  #include <boost/program_options.hpp>  #include <boost/program_options/variables_map.hpp> -#include "sampler.h"  #include "filelib.h"  #include "weights.h"  #include "line_optimizer.h" -#include "hg.h" -#include "hg_io.h" -#include "scorer.h" -#include "oracle_bleu.h" -#include "ff_bleu.h" - -const bool DEBUG_ORACLE=true; - -//TODO: decide on cdec_ff ffs, or just bleumodel - if just bleumodel, then do existing features on serialized hypergraphs remain?  weights (origin) is passed to oracle_bleu.h:ComputeOracle -//void register_feature_functions(); -//FFRegistry ff_registry; -namespace { -void init_bleumodel() { -  ff_registry.clear(); -  ff_registry.Register(new FFFactory<BLEUModel>); -} - -struct init_ff { -  init_ff() { -    init_bleumodel(); -  } -}; -//init_ff reg; // order of initialization?  ff_registry may not be init yet.  call in Run() instead. -}  using namespace std;  namespace po = boost::program_options; -typedef SparseVector<double> Dir; -typedef Dir Point; - -void compress_similar(vector<Dir> &dirs,double min_dist,ostream *log=&cerr,bool avg=true,bool verbose=true) { -  //  return; //TODO: debug -  if (min_dist<=0) return; -  double max_s=1.-min_dist; -  if (log&&verbose) *log<<"max allowed S="<<max_s<<endl; -  unsigned N=dirs.size(); -  for (int i=0;i<N;++i) { -    for (int j=i+1;j<N;++j) { -      double s=dirs[i].tanimoto_coef(dirs[j]); -      if (log&&verbose) *log<<"S["<<i<<","<<j<<"]="<<s<<' '; -      if (s>max_s) { -        if (log) *log << "Collapsing similar directions (T="<<s<<" > "<<max_s<<").  dirs["<<i<<"]="<<dirs[i]<<" dirs["<<j<<"]"<<endl; -        if (avg) { -          dirs[i]+=dirs[j]; -          dirs[i]/=2.; -          if (log) *log<<" averaged="<<dirs[i]; -        } -        if (log) *log<<endl; -        swap(dirs[j],dirs[--N]); -      } -    } -    if (log&&verbose) *log<<endl; - -  } -  dirs.resize(N); -} - -struct oracle_directions { -  MT19937 rng; -  OracleBleu oracle; -  vector<Dir> directions; - -  bool start_random; -  bool include_primary; -  bool old_to_hope; -  bool fear_to_hope; -  unsigned n_random; -  void AddPrimaryAndRandomDirections() { -    LineOptimizer::CreateOptimizationDirections( -      fids,n_random,&rng,&directions,include_primary); -  } - -  void Print() { -    for (int i = 0; i < dev_set_size; ++i) -      for (int j = 0; j < directions.size(); ++j) { -        cout << forest_file(i) <<" " << i<<" "; -        print(cout,origin,"=",";"); -        cout<<" "; -        print(cout,directions[j],"=",";"); -        cout<<"\n"; -      } -  } - -  void AddOptions(po::options_description *opts) { -    oracle.AddOptions(opts); -    opts->add_options() -      ("dev_set_size,s",po::value<unsigned>(&dev_set_size),"[REQD] Development set size (# of parallel sentences)") -      ("forest_repository,r",po::value<string>(&forest_repository),"[REQD] Path to forest repository") -      ("weights,w",po::value<string>(&weights_file),"[REQD] Current feature weights file") -      ("optimize_feature,o",po::value<vector<string> >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)") -      ("random_directions,d",po::value<unsigned>(&n_random)->default_value(10),"Number of random directions to run the line optimizer in") -      ("no_primary,n","don't use the primary (orthogonal each feature alone) directions") -      ("oracle_directions,O",po::value<unsigned>(&n_oracle)->default_value(0),"read the forests and choose this many directions based on heading toward a hope max (bleu+modelscore) translation.") -      ("oracle_start_random",po::bool_switch(&start_random),"sample random subsets of dev set for ALL oracle directions, not just those after a sequential run through it") -      ("oracle_batch,b",po::value<unsigned>(&oracle_batch)->default_value(10),"to produce each oracle direction, sum the 'gradient' over this many sentences") -      ("max_similarity,m",po::value<double>(&max_similarity)->default_value(0),"remove directions that are too similar (Tanimoto coeff. less than (1-this)).  0 means don't filter, 1 means only 1 direction allowed?") -      ("fear_to_hope,f",po::bool_switch(&fear_to_hope),"for each of the oracle_directions, also include a direction from fear to hope (as well as origin to hope)") -      ("no_old_to_hope","don't emit the usual old -> hope oracle") -      ("decoder_translations",po::value<string>(&decoder_translations_file)->default_value(""),"one per line decoder 1best translations for computing document BLEU vs. sentences-seen-so-far BLEU") -      ; -  } -  void InitCommandLine(int argc, char *argv[], po::variables_map *conf) { -    po::options_description opts("Configuration options"); -    AddOptions(&opts); -    opts.add_options()("help,h", "Help"); - -    po::options_description dcmdline_options; -    dcmdline_options.add(opts); -    po::store(parse_command_line(argc, argv, dcmdline_options), *conf); -    po::notify(*conf); -    if (conf->count("dev_set_size") == 0) { -      cerr << "Please specify the size of the development set using -s N\n"; -      goto bad_cmdline; -    } -    if (conf->count("weights") == 0) { -      cerr << "Please specify the starting-point weights using -w <weightfile.txt>\n"; -      goto bad_cmdline; -    } -    if (conf->count("forest_repository") == 0) { -      cerr << "Please specify the forest repository location using -r <DIR>\n"; -      goto bad_cmdline; -    } -    if (n_oracle && oracle.refs.empty()) { -      cerr<<"Specify references when using oracle directions\n"; -      goto bad_cmdline; -    } -    if (conf->count("help")) { -      cout << dcmdline_options << endl; -      exit(0); -    } - -    return; -    bad_cmdline: -      cerr << dcmdline_options << endl; -      exit(1); +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { +  po::options_description opts("Configuration options"); +  opts.add_options() +        ("dev_set_size,s",po::value<unsigned>(),"[REQD] Development set size (# of parallel sentences)") +        ("forest_repository,r",po::value<string>(),"[REQD] Path to forest repository") +        ("weights,w",po::value<string>(),"[REQD] Current feature weights file") +        ("optimize_feature,o",po::value<vector<string> >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)") +        ("random_directions,d",po::value<unsigned int>()->default_value(20),"Number of random directions to run the line optimizer in") +        ("help,h", "Help"); +  po::options_description dcmdline_options; +  dcmdline_options.add(opts); +  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); +  bool flag = false; +  if (conf->count("dev_set_size") == 0) { +    cerr << "Please specify the size of the development set using -d N\n"; +    flag = true;    } - -  int main(int argc, char *argv[]) { -    po::variables_map conf; -    InitCommandLine(argc,argv,&conf); -    init_bleumodel(); -    UseConf(conf); -    Run(); -    return 0; +  if (conf->count("weights") == 0) { +    cerr << "Please specify the starting-point weights using -w <weightfile.txt>\n"; +    flag = true;    } -  bool verbose() const { return oracle.verbose; } -  void Run() { -//    register_feature_functions(); -    AddPrimaryAndRandomDirections(); -    AddOracleDirections(); -    compress_similar(directions,max_similarity,&cerr,true,verbose()); -    Print(); +  if (conf->count("forest_repository") == 0) { +    cerr << "Please specify the forest repository location using -r <DIR>\n"; +    flag = true;    } - - -  Point origin; // old weights that gave model 1best. -  vector<string> optimize_features; -  void UseConf(po::variables_map const& conf) { -    oracle.UseConf(conf); -    include_primary=!conf.count("no_primary"); -    old_to_hope=!conf.count("no_old_to_hope"); - -    if (conf.count("optimize_feature") > 0) -      optimize_features=conf["optimize_feature"].as<vector<string> >(); -    Init(); +  if (flag || conf->count("help")) { +    cerr << dcmdline_options << endl; +    exit(1);    } +} -  string weights_file; -  double max_similarity; -  unsigned n_oracle, oracle_batch; -  string forest_repository; -  unsigned dev_set_size; -  vector<Oracle> oracles; -  vector<int> fids; -  string forest_file(unsigned i) const { -    ostringstream o; -    o << forest_repository << '/' << i << ".json.gz"; -    return o.str(); -  } - -  oracle_directions() { } - -  Sentences model_hyps; - -  vector<ScoreP> model_scores; -  bool have_doc; -  void Init() { -    have_doc=!decoder_translations_file.empty(); -    if (have_doc) { -      model_hyps.Load(decoder_translations_file); -      if (verbose()) model_hyps.Print(cerr,5); -      model_scores.resize(model_hyps.size()); -      if (dev_set_size!=model_hyps.size()) { -        cerr<<"You supplied decoder_translations with a different number of lines ("<<model_hyps.size()<<") than dev_set_size ("<<dev_set_size<<")"<<endl; -        abort(); -      } -      cerr << "Scoring model translations " << model_hyps << endl; -      for (int i=0;i<model_hyps.size();++i) { -        //TODO: what is scoreCcand? without clipping? do without for consistency w/ oracle -        model_scores[i]=oracle.ds[i]->ScoreCandidate(model_hyps[i]); -        assert(model_scores[i]); -        if (verbose()) cerr<<"Before model["<<i<<"]: "<<ds().ScoreDetails()<<endl; -        if (verbose()) cerr<<"model["<<i<<"]: "<<model_scores[i]->ScoreDetails()<<endl; -        oracle.doc_score->PlusEquals(*model_scores[i]); -        if (verbose()) cerr<<"After model["<<i<<"]: "<<ds().ScoreDetails()<<endl; -      } -      //TODO: compute doc bleu stats for each sentence, then when getting oracle temporarily exclude stats for that sentence (skip regular score updating) -    } -    start_random=false; -    cerr << "Forest repo: " << forest_repository << endl; -    assert(DirectoryExists(forest_repository)); -    vector<string> features; -    vector<weight_t> dorigin; -    Weights::InitFromFile(weights_file, &dorigin, &features); -    if (optimize_features.size()) -      features=optimize_features; -    Weights::InitSparseVector(dorigin, &origin); -    fids.clear(); -    AddFeatureIds(features); -    oracles.resize(dev_set_size); -  } - -  void AddFeatureIds(vector<string> const& features) { -    int i = fids.size(); -    fids.resize(fids.size()+features.size()); -    for (; i < features.size(); ++i) -      fids[i] = FD::Convert(features[i]); - } - - -  std::string decoder_translations_file; // one per line -  //TODO: is it worthwhile to get a complete document bleu first?  would take a list of 1best translations one per line from the decoders, rather than loading all the forests (expensive).  translations are in run.raw.N.gz - new arg -  void adjust_doc(unsigned i,double scale=1.) { -    oracle.doc_score->PlusEquals(*model_scores[i],scale); -  } - -  Score &ds() { -    return *oracle.doc_score; -  } - -  Oracle const& ComputeOracle(unsigned i) { -    Oracle &o=oracles[i]; -    if (o.is_null()) { -      if (have_doc) { -        if (verbose()) cerr<<"Before removing i="<<i<<" "<<ds().ScoreDetails()<<"\n"; -        adjust_doc(i,-1); -      } -      ReadFile rf(forest_file(i)); -      Hypergraph hg; -      { -        Timer t("Loading forest from JSON "+forest_file(i)); -        HypergraphIO::ReadFromJSON(rf.stream(), &hg); -      } -      if (verbose()) cerr<<"Before oracle["<<i<<"]: "<<ds().ScoreDetails()<<endl; -      o=oracle.ComputeOracle(oracle.MakeMetadata(hg,i),&hg,origin); -      if (verbose()) { -        cerr << o; -        ScoreP hopesc=oracle.GetScore(o.hope.sentence,i); -        oracle.doc_score->PlusEquals(*hopesc,1); -        cerr<<"With hope: "<<ds().ScoreDetails()<<endl; -        oracle.doc_score->PlusEquals(*hopesc,-1); -        cerr<<"Without hope: "<<ds().ScoreDetails()<<endl; -        cerr<<" oracle="<<oracle.GetScore(o.hope.sentence,i)->ScoreDetails()<<endl -            <<" model="<<oracle.GetScore(o.model.sentence,i)->ScoreDetails()<<endl; -        if (have_doc) -          cerr<<" doc (should = model): "<<model_scores[i]->ScoreDetails()<<endl; -      } -      if (have_doc) { -        adjust_doc(i,1); -      } else -        oracle.IncludeLastScore(); -    } -    return o; -  } - -  // if start_random is true, immediately sample w/ replacement from src sentences; otherwise, consume them sequentially until exhausted, then random.  oracle vectors are summed -  void AddOracleDirections() { -    MT19937::IntRNG rsg=rng.inclusive(0,dev_set_size-1); -    unsigned b=0; -    for(unsigned i=0;i<n_oracle;++i) { -      Dir o2hope; -      Dir fear2hope; -      for (unsigned j=0;j<oracle_batch;++j,++b) { -        Oracle const& o=ComputeOracle((start_random||b>=dev_set_size) ? rsg() : b); - -        if (old_to_hope) -          o2hope+=o.ModelHopeGradient(); -        if (fear_to_hope) -          fear2hope+=o.FearHopeGradient(); -      } -      double N=(double)oracle_batch; -      if (old_to_hope) { -        o2hope/=N; -        directions.push_back(o2hope); -      } -      if (fear_to_hope) { -        fear2hope/=N; -        directions.push_back(fear2hope); -      } +int main(int argc, char** argv) { +  RandomNumberGenerator<boost::mt19937> rng; +  po::variables_map conf; +  InitCommandLine(argc, argv, &conf); +  vector<string> features; +  SparseVector<weight_t> origin; +  vector<weight_t> w; +  Weights::InitFromFile(conf["weights"].as<string>(), &w, &features); +  Weights::InitSparseVector(w, &origin); +  const string forest_repository = conf["forest_repository"].as<string>(); +  assert(DirectoryExists(forest_repository)); +  if (conf.count("optimize_feature") > 0) +    features=conf["optimize_feature"].as<vector<string> >(); +  vector<SparseVector<weight_t> > directions; +  vector<int> fids(features.size()); +  for (int i = 0; i < features.size(); ++i) +    fids[i] = FD::Convert(features[i]); +  LineOptimizer::CreateOptimizationDirections( +     fids, +     conf["random_directions"].as<unsigned int>(), +     &rng, +     &directions); +  unsigned dev_set_size = conf["dev_set_size"].as<unsigned>(); +  for (unsigned i = 0; i < dev_set_size; ++i) { +    for (unsigned j = 0; j < directions.size(); ++j) { +      cout << forest_repository << '/' << i << ".json.gz " << i << ' '; +      print(cout, origin, "=", ";"); +      cout << ' '; +      print(cout, directions[j], "=", ";"); +      cout << endl;      }    } -}; - -int main(int argc, char** argv) { -  oracle_directions od; -  return od.main(argc,argv); +  return 0;  } diff --git a/vest/mr_vest_map.cc b/vest/mr_vest_map.cc index 8f6e085d..7d9625bc 100644 --- a/vest/mr_vest_map.cc +++ b/vest/mr_vest_map.cc @@ -82,20 +82,20 @@ int main(int argc, char** argv) {      if (line.empty()) continue;      istringstream is(line);      int sent_id; -    string file, s_origin, s_axis; +    string file, s_origin, s_direction;      // path-to-file (JSON) sent_ed starting-point search-direction -    is >> file >> sent_id >> s_origin >> s_axis; +    is >> file >> sent_id >> s_origin >> s_direction;      SparseVector<double> origin; -    assert(ReadSparseVectorString(s_origin, &origin)); -    SparseVector<double> axis; -    assert(ReadSparseVectorString(s_axis, &axis)); -    // cerr << "File: " << file << "\nAxis: " << axis << "\n   X: " << origin << endl; +    ReadSparseVectorString(s_origin, &origin); +    SparseVector<double> direction; +    ReadSparseVectorString(s_direction, &direction); +    // cerr << "File: " << file << "\nDir: " << direction << "\n   X: " << origin << endl;      if (last_file != file) {        last_file = file;        ReadFile rf(file);        HypergraphIO::ReadFromJSON(rf.stream(), &hg);      } -    ViterbiEnvelopeWeightFunction wf(origin, axis); +    ViterbiEnvelopeWeightFunction wf(origin, direction);      ViterbiEnvelope ve = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);      ErrorSurface es; @@ -104,7 +104,7 @@ int main(int argc, char** argv) {      // cerr << "Error surface has " << es.size() << " segments\n";      string val;      es.Serialize(&val); -    cout << 'M' << ' ' << s_origin << ' ' << s_axis << '\t'; +    cout << 'M' << ' ' << s_origin << ' ' << s_direction << '\t';      B64::b64encode(val.c_str(), val.size(), &cout);      cout << endl << flush;    } | 
