From 3e6c1e09afe6224551945538a7bae1273ac7b26d Mon Sep 17 00:00:00 2001 From: graehl Date: Tue, 13 Jul 2010 02:54:45 +0000 Subject: vest oracle directions git-svn-id: https://ws10smt.googlecode.com/svn/trunk@229 ec762483-ff6d-05da-a07a-a48fb63a330f --- vest/mr_vest_generate_mapper_input.cc | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'vest') diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc index 9e702e2f..5c3e8181 100644 --- a/vest/mr_vest_generate_mapper_input.cc +++ b/vest/mr_vest_generate_mapper_input.cc @@ -5,6 +5,7 @@ #include #include +#include "sampler.h" #include "filelib.h" #include "weights.h" #include "line_optimizer.h" @@ -16,8 +17,7 @@ namespace po = boost::program_options; typedef SparseVector Dir; -typedef RandomNumberGenerator RNG; -RNG rng; +MT19937 rng; struct oracle_directions { string forest_repository; @@ -45,6 +45,18 @@ struct oracle_directions { } return dir; } + // if start_random is true, immediately sample w/ replacement from src sentences; otherwise, consume them sequentially until exhausted, then random. oracle vectors are summed + void add_directions(vector &dirs,unsigned n,unsigned batchsz=20,bool start_random=false) { + MT19937::IntRNG rsg=rng.inclusive(0,dev_set_size-1); + unsigned b=0; + for(unsigned i=0;i=dev_set_size)?rsg():b]; + d/=(double)batchsz; + } + } }; @@ -135,6 +147,7 @@ int main(int argc, char** argv) { &axes, !conf.count("no_primary") ); + od.add_directions(axes,conf["oracle_directions"].as(),conf["oracle_batch"].as()); compress_similar(axes,conf["max_similarity"].as()); for (int i = 0; i < od.dev_set_size; ++i) for (int j = 0; j < axes.size(); ++j) -- cgit v1.2.3