From 60aef601b7bc88c39f6245cb224fd726bf612a95 Mon Sep 17 00:00:00 2001 From: graehl Date: Sun, 18 Jul 2010 23:33:03 +0000 Subject: boost cmd line notify(conf) was missing git-svn-id: https://ws10smt.googlecode.com/svn/trunk@313 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/cdec.cc | 11 +++++++---- decoder/ff_bleu.cc | 4 ++-- decoder/oracle_bleu.h | 37 +++++++++++++++++++++++------------ vest/mr_vest_generate_mapper_input.cc | 37 ++++++++++++++++++++--------------- vest/scorer.cc | 2 +- 5 files changed, 56 insertions(+), 35 deletions(-) diff --git a/decoder/cdec.cc b/decoder/cdec.cc index 8827cce3..a9c1cb3b 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -91,7 +91,7 @@ void print_options(std::ostream &out,po::options_description const& opts) { } -void InitCommandLine(int argc, char** argv, po::variables_map* confp) { +void InitCommandLine(int argc, char** argv, OracleBleu &ob, po::variables_map* confp) { po::variables_map &conf=*confp; po::options_description opts("Configuration options"); opts.add_options() @@ -151,7 +151,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* confp) { ("combine_size,C",po::value()->default_value(1), "When option -G is used, process this many sentence pairs before writing the gradient (1=emit after every sentence pair)") ("forest_output,O",po::value(),"Directory to write forests to") ("minimal_forests,m","Write minimal forests (excludes Rule information). Such forests can be used for ML/MAP training, but not rescoring, etc."); - OracleBleu::AddOptions(&opts); + ob.AddOptions(&opts); po::options_description clo("Command line options"); clo.add_options() ("config,c", po::value(), "Configuration file") @@ -206,6 +206,8 @@ void InitCommandLine(int argc, char** argv, po::variables_map* confp) { cerr << dcmdline_options << endl; exit(1); } + po::notify(conf); + } // TODO move out of cdec into some sampling decoder file @@ -358,7 +360,9 @@ int main(int argc, char** argv) { global_ff_registry.reset(new FFRegistry); register_feature_functions(); po::variables_map conf; - InitCommandLine(argc, argv, &conf); + OracleBleu oracle; + + InitCommandLine(argc, argv, oracle, &conf); const bool write_gradient = conf.count("cll_gradient"); const bool feature_expectations = conf.count("feature_expectations"); if (write_gradient && feature_expectations) { @@ -488,7 +492,6 @@ int main(int argc, char** argv) { const bool crf_uniform_empirical = conf.count("crf_uniform_empirical"); const bool get_oracle_forest = conf.count("get_oracle_forest"); - OracleBleu oracle; if (get_oracle_forest) oracle.UseConf(conf); diff --git a/decoder/ff_bleu.cc b/decoder/ff_bleu.cc index ab61ed10..f8d62aa2 100644 --- a/decoder/ff_bleu.cc +++ b/decoder/ff_bleu.cc @@ -243,8 +243,8 @@ BLEUModel::BLEUModel(const string& param) : //loop over argv and load all references into vector of NgramMaps if (argc >= 1) { - if (argv[1] != "-o" || argc<2) { - cerr<(argv[1]); diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h index cc19fbca..07d83b7f 100755 --- a/decoder/oracle_bleu.h +++ b/decoder/oracle_bleu.h @@ -73,13 +73,13 @@ struct OracleBleu { WeightVector feature_weights_; DocScorer ds; - static void AddOptions(boost::program_options::options_description *opts) { + void AddOptions(boost::program_options::options_description *opts) { using namespace boost::program_options; using namespace std; opts->add_options() - ("references,R", value(), "Translation reference files") - ("oracle_loss", value(), "IBM_BLEU_3 (default), IBM_BLEU etc") - ("bleu_weight", value()->default_value(1.), "weight to give the hope/fear loss function vs. model score") + ("references,R", value(&refs), "Translation reference files") + ("oracle_loss", value(&loss_name)->default_value("IBM_BLEU_3"), "IBM_BLEU_3 (default), IBM_BLEU etc") + ("bleu_weight", value(&bleu_weight)->default_value(1.), "weight to give the hope/fear loss function vs. model score") ; } int order; @@ -110,28 +110,41 @@ struct OracleBleu { } double bleu_weight; - void UseConf(boost::program_options::variables_map const& conf) { + // you have to call notify(conf) yourself, once, in main or similar + void UseConf(boost::program_options::variables_map const& /* conf */) { using namespace std; - bleu_weight=conf["bleu_weight"].as(); - set_loss(conf["oracle_loss"].as()); - set_refs(conf["references"].as()); + // bleu_weight=conf["bleu_weight"].as(); + //set_loss(conf["oracle_loss"].as()); + //set_refs(conf["references"].as()); + init_loss(); + init_refs(); } ScoreType loss; -// std::string loss_name; + std::string loss_name; boost::shared_ptr pff; - void set_loss(std::string const& lossd="IBM_BLEU_3") { -// loss_name=lossd; - loss=ScoreTypeFromString(lossd); + void set_loss(std::string const& lossd) { + loss_name=lossd; + init_loss(); + } + void init_loss() { + loss=ScoreTypeFromString(loss_name); order=(loss==IBM_BLEU_3)?3:4; std::ostringstream param; param<<"-o "<Create("BLEUModel",param.str()); } + bool is_null() const { + return refs.empty(); + } void set_refs(Refs const& r) { refs=r; + init_refs(); + } + void init_refs() { + if (is_null()) return; assert(refs.size()); ds=DocScorer(loss,refs); doc_score.reset(); diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc index 50e620f8..ac826ecf 100644 --- a/vest/mr_vest_generate_mapper_input.cc +++ b/vest/mr_vest_generate_mapper_input.cc @@ -33,13 +33,16 @@ typedef SparseVector Dir; typedef Dir Point; -void compress_similar(vector &dirs,double min_dist,ostream *log=&cerr,bool avg=true) { +void compress_similar(vector &dirs,double min_dist,ostream *log=&cerr,bool avg=true,bool verbose=true) { + // return; //TODO: debug if (min_dist<=0) return; double max_s=1.-min_dist; + if (log&&verbose) *log<<"max allowed S="< "< &dirs,double min_dist,ostream *log=&cerr,bool swap(dirs[j],dirs[--N]); } } + if (log&&verbose) *log<(); forest_repository = (*conf)["forest_repository"].as(); weights_file = (*conf)["weights"].as(); n_random = (*conf)["random_directions"].as(); + } } int main(int argc, char *argv[]) { @@ -134,8 +143,8 @@ struct oracle_directions { void Run() { AddPrimaryAndRandomDirections(); - //AddOracleDirections(); - //compress_similar(directions,max_similarity); + AddOracleDirections(); + compress_similar(directions,max_similarity); Print(); } @@ -143,25 +152,21 @@ struct oracle_directions { Point origin; // old weights that gave model 1best. vector optimize_features; void UseConf(po::variables_map const& conf) { -#if 0 oracle.UseConf(conf); - + // po::value(&var) takes care of below: + // fear_to_hope=conf.count("fear_to_hope"); + // n_random=conf["random_directions"].as(); + // forest_repository=conf["forest_repository"].as(); + // dev_set_size=conf["dev_set_size"].as(); + // n_oracle=conf["oracle_directions"].as(); + // oracle_batch=conf["oracle_batch"].as(); + // max_similarity=conf["max_similarity"].as(); + // weights_file=conf["weights"].as(); include_primary=!conf.count("no_primary"); old_to_hope=!conf.count("no_old_to_hope"); if (conf.count("optimize_feature") > 0) optimize_features=conf["optimize_feature"].as >(); - - // po::value(&var) takes care of below: -// fear_to_hope=conf.count("fear_to_hope"); -// n_random=conf["random_directions"].as(); -// forest_repository=conf["forest_repository"].as(); -// dev_set_size=conf["dev_set_size"].as(); -// n_oracle=conf["oracle_directions"].as(); -// oracle_batch=conf["oracle_batch"].as(); -// max_similarity=conf["max_similarity"].as(); -// weights_file=conf["weights"].as(); -#endif Init(); } diff --git a/vest/scorer.cc b/vest/scorer.cc index 8f981af6..30015bb2 100644 --- a/vest/scorer.cc +++ b/vest/scorer.cc @@ -44,7 +44,7 @@ ScoreType ScoreTypeFromString(const string& st) { return Koehn_BLEU; if (sl == "combi") return BLEU_minus_TER_over_2; - cerr << "Don't understand score type '" << sl << "', defaulting to ibm_bleu.\n"; + cerr << "Don't understand score type '" << st << "', defaulting to ibm_bleu.\n"; return IBM_BLEU; } -- cgit v1.2.3