diff options
-rw-r--r-- | decoder/cdec.cc | 11 | ||||
-rw-r--r-- | decoder/ff_bleu.cc | 4 | ||||
-rwxr-xr-x | decoder/oracle_bleu.h | 37 | ||||
-rw-r--r-- | vest/mr_vest_generate_mapper_input.cc | 37 | ||||
-rw-r--r-- | vest/scorer.cc | 2 |
5 files changed, 56 insertions, 35 deletions
diff --git a/decoder/cdec.cc b/decoder/cdec.cc index 8827cce3..a9c1cb3b 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -91,7 +91,7 @@ void print_options(std::ostream &out,po::options_description const& opts) { } -void InitCommandLine(int argc, char** argv, po::variables_map* confp) { +void InitCommandLine(int argc, char** argv, OracleBleu &ob, po::variables_map* confp) { po::variables_map &conf=*confp; po::options_description opts("Configuration options"); opts.add_options() @@ -151,7 +151,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* confp) { ("combine_size,C",po::value<int>()->default_value(1), "When option -G is used, process this many sentence pairs before writing the gradient (1=emit after every sentence pair)") ("forest_output,O",po::value<string>(),"Directory to write forests to") ("minimal_forests,m","Write minimal forests (excludes Rule information). Such forests can be used for ML/MAP training, but not rescoring, etc."); - OracleBleu::AddOptions(&opts); + ob.AddOptions(&opts); po::options_description clo("Command line options"); clo.add_options() ("config,c", po::value<string>(), "Configuration file") @@ -206,6 +206,8 @@ void InitCommandLine(int argc, char** argv, po::variables_map* confp) { cerr << dcmdline_options << endl; exit(1); } + po::notify(conf); + } // TODO move out of cdec into some sampling decoder file @@ -358,7 +360,9 @@ int main(int argc, char** argv) { global_ff_registry.reset(new FFRegistry); register_feature_functions(); po::variables_map conf; - InitCommandLine(argc, argv, &conf); + OracleBleu oracle; + + InitCommandLine(argc, argv, oracle, &conf); const bool write_gradient = conf.count("cll_gradient"); const bool feature_expectations = conf.count("feature_expectations"); if (write_gradient && feature_expectations) { @@ -488,7 +492,6 @@ int main(int argc, char** argv) { const bool crf_uniform_empirical = conf.count("crf_uniform_empirical"); const bool get_oracle_forest = conf.count("get_oracle_forest"); - OracleBleu oracle; if (get_oracle_forest) oracle.UseConf(conf); diff --git a/decoder/ff_bleu.cc b/decoder/ff_bleu.cc index ab61ed10..f8d62aa2 100644 --- a/decoder/ff_bleu.cc +++ b/decoder/ff_bleu.cc @@ -243,8 +243,8 @@ BLEUModel::BLEUModel(const string& param) : //loop over argv and load all references into vector of NgramMaps if (argc >= 1) { - if (argv[1] != "-o" || argc<2) { - cerr<<bleu_usage_name<<" specification should be: "<<bleu_usage_short<<"; you provided: "<<param<<endl<<bleu_usage_verbose<<endl; + if (argv[0] != "-o" || argc<2) { + cerr<<bleu_usage_name<<" specification should be: "<<bleu_usage_short<<"; you provided: "<<param<<endl<<argv[0]<<endl<<bleu_usage_verbose<<endl; abort(); } else order=boost::lexical_cast<int>(argv[1]); diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h index cc19fbca..07d83b7f 100755 --- a/decoder/oracle_bleu.h +++ b/decoder/oracle_bleu.h @@ -73,13 +73,13 @@ struct OracleBleu { WeightVector feature_weights_; DocScorer ds; - static void AddOptions(boost::program_options::options_description *opts) { + void AddOptions(boost::program_options::options_description *opts) { using namespace boost::program_options; using namespace std; opts->add_options() - ("references,R", value<Refs >(), "Translation reference files") - ("oracle_loss", value<string>(), "IBM_BLEU_3 (default), IBM_BLEU etc") - ("bleu_weight", value<double>()->default_value(1.), "weight to give the hope/fear loss function vs. model score") + ("references,R", value<Refs >(&refs), "Translation reference files") + ("oracle_loss", value<string>(&loss_name)->default_value("IBM_BLEU_3"), "IBM_BLEU_3 (default), IBM_BLEU etc") + ("bleu_weight", value<double>(&bleu_weight)->default_value(1.), "weight to give the hope/fear loss function vs. model score") ; } int order; @@ -110,28 +110,41 @@ struct OracleBleu { } double bleu_weight; - void UseConf(boost::program_options::variables_map const& conf) { + // you have to call notify(conf) yourself, once, in main or similar + void UseConf(boost::program_options::variables_map const& /* conf */) { using namespace std; - bleu_weight=conf["bleu_weight"].as<double>(); - set_loss(conf["oracle_loss"].as<string>()); - set_refs(conf["references"].as<Refs>()); + // bleu_weight=conf["bleu_weight"].as<double>(); + //set_loss(conf["oracle_loss"].as<string>()); + //set_refs(conf["references"].as<Refs>()); + init_loss(); + init_refs(); } ScoreType loss; -// std::string loss_name; + std::string loss_name; boost::shared_ptr<FeatureFunction> pff; - void set_loss(std::string const& lossd="IBM_BLEU_3") { -// loss_name=lossd; - loss=ScoreTypeFromString(lossd); + void set_loss(std::string const& lossd) { + loss_name=lossd; + init_loss(); + } + void init_loss() { + loss=ScoreTypeFromString(loss_name); order=(loss==IBM_BLEU_3)?3:4; std::ostringstream param; param<<"-o "<<order; pff=global_ff_registry->Create("BLEUModel",param.str()); } + bool is_null() const { + return refs.empty(); + } void set_refs(Refs const& r) { refs=r; + init_refs(); + } + void init_refs() { + if (is_null()) return; assert(refs.size()); ds=DocScorer(loss,refs); doc_score.reset(); diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc index 50e620f8..ac826ecf 100644 --- a/vest/mr_vest_generate_mapper_input.cc +++ b/vest/mr_vest_generate_mapper_input.cc @@ -33,13 +33,16 @@ typedef SparseVector<double> Dir; typedef Dir Point; -void compress_similar(vector<Dir> &dirs,double min_dist,ostream *log=&cerr,bool avg=true) { +void compress_similar(vector<Dir> &dirs,double min_dist,ostream *log=&cerr,bool avg=true,bool verbose=true) { + // return; //TODO: debug if (min_dist<=0) return; double max_s=1.-min_dist; + if (log&&verbose) *log<<"max allowed S="<<max_s<<' '; unsigned N=dirs.size(); for (int i=0;i<N;++i) { for (int j=i+1;j<N;++j) { double s=dirs[i].tanimoto_coef(dirs[j]); + if (log&&verbose) *log<<"S["<<i<<","<<j<<"]="<<s<<' '; if (s>max_s) { if (log) *log << "Collapsing similar directions (T="<<s<<" > "<<max_s<<"). dirs["<<i<<"]="<<dirs[i]<<" dirs["<<j<<"]"; if (avg) { @@ -51,6 +54,8 @@ void compress_similar(vector<Dir> &dirs,double min_dist,ostream *log=&cerr,bool swap(dirs[j],dirs[--N]); } } + if (log&&verbose) *log<<endl; + } dirs.resize(N); } @@ -118,10 +123,14 @@ struct oracle_directions { cerr << dcmdline_options << endl; exit(1); } + po::notify(*conf); + + if (0) { dev_set_size = (*conf)["dev_set_size"].as<unsigned>(); forest_repository = (*conf)["forest_repository"].as<string>(); weights_file = (*conf)["weights"].as<string>(); n_random = (*conf)["random_directions"].as<unsigned>(); + } } int main(int argc, char *argv[]) { @@ -134,8 +143,8 @@ struct oracle_directions { void Run() { AddPrimaryAndRandomDirections(); - //AddOracleDirections(); - //compress_similar(directions,max_similarity); + AddOracleDirections(); + compress_similar(directions,max_similarity); Print(); } @@ -143,25 +152,21 @@ struct oracle_directions { Point origin; // old weights that gave model 1best. vector<string> optimize_features; void UseConf(po::variables_map const& conf) { -#if 0 oracle.UseConf(conf); - + // po::value<X>(&var) takes care of below: + // fear_to_hope=conf.count("fear_to_hope"); + // n_random=conf["random_directions"].as<unsigned int>(); + // forest_repository=conf["forest_repository"].as<string>(); + // dev_set_size=conf["dev_set_size"].as<unsigned int>(); + // n_oracle=conf["oracle_directions"].as<unsigned>(); + // oracle_batch=conf["oracle_batch"].as<unsigned>(); + // max_similarity=conf["max_similarity"].as<double>(); + // weights_file=conf["weights"].as<string>(); include_primary=!conf.count("no_primary"); old_to_hope=!conf.count("no_old_to_hope"); if (conf.count("optimize_feature") > 0) optimize_features=conf["optimize_feature"].as<vector<string> >(); - - // po::value<X>(&var) takes care of below: -// fear_to_hope=conf.count("fear_to_hope"); -// n_random=conf["random_directions"].as<unsigned int>(); -// forest_repository=conf["forest_repository"].as<string>(); -// dev_set_size=conf["dev_set_size"].as<unsigned int>(); -// n_oracle=conf["oracle_directions"].as<unsigned>(); -// oracle_batch=conf["oracle_batch"].as<unsigned>(); -// max_similarity=conf["max_similarity"].as<double>(); -// weights_file=conf["weights"].as<string>(); -#endif Init(); } diff --git a/vest/scorer.cc b/vest/scorer.cc index 8f981af6..30015bb2 100644 --- a/vest/scorer.cc +++ b/vest/scorer.cc @@ -44,7 +44,7 @@ ScoreType ScoreTypeFromString(const string& st) { return Koehn_BLEU; if (sl == "combi") return BLEU_minus_TER_over_2; - cerr << "Don't understand score type '" << sl << "', defaulting to ibm_bleu.\n"; + cerr << "Don't understand score type '" << st << "', defaulting to ibm_bleu.\n"; return IBM_BLEU; } |