diff options
| -rw-r--r-- | decoder/cdec.cc | 11 | ||||
| -rw-r--r-- | decoder/ff_bleu.cc | 4 | ||||
| -rwxr-xr-x | decoder/oracle_bleu.h | 37 | ||||
| -rw-r--r-- | vest/mr_vest_generate_mapper_input.cc | 37 | ||||
| -rw-r--r-- | vest/scorer.cc | 2 | 
5 files changed, 56 insertions, 35 deletions
| diff --git a/decoder/cdec.cc b/decoder/cdec.cc index 8827cce3..a9c1cb3b 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -91,7 +91,7 @@ void print_options(std::ostream &out,po::options_description const& opts) {  } -void InitCommandLine(int argc, char** argv, po::variables_map* confp) { +void InitCommandLine(int argc, char** argv, OracleBleu &ob, po::variables_map* confp) {    po::variables_map &conf=*confp;    po::options_description opts("Configuration options");    opts.add_options() @@ -151,7 +151,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* confp) {          ("combine_size,C",po::value<int>()->default_value(1), "When option -G is used, process this many sentence pairs before writing the gradient (1=emit after every sentence pair)")          ("forest_output,O",po::value<string>(),"Directory to write forests to")          ("minimal_forests,m","Write minimal forests (excludes Rule information). Such forests can be used for ML/MAP training, but not rescoring, etc."); -  OracleBleu::AddOptions(&opts); +  ob.AddOptions(&opts);    po::options_description clo("Command line options");    clo.add_options()          ("config,c", po::value<string>(), "Configuration file") @@ -206,6 +206,8 @@ void InitCommandLine(int argc, char** argv, po::variables_map* confp) {      cerr << dcmdline_options << endl;      exit(1);    } +  po::notify(conf); +  }  // TODO move out of cdec into some sampling decoder file @@ -358,7 +360,9 @@ int main(int argc, char** argv) {    global_ff_registry.reset(new FFRegistry);    register_feature_functions();    po::variables_map conf; -  InitCommandLine(argc, argv, &conf); +  OracleBleu oracle; + +  InitCommandLine(argc, argv, oracle, &conf);    const bool write_gradient = conf.count("cll_gradient");    const bool feature_expectations = conf.count("feature_expectations");    if (write_gradient && feature_expectations) { @@ -488,7 +492,6 @@ int main(int argc, char** argv) {    const bool crf_uniform_empirical = conf.count("crf_uniform_empirical");    const bool get_oracle_forest = conf.count("get_oracle_forest"); -  OracleBleu oracle;    if (get_oracle_forest)      oracle.UseConf(conf); diff --git a/decoder/ff_bleu.cc b/decoder/ff_bleu.cc index ab61ed10..f8d62aa2 100644 --- a/decoder/ff_bleu.cc +++ b/decoder/ff_bleu.cc @@ -243,8 +243,8 @@ BLEUModel::BLEUModel(const string& param) :    //loop over argv and load all references into vector of NgramMaps    if (argc >= 1) { -    if (argv[1] != "-o" || argc<2) { -      cerr<<bleu_usage_name<<" specification should be: "<<bleu_usage_short<<"; you provided: "<<param<<endl<<bleu_usage_verbose<<endl; +    if (argv[0] != "-o" || argc<2) { +      cerr<<bleu_usage_name<<" specification should be: "<<bleu_usage_short<<"; you provided: "<<param<<endl<<argv[0]<<endl<<bleu_usage_verbose<<endl;        abort();      } else        order=boost::lexical_cast<int>(argv[1]); diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h index cc19fbca..07d83b7f 100755 --- a/decoder/oracle_bleu.h +++ b/decoder/oracle_bleu.h @@ -73,13 +73,13 @@ struct OracleBleu {    WeightVector feature_weights_;    DocScorer ds; -  static void AddOptions(boost::program_options::options_description *opts) { +  void AddOptions(boost::program_options::options_description *opts) {      using namespace boost::program_options;      using namespace std;      opts->add_options() -      ("references,R", value<Refs >(), "Translation reference files") -      ("oracle_loss", value<string>(), "IBM_BLEU_3 (default), IBM_BLEU etc") -      ("bleu_weight", value<double>()->default_value(1.), "weight to give the hope/fear loss function vs. model score") +      ("references,R", value<Refs >(&refs), "Translation reference files") +      ("oracle_loss", value<string>(&loss_name)->default_value("IBM_BLEU_3"), "IBM_BLEU_3 (default), IBM_BLEU etc") +      ("bleu_weight", value<double>(&bleu_weight)->default_value(1.), "weight to give the hope/fear loss function vs. model score")        ;    }    int order; @@ -110,28 +110,41 @@ struct OracleBleu {    }    double bleu_weight; -  void UseConf(boost::program_options::variables_map const& conf) { +  // you have to call notify(conf) yourself, once, in main or similar +  void UseConf(boost::program_options::variables_map const& /* conf */) {      using namespace std; -    bleu_weight=conf["bleu_weight"].as<double>(); -    set_loss(conf["oracle_loss"].as<string>()); -    set_refs(conf["references"].as<Refs>()); +    //    bleu_weight=conf["bleu_weight"].as<double>(); +    //set_loss(conf["oracle_loss"].as<string>()); +    //set_refs(conf["references"].as<Refs>()); +    init_loss(); +    init_refs();    }    ScoreType loss; -//  std::string loss_name; +  std::string loss_name;    boost::shared_ptr<FeatureFunction> pff; -  void set_loss(std::string const& lossd="IBM_BLEU_3") { -//    loss_name=lossd; -    loss=ScoreTypeFromString(lossd); +  void set_loss(std::string const& lossd) { +    loss_name=lossd; +    init_loss(); +  } +  void init_loss() { +    loss=ScoreTypeFromString(loss_name);      order=(loss==IBM_BLEU_3)?3:4;      std::ostringstream param;      param<<"-o "<<order;      pff=global_ff_registry->Create("BLEUModel",param.str());    } +  bool is_null() const { +    return refs.empty(); +  }    void set_refs(Refs const& r) {      refs=r; +    init_refs(); +  } +  void init_refs() { +    if (is_null()) return;      assert(refs.size());      ds=DocScorer(loss,refs);      doc_score.reset(); diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc index 50e620f8..ac826ecf 100644 --- a/vest/mr_vest_generate_mapper_input.cc +++ b/vest/mr_vest_generate_mapper_input.cc @@ -33,13 +33,16 @@ typedef SparseVector<double> Dir;  typedef Dir Point; -void compress_similar(vector<Dir> &dirs,double min_dist,ostream *log=&cerr,bool avg=true) { +void compress_similar(vector<Dir> &dirs,double min_dist,ostream *log=&cerr,bool avg=true,bool verbose=true) { +  //  return; //TODO: debug    if (min_dist<=0) return;    double max_s=1.-min_dist; +  if (log&&verbose) *log<<"max allowed S="<<max_s<<' ';    unsigned N=dirs.size();    for (int i=0;i<N;++i) {      for (int j=i+1;j<N;++j) {        double s=dirs[i].tanimoto_coef(dirs[j]); +      if (log&&verbose) *log<<"S["<<i<<","<<j<<"]="<<s<<' ';        if (s>max_s) {          if (log) *log << "Collapsing similar directions (T="<<s<<" > "<<max_s<<").  dirs["<<i<<"]="<<dirs[i]<<" dirs["<<j<<"]";          if (avg) { @@ -51,6 +54,8 @@ void compress_similar(vector<Dir> &dirs,double min_dist,ostream *log=&cerr,bool          swap(dirs[j],dirs[--N]);        }      } +    if (log&&verbose) *log<<endl; +    }    dirs.resize(N);  } @@ -118,10 +123,14 @@ struct oracle_directions {        cerr << dcmdline_options << endl;        exit(1);      } +    po::notify(*conf); + +    if (0) {      dev_set_size = (*conf)["dev_set_size"].as<unsigned>();      forest_repository = (*conf)["forest_repository"].as<string>();      weights_file = (*conf)["weights"].as<string>();      n_random = (*conf)["random_directions"].as<unsigned>(); +    }    }    int main(int argc, char *argv[]) { @@ -134,8 +143,8 @@ struct oracle_directions {    void Run() {      AddPrimaryAndRandomDirections(); -    //AddOracleDirections(); -    //compress_similar(directions,max_similarity); +    AddOracleDirections(); +    compress_similar(directions,max_similarity);      Print();    } @@ -143,25 +152,21 @@ struct oracle_directions {    Point origin; // old weights that gave model 1best.    vector<string> optimize_features;    void UseConf(po::variables_map const& conf) { -#if 0      oracle.UseConf(conf); - +      // po::value<X>(&var) takes care of below: +      //    fear_to_hope=conf.count("fear_to_hope"); +      //    n_random=conf["random_directions"].as<unsigned int>(); +      //    forest_repository=conf["forest_repository"].as<string>(); +      //    dev_set_size=conf["dev_set_size"].as<unsigned int>(); +      //    n_oracle=conf["oracle_directions"].as<unsigned>(); +      //    oracle_batch=conf["oracle_batch"].as<unsigned>(); +      //    max_similarity=conf["max_similarity"].as<double>(); +      //    weights_file=conf["weights"].as<string>();      include_primary=!conf.count("no_primary");      old_to_hope=!conf.count("no_old_to_hope");      if (conf.count("optimize_feature") > 0)        optimize_features=conf["optimize_feature"].as<vector<string> >(); - -    // po::value<X>(&var) takes care of below: -//    fear_to_hope=conf.count("fear_to_hope"); -//    n_random=conf["random_directions"].as<unsigned int>(); -//    forest_repository=conf["forest_repository"].as<string>(); -//    dev_set_size=conf["dev_set_size"].as<unsigned int>(); -//    n_oracle=conf["oracle_directions"].as<unsigned>(); -//    oracle_batch=conf["oracle_batch"].as<unsigned>(); -//    max_similarity=conf["max_similarity"].as<double>(); -//    weights_file=conf["weights"].as<string>(); -#endif      Init();    } diff --git a/vest/scorer.cc b/vest/scorer.cc index 8f981af6..30015bb2 100644 --- a/vest/scorer.cc +++ b/vest/scorer.cc @@ -44,7 +44,7 @@ ScoreType ScoreTypeFromString(const string& st) {      return Koehn_BLEU;    if (sl == "combi")      return BLEU_minus_TER_over_2; -  cerr << "Don't understand score type '" << sl << "', defaulting to ibm_bleu.\n"; +  cerr << "Don't understand score type '" << st << "', defaulting to ibm_bleu.\n";    return IBM_BLEU;  } | 
