| author | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-08 23:27:05 +0000 | 
|---|---|---|
| committer | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-08 23:27:05 +0000 | 
| commit | 402124314fb821ba7869bf3c5008d7644f9e15b4 (patch) | |
| tree | b234accf2e7c31d35d19240bbac868657c5cec7d | |
| parent | c12e7241e8908def96943b1a4056e536ea91eded (diff) | |
feature functions support e.g. --usage=LanguageModel, report feature ids, warn about duplicate ids, 0-expand weight vector for no segfault, --warn_0_weight, and know their own names to simplify registration
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@192 ec762483-ff6d-05da-a07a-a48fb63a330f
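The user-visible side of this commit, sketched with hypothetical command lines (`cdec.ini` is a placeholder config; the flags are the ones added or touched in the diff below):

    cdec --usage=LanguageModel          # describe one feature function type, then exit
    cdec --list_feature_functions       # list registered features (same as -L)
    cdec -c cdec.ini --warn_0_weight    # warn about reported feature ids carrying 0 weight
    cdec -c cdec.ini --prelm_feature_function WordPenalty   # extra feature for the prelm pass only

On startup each ModelSet now prints its feature table via show_models/show_features; plausibly something like (weights invented for illustration):

    late : Weight  Feature
    0.5  LanguageModel
    -0.2  WordPenalty
    WARNING: SourceWordPenalty has 0 weight.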
| -rw-r--r-- | decoder/cdec.cc | 126 |
| -rw-r--r-- | decoder/cdec_ff.cc | 9 |
| -rw-r--r-- | decoder/ff.cc | 64 |
| -rw-r--r-- | decoder/ff.h | 30 |
| -rw-r--r-- | decoder/ff_factory.cc | 1 |
| -rw-r--r-- | decoder/ff_lm.cc | 4 |
| -rw-r--r-- | decoder/ff_lm.h | 1 |
7 files changed, 181 insertions, 54 deletions
diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index 54e24792..919751a2 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -56,7 +56,28 @@ void ConvertSV(const SparseVector<prob_t>& src, SparseVector<double>* trg) {
     trg->set_value(it->first, it->second);
 }
 
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+
+inline string str(char const* name,po::variables_map const& conf) {
+  return conf[name].as<string>();
+}
+
+shared_ptr<FeatureFunction> make_ff(string const& ffp,bool verbose_feature_functions,char const* pre="") {
+  string ff, param;
+  SplitCommandAndParam(ffp, &ff, &param);
+  cerr << "Feature: " << ff;
+  if (param.size() > 0) cerr << " (with config parameters '" << param << "')\n";
+  else cerr << " (no config parameters)\n";
+  shared_ptr<FeatureFunction> pf = global_ff_registry->Create(ff, param);
+  if (!pf)
+    exit(1);
+  int nbyte=pf->NumBytesContext();
+  if (verbose_feature_functions)
+    cerr<<"State is "<<nbyte<<" bytes for "<<pre<<"feature "<<ffp<<endl;
+  return pf;
+}
+
+void InitCommandLine(int argc, char** argv, po::variables_map* confp) {
+  po::variables_map &conf=*confp;
   po::options_description opts("Configuration options");
   opts.add_options()
         ("formalism,f",po::value<string>(),"Decoding formalism; values include SCFG, FST, PB, LexTrans (lexical translation model, also disc training), CSplit (compound splitting), Tagger (sequence labeling), LexAlign (alignment only, or EM training)")
@@ -65,8 +86,9 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
         ("weights,w",po::value<string>(),"Feature weights file")
     ("prelm_weights",po::value<string>(),"Feature weights file for prelm_beam_prune.  Requires --weights.")
     ("prelm_copy_weights","use --weights as value for --prelm_weights.")
+    ("prelm_feature_function",po::value<vector<string> >()->composing(),"Additional feature functions for prelm pass only (in addition to the 0-state subset of feature_function")
     ("keep_prelm_cube_order","when forest rescoring with final models, use the edge ordering from the prelm pruning features*weights.  only meaningful if --prelm_weights given.  UNTESTED but assume that cube pruning gives a sensible result, and that 'good' (as tuned for bleu w/ prelm features) edges come first.")
-
+    ("warn_0_weight","Warn about any feature id that has a 0 weight (this is perfectly safe if you intend 0 weight, though)")
         ("no_freeze_feature_set,Z", "Do not freeze feature set after reading feature weights file")
         ("feature_function,F",po::value<vector<string> >()->composing(), "Additional feature function(s) (-L for list)")
         ("list_feature_functions,L","List available feature functions")
@@ -111,33 +133,44 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description clo("Command line options");
   clo.add_options()
         ("config,c", po::value<string>(), "Configuration file")
-        ("help,h", "Print this help message and exit");
+        ("help,h", "Print this help message and exit")
+    ("usage", po::value<string>(), "Describe a feature function type")
+    ;
+
   po::options_description dconfig_options, dcmdline_options;
   dconfig_options.add(opts);
   dcmdline_options.add(opts).add(clo);
 
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  if (conf->count("config")) {
-    const string cfg = (*conf)["config"].as<string>();
+  po::store(parse_command_line(argc, argv, dcmdline_options), conf);
+  if (conf.count("config")) {
+    const string cfg = str("config",conf);
     cerr << "Configuration file: " << cfg << endl;
     ifstream config(cfg.c_str());
-    po::store(po::parse_config_file(config, dconfig_options), *conf);
+    po::store(po::parse_config_file(config, dconfig_options), conf);
   }
-  po::notify(*conf);
+  po::notify(conf);
 
-  if (conf->count("list_feature_functions")) {
+  if (conf.count("list_feature_functions")) {
     cerr << "Available feature functions (specify with -F):\n";
     global_ff_registry->DisplayList();
     cerr << endl;
     exit(1);
   }
 
-  if (conf->count("help") || conf->count("formalism") == 0) {
+  if (conf.count("usage")) {
+    cout<<global_ff_registry->usage(str("usage",conf),true,true)<<endl;
+    exit(0);
+  }
+  if (conf.count("help")) {
+    cout << dcmdline_options << endl;
+    exit(0);
+  }
+  if (conf.count("help") || conf.count("formalism") == 0) {
     cerr << dcmdline_options << endl;
     exit(1);
   }
 
-  const string formalism = LowercaseString((*conf)["formalism"].as<string>());
+  const string formalism = LowercaseString(str("formalism",conf));
   if (formalism != "scfg" && formalism != "fst" && formalism != "lextrans" && formalism != "pb" && formalism != "csplit" && formalism != "tagger" && formalism != "lexalign") {
     cerr << "Error: --formalism takes only 'scfg', 'fst', 'pb', 'csplit', 'lextrans', 'lexalign', or 'tagger'\n";
     cerr << dcmdline_options << endl;
@@ -256,18 +289,17 @@ bool beam_param(po::variables_map const& conf,string const& name,double *val,boo
 bool prelm_weights_string(po::variables_map const& conf,string &s)
 {
   if (conf.count("prelm_weights")) {
-    s=conf["prelm_weights"].as<string>();
+    s=str("prelm_weights",conf);
     return true;
   }
   if (conf.count("prelm_copy_weights")) {
-    s=conf["weights"].as<string>();
+    s=str("weights",conf);
     return true;
   }
   return false;
 }
 
-
 void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,FeatureWeights *weights=0) {
     cerr << viterbi_stats(forest,name,true,show_tree);
     if (show_features) {
@@ -305,6 +337,10 @@ void maybe_prune(Hypergraph &forest,po::variables_map const& conf,string nbeam,s
     }
 }
 
+void show_models(po::variables_map const& conf,ModelSet &ms,char const* header) {
+  cerr<<header<<": ";
+  ms.show_features(cerr,cerr,conf.count("warn_0_weight"));
+}
 
 int main(int argc, char** argv) {
@@ -322,7 +358,7 @@ int main(int argc, char** argv) {
   const bool output_training_vector = (write_gradient || feature_expectations);
 
   boost::shared_ptr<Translator> translator;
-  const string formalism = LowercaseString(conf["formalism"].as<string>());
+  const string formalism = LowercaseString(str("formalism",conf));
   const bool csplit_preserve_full_word = conf.count("csplit_preserve_full_word");
   if (csplit_preserve_full_word &&
       (formalism != "csplit" || !(conf.count("beam_prune")||conf.count("density_prune")||conf.count("prelm_beam_prune")||conf.count("prelm_density_prune")))) {
@@ -341,7 +377,7 @@ int main(int argc, char** argv) {
   Weights w,prelm_w;
   bool has_prelm_models = false;
   if (conf.count("weights")) {
-    w.InitFromFile(conf["weights"].as<string>());
+    w.InitFromFile(str("weights",conf));
     feature_weights.resize(FD::NumFeats());
     w.InitVector(&feature_weights);
     string plmw;
@@ -350,13 +386,9 @@ int main(int argc, char** argv) {
       prelm_w.InitFromFile(plmw);
       prelm_feature_weights.resize(FD::NumFeats());
       prelm_w.InitVector(&prelm_feature_weights);
-      cerr << "prelm_weights: " << FeatureVector(prelm_feature_weights)<<endl;
-    }
-    cerr << "+LM weights: " << FeatureVector(feature_weights)<<endl;
-    if (!conf.count("no_freeze_feature_set")) {
-      cerr << "Freezing feature set (use --no_freeze_feature_set to change)." << endl;
-      FD::Freeze();
+//      cerr << "prelm_weights: " << FeatureVector(prelm_feature_weights)<<endl;
     }
+//    cerr << "+LM weights: " << FeatureVector(feature_weights)<<endl;
   }
 
   // set up translation back end
@@ -378,41 +410,46 @@ int main(int argc, char** argv) {
     assert(!"error");
 
   // set up additional scoring features
-  vector<shared_ptr<FeatureFunction> > pffs;
+  vector<shared_ptr<FeatureFunction> > pffs,prelm_only_ffs;
   vector<const FeatureFunction*> late_ffs,prelm_ffs;
   if (conf.count("feature_function") > 0) {
     const vector<string>& add_ffs = conf["feature_function"].as<vector<string> >();
     for (int i = 0; i < add_ffs.size(); ++i) {
-      string ff, param;
-      SplitCommandAndParam(add_ffs[i], &ff, &param);
-      cerr << "Feature: " << ff;
-      if (param.size() > 0) cerr << " (with config parameters '" << param << "')\n";
-      else cerr << " (no config parameters)\n";
-      shared_ptr<FeatureFunction> pff = global_ff_registry->Create(ff, param);
-      FeatureFunction const* p=pff.get();
-      if (!p) { exit(1); }
-      // TODO check that multiple features aren't trying to set the same fid
-      pffs.push_back(pff);
+      pffs.push_back(make_ff(add_ffs[i],verbose_feature_functions));
+      FeatureFunction const* p=pffs.back().get();
       late_ffs.push_back(p);
-      int nbyte=p->NumBytesContext();
-      if (verbose_feature_functions)
-        cerr<<"State is "<<nbyte<<" bytes for feature "<<ff<<endl;
       if (has_prelm_models) {
-        if (nbyte==0)
+        if (p->NumBytesContext()==0)
           prelm_ffs.push_back(p);
         else
-          cerr << "Excluding stateful feature from prelm pruning: "<<ff<<" - state is "<<nbyte<<" bytes."<<endl;
+          cerr << "Excluding stateful feature from prelm pruning: "<<add_ffs[i]<<endl;
       }
     }
   }
+  if (conf.count("prelm_feature_function") > 0) {
+    const vector<string>& add_ffs = conf["prelm_feature_function"].as<vector<string> >();
+    for (int i = 0; i < add_ffs.size(); ++i) {
+      prelm_only_ffs.push_back(make_ff(add_ffs[i],verbose_feature_functions,"prelm-only "));
+      prelm_ffs.push_back(prelm_only_ffs.back().get());
+    }
+  }
+
   if (has_prelm_models)
         cerr << "prelm rescoring with "<<prelm_ffs.size()<<" 0-state feature functions.  +LM pass will use "<<late_ffs.size()<<" features (not counting rule features)."<<endl;
 
   ModelSet late_models(feature_weights, late_ffs);
+  show_models(conf,late_models,"late ");
+  ModelSet prelm_models(prelm_feature_weights, prelm_ffs);
+  if (has_prelm_models)
+    show_models(conf,prelm_models,"prelm ");
+  if (!conf.count("no_freeze_feature_set")) { // this used to happen immediately after loading weights, but now show_models will extend weight vector nicely.
+    cerr << "Freezing feature set (use --no_freeze_feature_set to change)." << endl;
+    FD::Freeze();
+  }
 
   int palg = 1;
-  if (LowercaseString(conf["intersection_strategy"].as<string>()) == "full") {
+  if (LowercaseString(str("intersection_strategy",conf)) == "full") {
     palg = 0;
     cerr << "Using full intersection (no pruning).\n";
   }
@@ -426,17 +463,17 @@ int main(int argc, char** argv) {
   const bool minimal_forests = conf.count("minimal_forests");
   const bool graphviz = conf.count("graphviz");
   const bool joshua_viz = conf.count("show_joshua_visualization");
-  const bool encode_b64 = conf["vector_format"].as<string>() == "b64";
+  const bool encode_b64 = str("vector_format",conf) == "b64";
   const bool kbest = conf.count("k_best");
   const bool unique_kbest = conf.count("unique_k_best");
   const bool crf_uniform_empirical = conf.count("crf_uniform_empirical");
   shared_ptr<WriteFile> extract_file;
   if (conf.count("extract_rules"))
-    extract_file.reset(new WriteFile(conf["extract_rules"].as<string>()));
+    extract_file.reset(new WriteFile(str("extract_rules",conf)));
   int combine_size = conf["combine_size"].as<int>();
   if (combine_size < 1) combine_size = 1;
-  const string input = conf["input"].as<string>();
+    const string input = str("input",conf);
   cerr << "Reading input from " << ((input == "-") ? "STDIN" : input.c_str()) << endl;
   ReadFile in_read(input);
   istream *in = in_read.stream();
@@ -506,7 +543,6 @@ int main(int argc, char** argv) {
       ExtractRulesDedupe(forest, extract_file->stream());
 
     if (has_prelm_models) {
-      ModelSet prelm_models(prelm_feature_weights, prelm_ffs);
       Timer t("prelm rescoring");
       forest.Reweight(prelm_feature_weights);
       forest.SortInEdgesByEdgeWeights();
@@ -544,7 +580,7 @@ int main(int argc, char** argv) {
     maybe_prune(forest,conf,"beam_prune","density_prune","+LM",srclen);
 
     if (conf.count("forest_output") && !has_ref) {
-      ForestWriter writer(conf["forest_output"].as<string>(), sent_id);
+      ForestWriter writer(str("forest_output",conf), sent_id);
       if (FileExists(writer.fname_)) {
         cerr << "  Unioning...\n";
         Hypergraph new_hg;
@@ -621,7 +657,7 @@ int main(int argc, char** argv) {
         }
         //DumpKBest(sent_id, forest, 1000);
         if (conf.count("forest_output")) {
-          ForestWriter writer(conf["forest_output"].as<string>(), sent_id);
+          ForestWriter writer(str("forest_output",conf), sent_id);
           if (FileExists(writer.fname_)) {
             cerr << "  Unioning...\n";
             Hypergraph new_hg;
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 8cf2f2fd..077956a8 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -12,13 +12,14 @@ boost::shared_ptr<FFRegistry> global_ff_registry;
 
 void register_feature_functions() {
   global_ff_registry->Register(new FFFactory<LanguageModel>);
-  //TODO: define usage(false,false) for each of the below
+
+  //TODO: use for all features the new Register which requires usage(...)
 #ifdef HAVE_RANDLM
   global_ff_registry->Register("RandLM", new FFFactory<LanguageModelRandLM>);
 #endif
-  global_ff_registry->Register("WordPenalty", new FFFactory<WordPenalty>);
-  global_ff_registry->Register("SourceWordPenalty", new FFFactory<SourceWordPenalty>);
-  global_ff_registry->Register("ArityPenalty", new FFFactory<ArityPenalty>);
+  global_ff_registry->Register(new FFFactory<WordPenalty>);
+  global_ff_registry->Register(new FFFactory<SourceWordPenalty>);
+  global_ff_registry->Register(new FFFactory<ArityPenalty>);
   global_ff_registry->Register("RuleShape", new FFFactory<RuleShapeFeatures>);
   global_ff_registry->Register("RelativeSentencePosition", new FFFactory<RelativeSentencePosition>);
   global_ff_registry->Register("Model2BinaryFeatures", new FFFactory<Model2BinaryFeatures>);
diff --git a/decoder/ff.cc b/decoder/ff.cc
index 73dbbdc9..3f433dfb 100644
--- a/decoder/ff.cc
+++ b/decoder/ff.cc
@@ -29,6 +29,55 @@ string FeatureFunction::usage_helper(std::string const& name,std::string const&
   return r;
 }
 
+FeatureFunction::Features FeatureFunction::single_feature(WordID feat) {
+  return Features(1,feat);
+}
+
+FeatureFunction::Features ModelSet::all_features(std::ostream *warn) {
+  typedef FeatureFunction::Features FFS;
+  FFS ffs;
+#define WARNFF(x) do { if (warn) { *warn << "WARNING: "<< x ; *warn<<endl; } } while(0)
+  typedef std::map<WordID,string> FFM;
+  FFM ff_from;
+  for (unsigned i=0;i<models_.size();++i) {
+    FeatureFunction const& ff=*models_[i];
+    string const& ffname=ff.name;
+    FFS si=ff.features();
+    if (si.empty()) {
+      WARNFF(ffname<<" doesn't yet report any feature IDs - implement features() method?");
+    }
+    for (unsigned j=0;j<si.size();++j) {
+      WordID fid=si[j];
+      if (fid >= weights_.size())
+        weights_.resize(fid+1);
+      pair<FFM::iterator,bool> i_new=ff_from.insert(FFM::value_type(fid,ffname));
+      if (i_new.second)
+        ffs.push_back(fid);
+      else {
+        WARNFF(ffname<<" models["<<i<<"] tried to define feature "<<FD::Convert(fid)<<" already defined earlier by "<<i_new.first->second);
+      }
+    }
+  }
+  return ffs;
+#undef WARNFF
+}
+
+void ModelSet::show_features(std::ostream &out,std::ostream &warn,bool warn_zero_wt)
+{
+  typedef FeatureFunction::Features FFS;
+  FFS ffs=all_features(&warn);
+  out << "Weight  Feature\n";
+  for (unsigned i=0;i<ffs.size();++i) {
+    WordID fid=ffs[i];
+    string const& fname=FD::Convert(fid);
+    double wt=weights_[fid];
+    if (warn_zero_wt && wt==0)
+      warn<<"WARNING: "<<fname<<" has 0 weight."<<endl;
+    out << wt << "  " << fname<<endl;
+  }
+
+}
+
 // Hiero and Joshua use log_10(e) as the value, so I do to
 WordPenalty::WordPenalty(const string& param) :
     fid_(FD::Convert("WordPenalty")),
@@ -59,6 +108,15 @@ SourceWordPenalty::SourceWordPenalty(const string& param) :
   }
 }
 
+FeatureFunction::Features SourceWordPenalty::features() const {
+  return single_feature(fid_);
+}
+
+FeatureFunction::Features WordPenalty::features() const {
+  return single_feature(fid_);
+}
+
+
 void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                         const Hypergraph::Edge& edge,
                                         const std::vector<const void*>& ant_states,
@@ -75,12 +133,16 @@ void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
 
 ArityPenalty::ArityPenalty(const std::string& /* param */) :
     value_(-1.0 / log(10)) {
   string fname = "Arity_X";
-  for (int i = 0; i < 10; ++i) {
+  for (int i = 0; i < N_ARITIES; ++i) {
     fname[6]=i + '0';
     fids_[i] = FD::Convert(fname);
   }
 }
 
+FeatureFunction::Features ArityPenalty::features() const {
+  return Features(&fids_[0],&fids_[N_ARITIES]);
+}
+
 void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                          const Hypergraph::Edge& edge,
                                          const std::vector<const void*>& ant_states,
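
ModelSet::all_features above is also where "0-expand weight vector for no segfault" from the commit message lands: any reported fid at or past weights_.size() triggers a resize, and std::vector::resize value-initializes the new doubles to 0, so a later weights_[fid] read is safe and yields the neutral weight. A standalone illustration (simplified; `weights` stands in for ModelSet::weights_):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main() {
      std::vector<double> weights(3, 0.5); // weights file loaded before some fids existed
      std::size_t fid = 7;                 // feature id registered after loading
      // weights[fid] here would read past the end (undefined behavior,
      // possibly a segfault). The fix: grow the vector; new slots become 0.0.
      if (fid >= weights.size())
        weights.resize(fid + 1);
      std::cout << weights[fid] << "\n";   // safe: prints 0
    }
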
diff --git a/decoder/ff.h b/decoder/ff.h
index c6c9cf8f..6f8b8626 100644
--- a/decoder/ff.h
+++ b/decoder/ff.h
@@ -15,6 +15,7 @@ class FeatureFunction;  // see definition below
 // FinalTraversalFeatures(...)
 class FeatureFunction {
  public:
+  std::string name; // set by FF factory using usage()
   FeatureFunction() : state_size_() {}
   explicit FeatureFunction(int state_size) : state_size_(state_size) {}
   virtual ~FeatureFunction();
@@ -24,12 +25,14 @@ class FeatureFunction {
     return usage_helper("FIXME_feature_needs_name","[no parameters]","[no documentation yet]",show_params,show_details);
   }
 
-  static std::string usage_helper(std::string const& name,std::string const& params,std::string const& details,bool show_params,bool show_details);
+  typedef std::vector<WordID> Features; // set of features ids
+protected:
+  static std::string usage_helper(std::string const& name,std::string const& params,std::string const& details,bool show_params,bool show_details);
+  static Features single_feature(WordID feat);
 public:
 
-  typedef std::vector<WordID> Features;
-  virtual Features features() { return Features(); }
+  virtual Features features() const { return Features(); }
   // returns the number of bytes of context that this feature function will
   // (maximally) use.  By default, 0 ("stateless" models in Hiero/Joshua).
   // NOTE: this value is fixed for the instance of your class, you cannot
@@ -87,7 +90,11 @@ public:
 // add value_
 class WordPenalty : public FeatureFunction {
  public:
+  Features features() const;
   WordPenalty(const std::string& param);
+  static std::string usage(bool p,bool d) {
+    return usage_helper("WordPenalty","","number of target words (local feature)",p,d);
+  }
  protected:
   virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                      const Hypergraph::Edge& edge,
@@ -102,7 +109,11 @@ class WordPenalty : public FeatureFunction {
 
 class SourceWordPenalty : public FeatureFunction {
  public:
+  Features features() const;
   SourceWordPenalty(const std::string& param);
+  static std::string usage(bool p,bool d) {
+    return usage_helper("SourceWordPenalty","","number of source words (local feature, and meaningless except when input has non-constant number of source words, e.g. segmentation/morphology/speech recognition lattice)",p,d);
+  }
  protected:
   virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                      const Hypergraph::Edge& edge,
@@ -117,7 +128,12 @@ class SourceWordPenalty : public FeatureFunction {
 
 class ArityPenalty : public FeatureFunction {
  public:
+  Features features() const;
   ArityPenalty(const std::string& param);
+  static std::string usage(bool p,bool d) {
+    return usage_helper("ArityPenalty","","Indicator feature Arity_N=1 for rule of arity N (local feature)",p,d);
+  }
+
  protected:
   virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                      const Hypergraph::Edge& edge,
@@ -126,7 +142,10 @@ class ArityPenalty : public FeatureFunction {
                                      SparseVector<double>* estimated_features,
                                      void* context) const;
  private:
-  int fids_[10];
+  enum {N_ARITIES=10};
+
+
+  int fids_[N_ARITIES];
   const double value_;
 };
 
@@ -153,6 +172,9 @@ class ModelSet {
                         Hypergraph::Edge* edge) const;
 
   bool empty() const { return models_.empty(); }
+
+  FeatureFunction::Features all_features(std::ostream *warnings=0); // this will warn about duplicate features as well (one function overwrites the feature of another).  also resizes weights_ so it is large enough to hold the (0) weight for the largest reported feature id
+  void show_features(std::ostream &out,std::ostream &warn,bool warn_zero_wt=true); //show features and weights
  private:
   std::vector<const FeatureFunction*> models_;
   std::vector<double> weights_;
diff --git a/decoder/ff_factory.cc b/decoder/ff_factory.cc
index d66cd883..fe733ca5 100644
--- a/decoder/ff_factory.cc
+++ b/decoder/ff_factory.cc
@@ -28,6 +28,7 @@ shared_ptr<FeatureFunction> FFRegistry::Create(const string& ffname, const strin
     cerr << "I don't know how to create feature " << ffname << endl;
   } else {
     res = it->second->Create(param);
+    res->name=ffname;
   }
   return res;
 }
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index 9e6f02b7..0590fa7e 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -532,6 +532,10 @@ LanguageModel::LanguageModel(const string& param) {
   SetStateSize(LanguageModelImpl::OrderToStateSize(order));
 }
 
+FeatureFunction::Features LanguageModel::features() const {
+  return single_feature(fid_);
+}
+
 LanguageModel::~LanguageModel() {
   delete pimpl_;
 }
diff --git a/decoder/ff_lm.h b/decoder/ff_lm.h
index 5ea41068..935e283c 100644
--- a/decoder/ff_lm.h
+++ b/decoder/ff_lm.h
@@ -19,6 +19,7 @@ class LanguageModel : public FeatureFunction {
                                       SparseVector<double>* features) const;
   std::string DebugStateToString(const void* state) const;
   static std::string usage(bool param,bool verbose);
+  Features features() const;
  protected:
   virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                      const Hypergraph::Edge& edge,
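
Put together, a feature function written against this revision of ff.h supplies a static usage() (which doubles as its registry name) and a features() override reporting the ids it will write, so ModelSet can extend the weight vector and warn about duplicate or 0-weight ids. A hedged sketch of the pattern; MyPenalty, its id, and its firing rule are invented, but the hooks mirror WordPenalty in the diff above:

    #include "ff.h"  // cdec's FeatureFunction API (decoder/ff.h in this tree)

    class MyPenalty : public FeatureFunction {
     public:
      MyPenalty(const std::string& param) : fid_(FD::Convert("MyPenalty")) {}
      static std::string usage(bool p, bool d) {
        return usage_helper("MyPenalty", "", "toy local feature (example only)", p, d);
      }
      // Report the single id this feature writes, so all_features() can
      // 0-expand weights_ and detect clashes with other features.
      Features features() const { return single_feature(fid_); }
     protected:
      virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                         const Hypergraph::Edge& edge,
                                         const std::vector<const void*>& ant_states,
                                         SparseVector<double>* features,
                                         SparseVector<double>* estimated_features,
                                         void* context) const {
        features->set_value(fid_, 1.0);  // toy: fire once per edge
      }
     private:
      WordID fid_;
    };

    // Registration then reduces to (in register_feature_functions):
    //   global_ff_registry->Register(new FFFactory<MyPenalty>);
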
