diff options
Diffstat (limited to 'extools')
| -rw-r--r-- | extools/filter_score_grammar.cc | 50 | 
1 files changed, 45 insertions, 5 deletions
| diff --git a/extools/filter_score_grammar.cc b/extools/filter_score_grammar.cc index e1fd714b..f34b240d 100644 --- a/extools/filter_score_grammar.cc +++ b/extools/filter_score_grammar.cc @@ -37,6 +37,8 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {    opts.add_options()          ("test_set,t", po::value<string>(), "Filter for this test set (not specified = no filtering)")          ("top_e_given_f,n", po::value<size_t>()->default_value(30), "Keep top N rules, according to p(e|f). 0 for all") +        ("hiero_features", "Use 'Hiero' features") +//        ("feature,f", po::value<vector<string> >()->composing(), "List of features to compute")          ("aligned_corpus,c", po::value<string>(), "Aligned corpus (single line format)")          ("help,h", "Print this help message and exit");    po::options_description clo("Command line options"); @@ -245,6 +247,38 @@ struct FeatureExtractor {    const string extractor_name;  }; +struct LogRuleCount : public FeatureExtractor { +  LogRuleCount() : +    FeatureExtractor("LogRuleCount"), +    fid_(FD::Convert("LogRuleCount")), kCFE(FD::Convert("CFE")) {} +  virtual void ExtractFeatures(const vector<WordID>& lhs_src, +                               const vector<WordID>& trg, +                               const RuleStatistics& info, +                               SparseVector<float>* result) const { +    (void) lhs_src; (void) trg; +    result->set_value(fid_, log(info.counts.value(kCFE))); +  } +  const int fid_; +  const int kCFE; +}; + +struct SingletonRule : public FeatureExtractor { +  SingletonRule() : +    FeatureExtractor("SingletonRule"), +    fid_(FD::Convert("SingletonRule")), kCFE(FD::Convert("CFE")) {} +  virtual void ExtractFeatures(const vector<WordID>& lhs_src, +                               const vector<WordID>& trg, +                               const RuleStatistics& info, +                               SparseVector<float>* result) const { +    (void) lhs_src; (void) trg; +    if (info.counts.value(kCFE) > 0.999 && info.counts.value(kCFE) < 1.001) { +      result->set_value(fid_, 1.0); +    } +  } +  const int fid_; +  const int kCFE; +}; +  struct EGivenFExtractor : public FeatureExtractor {    EGivenFExtractor() :      FeatureExtractor("EGivenF"), @@ -403,9 +437,15 @@ int main(int argc, char** argv){    // TODO make this list configurable    vector<boost::shared_ptr<FeatureExtractor> > extractors; -  extractors.push_back(boost::shared_ptr<FeatureExtractor>(new EGivenFExtractor)); -  extractors.push_back(boost::shared_ptr<FeatureExtractor>(new FGivenEExtractor)); -  extractors.push_back(boost::shared_ptr<FeatureExtractor>(new LexProbExtractor(conf["aligned_corpus"].as<string>()))); +  if (conf.count("hiero_features")) { +    extractors.push_back(boost::shared_ptr<FeatureExtractor>(new EGivenFExtractor)); +    extractors.push_back(boost::shared_ptr<FeatureExtractor>(new FGivenEExtractor)); +    extractors.push_back(boost::shared_ptr<FeatureExtractor>(new LexProbExtractor(conf["aligned_corpus"].as<string>()))); +  } else { +    extractors.push_back(boost::shared_ptr<FeatureExtractor>(new LogRuleCount)); +    extractors.push_back(boost::shared_ptr<FeatureExtractor>(new SingletonRule)); +    extractors.push_back(boost::shared_ptr<FeatureExtractor>(new LexProbExtractor(conf["aligned_corpus"].as<string>()))); +  }    //score unscored grammar    cerr <<"Scoring grammar..." << endl; @@ -415,7 +455,7 @@ int main(int argc, char** argv){    vector<WordID> key, cur_key,temp_key;    int line = 0; -  const int kEGivenF = FD::Convert("EGivenF"); +  const int kLogRuleCount = FD::Convert("LogRuleCount");    multimap<float, string> options;     while(!unscored_grammar.eof())      { @@ -436,7 +476,7 @@ int main(int argc, char** argv){             os << TD::GetString(cur_key)                << ' ' << TD::GetString(it->first) << " ||| ";             feats.Write(false, &os); -           options.insert(make_pair(feats.value(kEGivenF), os.str())); +           options.insert(make_pair(-feats.value(kLogRuleCount), os.str()));          }          int ocount = 0;          for (multimap<float,string>::iterator it = options.begin(); it != options.end(); ++it) { | 
