summary | refs | log | tree | commit | diff
path: root/extools/featurize_grammar.cc
diff options
context:
space:
mode:
Diffstat (limited to 'extools/featurize_grammar.cc')
-rw-r--r-- extools/featurize_grammar.cc 17
1 files changed, 17 insertions, 0 deletions
diff --git a/extools/featurize_grammar.cc b/extools/featurize_grammar.cc
index 0d054626..b387fe04 100644
--- a/extools/featurize_grammar.cc
+++ b/extools/featurize_grammar.cc
@@ -385,6 +385,22 @@ struct LogRuleCount : public FeatureExtractor {
const int kCFE;
};
+// Sets the "BackoffRule" feature to -1 on rules whose LHS label (via TD::Convert) contains '_'.
+struct BackoffRule : public FeatureExtractor {
+  BackoffRule() : fid_(FD::Convert("BackoffRule")) {}
+  virtual void ExtractFeatures(const WordID lhs,
+                               const vector<WordID>& src,
+                               const vector<WordID>& trg,
+                               const RuleStatistics& info,
+                               SparseVector<float>* result) const {
+    (void) src; (void) trg; (void) info;  // unused here; only the LHS label is inspected
+    const string lhstr = TD::Convert(lhs);
+    // NOTE(review): '_' presumably marks a backoff category label -- confirm against the extractor.
+    if (lhstr.find('_') != string::npos) result->set_value(fid_, -1);
+  }
+  const int fid_;  // id returned by FD::Convert("BackoffRule"), looked up once at construction
+};
+
// The negative log of the conditional rule probs
// ignoring the identities of the non-terminals.
// i.e. the prob Hiero would assign.
@@ -656,6 +672,7 @@ int main(int argc, char** argv){
reg.Register("LexProb", new FEFactory<LexProbExtractor>);
reg.Register("XFeatures", new FEFactory<XFeatures>);
reg.Register("LabelledRuleConditionals", new FEFactory<LabelledRuleConditionals>);
+ reg.Register("BackoffRule", new FEFactory<BackoffRule>);
po::variables_map conf;
InitCommandLine(reg, argc, argv, &conf);
aligned_corpus = conf["aligned_corpus"].as<string>(); // GLOBAL VAR