summaryrefslogtreecommitdiff
path: root/extools/featurize_grammar.cc
diff options
context:
space:
mode:
authorolivia.buzek <olivia.buzek@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-08 21:59:50 +0000
committerolivia.buzek <olivia.buzek@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-08 21:59:50 +0000
commitc12e7241e8908def96943b1a4056e536ea91eded (patch)
treec24b9cf0d2a90239b01eb6432e683292c95bb06f /extools/featurize_grammar.cc
parenta034f92b1fe0c6368ebb140bc691f0718dd23a23 (diff)
Adding backoff grammar and BackoffRule feature.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@191 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'extools/featurize_grammar.cc')
-rw-r--r--extools/featurize_grammar.cc17
1 files changed, 17 insertions, 0 deletions
diff --git a/extools/featurize_grammar.cc b/extools/featurize_grammar.cc
index 0d054626..b387fe04 100644
--- a/extools/featurize_grammar.cc
+++ b/extools/featurize_grammar.cc
@@ -385,6 +385,22 @@ struct LogRuleCount : public FeatureExtractor {
const int kCFE;
};
+// Feature extractor that sets the "BackoffRule" feature to -1 on every rule
+// whose left-hand-side label, in string form, contains an underscore.
+// NOTE(review): presumably labels containing '_' are the ones introduced by
+// the backoff grammar -- confirm against the code that generates them.
+struct BackoffRule : public FeatureExtractor {
+  BackoffRule() :
+    fid_(FD::Convert("BackoffRule")) {}
+  // Writes fid_ = -1 into *result when TD::Convert(lhs) contains '_';
+  // otherwise *result is left untouched. src, trg and info are not consulted.
+  virtual void ExtractFeatures(const WordID lhs,
+                               const vector<WordID>& src,
+                               const vector<WordID>& trg,
+                               const RuleStatistics& info,
+                               SparseVector<float>* result) const {
+    // Silence only the parameters that are genuinely unused; lhs is read below.
+    (void) src; (void) trg; (void) info;
+    // Bind by const reference: avoids a copy if TD::Convert returns a
+    // reference, and is still valid (lifetime-extended) if it returns a value.
+    const string& lhstr = TD::Convert(lhs);
+    if (lhstr.find('_') != string::npos)
+      result->set_value(fid_, -1);
+  }
+  const int fid_;  // id returned by FD::Convert("BackoffRule")
+};
+
// The negative log of the conditional rule probabilities,
// ignoring the identities of the non-terminals,
// i.e. the probability Hiero would assign.
@@ -656,6 +672,7 @@ int main(int argc, char** argv){
reg.Register("LexProb", new FEFactory<LexProbExtractor>);
reg.Register("XFeatures", new FEFactory<XFeatures>);
reg.Register("LabelledRuleConditionals", new FEFactory<LabelledRuleConditionals>);
+ reg.Register("BackoffRule", new FEFactory<BackoffRule>);
po::variables_map conf;
InitCommandLine(reg, argc, argv, &conf);
aligned_corpus = conf["aligned_corpus"].as<string>(); // GLOBAL VAR