diff options
Diffstat (limited to 'decoder')
-rw-r--r-- | decoder/cdec_ff.cc | 3 | ||||
-rw-r--r-- | decoder/ff_rules.cc | 48 | ||||
-rw-r--r-- | decoder/ff_rules.h | 20 |
3 files changed, 63 insertions, 8 deletions
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index 99ab7473..3ab0f9f6 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -49,8 +49,9 @@ void register_feature_functions() { ff_registry.Register("RuleIdentityFeatures", new FFFactory<RuleIdentityFeatures>()); ff_registry.Register("SourceSyntaxFeatures", new FFFactory<SourceSyntaxFeatures>); ff_registry.Register("SourceSpanSizeFeatures", new FFFactory<SourceSpanSizeFeatures>); - ff_registry.Register("RuleNgramFeatures", new FFFactory<RuleNgramFeatures>()); ff_registry.Register("CMR2008ReorderingFeatures", new FFFactory<CMR2008ReorderingFeatures>()); + ff_registry.Register("RuleSourceBigramFeatures", new FFFactory<RuleSourceBigramFeatures>()); + ff_registry.Register("RuleTargetBigramFeatures", new FFFactory<RuleTargetBigramFeatures>()); ff_registry.Register("KLanguageModel", new KLanguageModelFactory()); ff_registry.Register("NonLatinCount", new FFFactory<NonLatinCount>); ff_registry.Register("RuleShape", new FFFactory<RuleShapeFeatures>); diff --git a/decoder/ff_rules.cc b/decoder/ff_rules.cc index 0aafb0ba..6716d3da 100644 --- a/decoder/ff_rules.cc +++ b/decoder/ff_rules.cc @@ -68,15 +68,15 @@ void RuleIdentityFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, features->add_value(it->second, 1); } -RuleNgramFeatures::RuleNgramFeatures(const std::string& param) { +RuleSourceBigramFeatures::RuleSourceBigramFeatures(const std::string& param) { } -void RuleNgramFeatures::PrepareForInput(const SentenceMetadata& smeta) { +void RuleSourceBigramFeatures::PrepareForInput(const SentenceMetadata& smeta) { // std::map<const TRule*, SparseVector<double> > rule2_feats_.clear(); } -void RuleNgramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, +void RuleSourceBigramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, const vector<const void*>& ant_contexts, SparseVector<double>* features, @@ -94,14 +94,52 @@ void RuleNgramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, assert(w > 0); const string& cur = TD::Convert(w); ostringstream os; - os << "RB:" << prev << '_' << cur; + os << "RBS:" << prev << '_' << cur; const int fid = FD::Convert(Escape(os.str())); if (fid <= 0) return; f.add_value(fid, 1.0); prev = cur; } ostringstream os; - os << "RB:" << prev << '_' << "</r>"; + os << "RBS:" << prev << '_' << "</r>"; + f.set_value(FD::Convert(Escape(os.str())), 1.0); + } + (*features) += it->second; +} + +RuleTargetBigramFeatures::RuleTargetBigramFeatures(const std::string& param) { +} + +void RuleTargetBigramFeatures::PrepareForInput(const SentenceMetadata& smeta) { + rule2_feats_.clear(); +} + +void RuleTargetBigramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const { + map<const TRule*, SparseVector<double> >::iterator it = rule2_feats_.find(edge.rule_.get()); + if (it == rule2_feats_.end()) { + const TRule& rule = *edge.rule_; + it = rule2_feats_.insert(make_pair(&rule, SparseVector<double>())).first; + SparseVector<double>& f = it->second; + string prev = "<r>"; + for (int i = 0; i < rule.e_.size(); ++i) { + WordID w = rule.e_[i]; + if (w < 0) w = -w; + if (w == 0) return; + const string& cur = TD::Convert(w); + ostringstream os; + os << "RBT:" << prev << '_' << cur; + const int fid = FD::Convert(Escape(os.str())); + if (fid <= 0) return; + f.add_value(fid, 1.0); + prev = cur; + } + ostringstream os; + os << "RBT:" << prev << '_' << "</r>"; f.set_value(FD::Convert(Escape(os.str())), 1.0); } (*features) += it->second; diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h index 7f5e1dfa..b100ec34 100644 --- a/decoder/ff_rules.h +++ b/decoder/ff_rules.h @@ -5,6 +5,7 @@ #include <map> #include "trule.h" #include "ff.h" +#include "hg.h" #include "array2d.h" #include "wordid.h" @@ -23,9 +24,24 @@ class RuleIdentityFeatures : public FeatureFunction { mutable std::map<const TRule*, int> rule2_fid_; }; -class RuleNgramFeatures : public FeatureFunction { +class RuleSourceBigramFeatures : public FeatureFunction { public: - RuleNgramFeatures(const std::string& param); + RuleSourceBigramFeatures(const std::string& param); + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const; + virtual void PrepareForInput(const SentenceMetadata& smeta); + private: + mutable std::map<const TRule*, SparseVector<double> > rule2_feats_; +}; + +class RuleTargetBigramFeatures : public FeatureFunction { + public: + RuleTargetBigramFeatures(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, const HG::Edge& edge, |