summaryrefslogtreecommitdiff
path: root/decoder
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cab.ark.cs.cmu.edu>2012-11-06 00:02:58 -0500
committerChris Dyer <cdyer@cab.ark.cs.cmu.edu>2012-11-06 00:02:58 -0500
commitd8d8dad6e63306b3c8e326b22fcfdf90856bb85e (patch)
treeb4dc354bdb2080e110a8f9067d108cbacab5b3b8 /decoder
parent552793bbd50f634ea755b84d47ddcc6cd4f158f2 (diff)
parent9e5107a05bfabb76ce547d2849173c5a11aeba60 (diff)
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'decoder')
-rw-r--r--decoder/cdec_ff.cc3
-rw-r--r--decoder/ff_rules.cc48
-rw-r--r--decoder/ff_rules.h20
3 files changed, 63 insertions, 8 deletions
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 99ab7473..3ab0f9f6 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -49,8 +49,9 @@ void register_feature_functions() {
ff_registry.Register("RuleIdentityFeatures", new FFFactory<RuleIdentityFeatures>());
ff_registry.Register("SourceSyntaxFeatures", new FFFactory<SourceSyntaxFeatures>);
ff_registry.Register("SourceSpanSizeFeatures", new FFFactory<SourceSpanSizeFeatures>);
- ff_registry.Register("RuleNgramFeatures", new FFFactory<RuleNgramFeatures>());
ff_registry.Register("CMR2008ReorderingFeatures", new FFFactory<CMR2008ReorderingFeatures>());
+ ff_registry.Register("RuleSourceBigramFeatures", new FFFactory<RuleSourceBigramFeatures>());
+ ff_registry.Register("RuleTargetBigramFeatures", new FFFactory<RuleTargetBigramFeatures>());
ff_registry.Register("KLanguageModel", new KLanguageModelFactory());
ff_registry.Register("NonLatinCount", new FFFactory<NonLatinCount>);
ff_registry.Register("RuleShape", new FFFactory<RuleShapeFeatures>);
diff --git a/decoder/ff_rules.cc b/decoder/ff_rules.cc
index 0aafb0ba..6716d3da 100644
--- a/decoder/ff_rules.cc
+++ b/decoder/ff_rules.cc
@@ -68,15 +68,15 @@ void RuleIdentityFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
features->add_value(it->second, 1);
}
-RuleNgramFeatures::RuleNgramFeatures(const std::string& param) {
+RuleSourceBigramFeatures::RuleSourceBigramFeatures(const std::string& param) {
}
-void RuleNgramFeatures::PrepareForInput(const SentenceMetadata& smeta) {
+void RuleSourceBigramFeatures::PrepareForInput(const SentenceMetadata& smeta) {
// std::map<const TRule*, SparseVector<double> >
rule2_feats_.clear();
}
-void RuleNgramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+void RuleSourceBigramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
const Hypergraph::Edge& edge,
const vector<const void*>& ant_contexts,
SparseVector<double>* features,
@@ -94,14 +94,52 @@ void RuleNgramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
assert(w > 0);
const string& cur = TD::Convert(w);
ostringstream os;
- os << "RB:" << prev << '_' << cur;
+ os << "RBS:" << prev << '_' << cur;
const int fid = FD::Convert(Escape(os.str()));
if (fid <= 0) return;
f.add_value(fid, 1.0);
prev = cur;
}
ostringstream os;
- os << "RB:" << prev << '_' << "</r>";
+ os << "RBS:" << prev << '_' << "</r>";
+ f.set_value(FD::Convert(Escape(os.str())), 1.0);
+ }
+ (*features) += it->second;
+}
+
+RuleTargetBigramFeatures::RuleTargetBigramFeatures(const std::string& param) {
+}
+
+void RuleTargetBigramFeatures::PrepareForInput(const SentenceMetadata& smeta) {
+ rule2_feats_.clear();
+}
+
+void RuleTargetBigramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const {
+ map<const TRule*, SparseVector<double> >::iterator it = rule2_feats_.find(edge.rule_.get());
+ if (it == rule2_feats_.end()) {
+ const TRule& rule = *edge.rule_;
+ it = rule2_feats_.insert(make_pair(&rule, SparseVector<double>())).first;
+ SparseVector<double>& f = it->second;
+ string prev = "<r>";
+ for (int i = 0; i < rule.e_.size(); ++i) {
+ WordID w = rule.e_[i];
+ if (w < 0) w = -w;
+ if (w == 0) return;
+ const string& cur = TD::Convert(w);
+ ostringstream os;
+ os << "RBT:" << prev << '_' << cur;
+ const int fid = FD::Convert(Escape(os.str()));
+ if (fid <= 0) return;
+ f.add_value(fid, 1.0);
+ prev = cur;
+ }
+ ostringstream os;
+ os << "RBT:" << prev << '_' << "</r>";
f.set_value(FD::Convert(Escape(os.str())), 1.0);
}
(*features) += it->second;
diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h
index 7f5e1dfa..b100ec34 100644
--- a/decoder/ff_rules.h
+++ b/decoder/ff_rules.h
@@ -5,6 +5,7 @@
#include <map>
#include "trule.h"
#include "ff.h"
+#include "hg.h"
#include "array2d.h"
#include "wordid.h"
@@ -23,9 +24,24 @@ class RuleIdentityFeatures : public FeatureFunction {
mutable std::map<const TRule*, int> rule2_fid_;
};
-class RuleNgramFeatures : public FeatureFunction {
+class RuleSourceBigramFeatures : public FeatureFunction {
public:
- RuleNgramFeatures(const std::string& param);
+ RuleSourceBigramFeatures(const std::string& param);
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+ virtual void PrepareForInput(const SentenceMetadata& smeta);
+ private:
+ mutable std::map<const TRule*, SparseVector<double> > rule2_feats_;
+};
+
+class RuleTargetBigramFeatures : public FeatureFunction {
+ public:
+ RuleTargetBigramFeatures(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
const HG::Edge& edge,