summary refs log tree commit diff
path: root/decoder
diff options
context:
space:
mode:
author Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-06-24 16:42:56 +0200
committer Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-06-24 16:42:56 +0200
commit dfd23deb734266fed5096043f5411c03feb2c356 (patch)
tree ea8c68149c2b75e46093837a71b7f207810a73d8 /decoder
parent d790e7aea5ffdf3c3e15683fe3d8b2b17a92b62f (diff)
RuleTargetBigramFeatures, parallelize.rb
Diffstat (limited to 'decoder')
-rw-r--r-- decoder/cdec_ff.cc 3
-rw-r--r-- decoder/ff_rules.cc 48
-rw-r--r-- decoder/ff_rules.h 19
3 files changed, 62 insertions, 8 deletions
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index b516c386..d64bdada 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -47,8 +47,9 @@ void register_feature_functions() {
ff_registry.Register("RuleIdentityFeatures", new FFFactory<RuleIdentityFeatures>());
ff_registry.Register("SourceSyntaxFeatures", new FFFactory<SourceSyntaxFeatures>);
ff_registry.Register("SourceSpanSizeFeatures", new FFFactory<SourceSpanSizeFeatures>);
- ff_registry.Register("RuleNgramFeatures", new FFFactory<RuleNgramFeatures>());
ff_registry.Register("CMR2008ReorderingFeatures", new FFFactory<CMR2008ReorderingFeatures>());
+ ff_registry.Register("RuleSourceBigramFeatures", new FFFactory<RuleSourceBigramFeatures>());
+ ff_registry.Register("RuleTargetBigramFeatures", new FFFactory<RuleTargetBigramFeatures>());
ff_registry.Register("KLanguageModel", new KLanguageModelFactory());
ff_registry.Register("NonLatinCount", new FFFactory<NonLatinCount>);
ff_registry.Register("RuleShape", new FFFactory<RuleShapeFeatures>);
diff --git a/decoder/ff_rules.cc b/decoder/ff_rules.cc
index bd4c4cc0..3d0e514a 100644
--- a/decoder/ff_rules.cc
+++ b/decoder/ff_rules.cc
@@ -66,15 +66,15 @@ void RuleIdentityFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
features->add_value(it->second, 1);
}
-RuleNgramFeatures::RuleNgramFeatures(const std::string& param) {
+RuleSourceBigramFeatures::RuleSourceBigramFeatures(const std::string& param) {
}
-void RuleNgramFeatures::PrepareForInput(const SentenceMetadata& smeta) {
+void RuleSourceBigramFeatures::PrepareForInput(const SentenceMetadata& smeta) {
// std::map<const TRule*, SparseVector<double> >
rule2_feats_.clear();
}
-void RuleNgramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+void RuleSourceBigramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
const Hypergraph::Edge& edge,
const vector<const void*>& ant_contexts,
SparseVector<double>* features,
@@ -92,14 +92,52 @@ void RuleNgramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
assert(w > 0);
const string& cur = TD::Convert(w);
ostringstream os;
- os << "RB:" << prev << '_' << cur;
+ os << "RBS:" << prev << '_' << cur;
const int fid = FD::Convert(Escape(os.str()));
if (fid <= 0) return;
f.add_value(fid, 1.0);
prev = cur;
}
ostringstream os;
- os << "RB:" << prev << '_' << "</r>";
+ os << "RBS:" << prev << '_' << "</r>";
+ f.set_value(FD::Convert(Escape(os.str())), 1.0);
+ }
+ (*features) += it->second;
+}
+
+RuleTargetBigramFeatures::RuleTargetBigramFeatures(const std::string& param) {
+}
+
+void RuleTargetBigramFeatures::PrepareForInput(const SentenceMetadata& smeta) {
+ rule2_feats_.clear();
+}
+
+void RuleTargetBigramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const {
+ map<const TRule*, SparseVector<double> >::iterator it = rule2_feats_.find(edge.rule_.get());
+ if (it == rule2_feats_.end()) {
+ const TRule& rule = *edge.rule_;
+ it = rule2_feats_.insert(make_pair(&rule, SparseVector<double>())).first;
+ SparseVector<double>& f = it->second;
+ string prev = "<r>";
+ for (int i = 0; i < rule.e_.size(); ++i) {
+ WordID w = rule.e_[i];
+ if (w < 0) w = -w;
+ if (w == 0) return;
+ const string& cur = TD::Convert(w);
+ ostringstream os;
+ os << "RBT:" << prev << '_' << cur;
+ const int fid = FD::Convert(Escape(os.str()));
+ if (fid <= 0) return;
+ f.add_value(fid, 1.0);
+ prev = cur;
+ }
+ ostringstream os;
+ os << "RBT:" << prev << '_' << "</r>";
f.set_value(FD::Convert(Escape(os.str())), 1.0);
}
(*features) += it->second;
diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h
index 48d8bd05..08b168b0 100644
--- a/decoder/ff_rules.h
+++ b/decoder/ff_rules.h
@@ -22,9 +22,24 @@ class RuleIdentityFeatures : public FeatureFunction {
mutable std::map<const TRule*, int> rule2_fid_;
};
-class RuleNgramFeatures : public FeatureFunction {
+class RuleSourceBigramFeatures : public FeatureFunction {
public:
- RuleNgramFeatures(const std::string& param);
+ RuleSourceBigramFeatures(const std::string& param);
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+ virtual void PrepareForInput(const SentenceMetadata& smeta);
+ private:
+ mutable std::map<const TRule*, SparseVector<double> > rule2_feats_;
+};
+
+class RuleTargetBigramFeatures : public FeatureFunction {
+ public:
+ RuleTargetBigramFeatures(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
const Hypergraph::Edge& edge,