summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cs.cmu.edu>2011-09-09 10:15:56 +0200
committerChris Dyer <cdyer@cs.cmu.edu>2011-09-09 10:15:56 +0200
commit4b6222733c4f9dcb3516bcc64394fa8b4716ce48 (patch)
treeb40ed574fa6abd9b4aa79a84410ec6a8fe04a7da
parent673949e9e7bb03e88ebc16e181634335aed96f2b (diff)
rule feature refactoring
-rw-r--r--decoder/Makefile.am1
-rw-r--r--decoder/cdec_ff.cc2
-rw-r--r--decoder/ff_rules.cc107
-rw-r--r--decoder/ff_rules.h40
-rw-r--r--decoder/ff_spans.cc39
-rw-r--r--decoder/ff_spans.h15
6 files changed, 150 insertions, 54 deletions
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index d884c431..e5f7505f 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -61,6 +61,7 @@ libcdec_a_SOURCES = \
phrasetable_fst.cc \
trule.cc \
ff.cc \
+ ff_rules.cc \
ff_wordset.cc \
ff_charset.cc \
ff_lm.cc \
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 1ef76a05..588842f1 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -9,6 +9,7 @@
#include "ff_wordalign.h"
#include "ff_tagger.h"
#include "ff_factory.h"
+#include "ff_rules.h"
#include "ff_ruleshape.h"
#include "ff_bleu.h"
#include "ff_lm_fsa.h"
@@ -53,6 +54,7 @@ void register_feature_functions() {
#endif
ff_registry.Register("SpanFeatures", new FFFactory<SpanFeatures>());
ff_registry.Register("NgramFeatures", new FFFactory<NgramDetector>());
+ ff_registry.Register("RuleIdentityFeatures", new FFFactory<RuleIdentityFeatures>());
ff_registry.Register("RuleNgramFeatures", new FFFactory<RuleNgramFeatures>());
ff_registry.Register("CMR2008ReorderingFeatures", new FFFactory<CMR2008ReorderingFeatures>());
ff_registry.Register("KLanguageModel", new KLanguageModelFactory());
diff --git a/decoder/ff_rules.cc b/decoder/ff_rules.cc
new file mode 100644
index 00000000..bd4c4cc0
--- /dev/null
+++ b/decoder/ff_rules.cc
@@ -0,0 +1,107 @@
+#include "ff_rules.h"
+
+#include <sstream>
+#include <cassert>
+#include <cmath>
+
+#include "filelib.h"
+#include "stringlib.h"
+#include "sentence_metadata.h"
+#include "lattice.h"
+#include "fdict.h"
+#include "verbose.h"
+
+using namespace std;
+
+namespace {
+ string Escape(const string& x) {
+ string y = x;
+ for (int i = 0; i < y.size(); ++i) {
+ if (y[i] == '=') y[i]='_';
+ if (y[i] == ';') y[i]='_';
+ }
+ return y;
+ }
+}
+
+RuleIdentityFeatures::RuleIdentityFeatures(const std::string& param) {
+}
+
+void RuleIdentityFeatures::PrepareForInput(const SentenceMetadata& smeta) {
+// std::map<const TRule*, SparseVector<double> >
+ rule2_fid_.clear();
+}
+
+void RuleIdentityFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const {
+ map<const TRule*, int>::iterator it = rule2_fid_.find(edge.rule_.get());
+ if (it == rule2_fid_.end()) {
+ const TRule& rule = *edge.rule_;
+ ostringstream os;
+ os << "R:";
+ if (rule.lhs_ < 0) os << TD::Convert(-rule.lhs_) << ':';
+ for (unsigned i = 0; i < rule.f_.size(); ++i) {
+ if (i > 0) os << '_';
+ WordID w = rule.f_[i];
+ if (w < 0) { os << 'N'; w = -w; }
+ assert(w > 0);
+ os << TD::Convert(w);
+ }
+ os << ':';
+ for (unsigned i = 0; i < rule.e_.size(); ++i) {
+ if (i > 0) os << '_';
+ WordID w = rule.e_[i];
+ if (w <= 0) {
+ os << 'N' << (1-w);
+ } else {
+ os << TD::Convert(w);
+ }
+ }
+ it = rule2_fid_.insert(make_pair(&rule, FD::Convert(Escape(os.str())))).first;
+ }
+ features->add_value(it->second, 1);
+}
+
+RuleNgramFeatures::RuleNgramFeatures(const std::string& param) {
+}
+
+void RuleNgramFeatures::PrepareForInput(const SentenceMetadata& smeta) {
+// std::map<const TRule*, SparseVector<double> >
+ rule2_feats_.clear();
+}
+
+void RuleNgramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const {
+ map<const TRule*, SparseVector<double> >::iterator it = rule2_feats_.find(edge.rule_.get());
+ if (it == rule2_feats_.end()) {
+ const TRule& rule = *edge.rule_;
+ it = rule2_feats_.insert(make_pair(&rule, SparseVector<double>())).first;
+ SparseVector<double>& f = it->second;
+ string prev = "<r>";
+ for (int i = 0; i < rule.f_.size(); ++i) {
+ WordID w = rule.f_[i];
+ if (w < 0) w = -w;
+ assert(w > 0);
+ const string& cur = TD::Convert(w);
+ ostringstream os;
+ os << "RB:" << prev << '_' << cur;
+ const int fid = FD::Convert(Escape(os.str()));
+ if (fid <= 0) return;
+ f.add_value(fid, 1.0);
+ prev = cur;
+ }
+ ostringstream os;
+ os << "RB:" << prev << '_' << "</r>";
+ f.set_value(FD::Convert(Escape(os.str())), 1.0);
+ }
+ (*features) += it->second;
+}
+
diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h
new file mode 100644
index 00000000..48d8bd05
--- /dev/null
+++ b/decoder/ff_rules.h
@@ -0,0 +1,40 @@
+#ifndef _FF_RULES_H_
+#define _FF_RULES_H_
+
+#include <vector>
+#include <map>
+#include "ff.h"
+#include "array2d.h"
+#include "wordid.h"
+
+class RuleIdentityFeatures : public FeatureFunction {
+ public:
+ RuleIdentityFeatures(const std::string& param);
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+ virtual void PrepareForInput(const SentenceMetadata& smeta);
+ private:
+ mutable std::map<const TRule*, int> rule2_fid_;
+};
+
+class RuleNgramFeatures : public FeatureFunction {
+ public:
+ RuleNgramFeatures(const std::string& param);
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+ virtual void PrepareForInput(const SentenceMetadata& smeta);
+ private:
+ mutable std::map<const TRule*, SparseVector<double> > rule2_feats_;
+};
+
+#endif
diff --git a/decoder/ff_spans.cc b/decoder/ff_spans.cc
index bc23974d..0483517b 100644
--- a/decoder/ff_spans.cc
+++ b/decoder/ff_spans.cc
@@ -193,45 +193,6 @@ void SpanFeatures::PrepareForInput(const SentenceMetadata& smeta) {
}
}
-RuleNgramFeatures::RuleNgramFeatures(const std::string& param) {
-}
-
-void RuleNgramFeatures::PrepareForInput(const SentenceMetadata& smeta) {
-// std::map<const TRule*, SparseVector<double> >
- rule2_feats_.clear();
-}
-
-void RuleNgramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
- const vector<const void*>& ant_contexts,
- SparseVector<double>* features,
- SparseVector<double>* estimated_features,
- void* context) const {
- map<const TRule*, SparseVector<double> >::iterator it = rule2_feats_.find(edge.rule_.get());
- if (it == rule2_feats_.end()) {
- const TRule& rule = *edge.rule_;
- it = rule2_feats_.insert(make_pair(&rule, SparseVector<double>())).first;
- SparseVector<double>& f = it->second;
- string prev = "<r>";
- for (int i = 0; i < rule.f_.size(); ++i) {
- WordID w = rule.f_[i];
- if (w < 0) w = -w;
- assert(w > 0);
- const string& cur = TD::Convert(w);
- ostringstream os;
- os << "RB:" << prev << '_' << cur;
- const int fid = FD::Convert(Escape(os.str()));
- if (fid <= 0) return;
- f.add_value(fid, 1.0);
- prev = cur;
- }
- ostringstream os;
- os << "RB:" << prev << '_' << "</r>";
- f.set_value(FD::Convert(Escape(os.str())), 1.0);
- }
- (*features) += it->second;
-}
-
inline bool IsArity2RuleReordered(const TRule& rule) {
const vector<WordID>& e = rule.e_;
for (int i = 0; i < e.size(); ++i) {
diff --git a/decoder/ff_spans.h b/decoder/ff_spans.h
index b22c4d03..24e0dede 100644
--- a/decoder/ff_spans.h
+++ b/decoder/ff_spans.h
@@ -44,21 +44,6 @@ class SpanFeatures : public FeatureFunction {
WordID oov_;
};
-class RuleNgramFeatures : public FeatureFunction {
- public:
- RuleNgramFeatures(const std::string& param);
- protected:
- virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
- const std::vector<const void*>& ant_contexts,
- SparseVector<double>* features,
- SparseVector<double>* estimated_features,
- void* context) const;
- virtual void PrepareForInput(const SentenceMetadata& smeta);
- private:
- mutable std::map<const TRule*, SparseVector<double> > rule2_feats_;
-};
-
class CMR2008ReorderingFeatures : public FeatureFunction {
public:
CMR2008ReorderingFeatures(const std::string& param);