summary refs log tree commit diff
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2011-02-26 17:21:21 -0500
committer Chris Dyer <cdyer@cs.cmu.edu> 2011-02-26 17:21:21 -0500
commit c443a0d21004d06cb57b5e7d120180bd0519f827 (patch)
tree 6556539b26a8610cccb790b0e16b24803fd91d54
parent 27e9c0f30667697dc15263610b97afcc14e23c07 (diff)
chiang, marton, resnik fine reordering features
-rw-r--r-- decoder/cdec_ff.cc 1
-rw-r--r-- decoder/ff_spans.cc 62
-rw-r--r-- decoder/ff_spans.h 25
3 files changed, 88 insertions, 0 deletions
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 7bcee6b8..7ec54a5a 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -51,6 +51,7 @@ void register_feature_functions() {
ff_registry.Register("RandLM", new FFFactory<LanguageModelRandLM>);
#endif
ff_registry.Register("SpanFeatures", new FFFactory<SpanFeatures>());
+ ff_registry.Register("CMR2008ReorderingFeatures", new FFFactory<CMR2008ReorderingFeatures>());
ff_registry.Register("KLanguageModel", new FFFactory<KLanguageModel<lm::ngram::ProbingModel> >());
ff_registry.Register("KLanguageModel_Sorted", new FFFactory<KLanguageModel<lm::ngram::SortedModel> >());
ff_registry.Register("KLanguageModel_Trie", new FFFactory<KLanguageModel<lm::ngram::TrieModel> >());
diff --git a/decoder/ff_spans.cc b/decoder/ff_spans.cc
index b473c8a4..1cf72be9 100644
--- a/decoder/ff_spans.cc
+++ b/decoder/ff_spans.cc
@@ -2,6 +2,7 @@
#include <sstream>
#include <cassert>
+#include <cmath>
#include "filelib.h"
#include "stringlib.h"
@@ -155,3 +156,64 @@ void SpanFeatures::PrepareForInput(const SentenceMetadata& smeta) {
}
}
+// Reports whether a rule's target side visits its nonterminals out of order.
+//
+// Walks rule.e_ from the front and stops at the first entry that is <= 0;
+// the result is true when that entry is strictly negative and false when it
+// is exactly 0.
+// NOTE(review): presumably entries <= 0 in e_ are nonterminal slots, with 0
+// standing for the first one -- confirm against TRule.
+//
+// If e_ holds no entry <= 0 the rule is printed to stderr and the process
+// aborts.
+inline bool IsArity2RuleReordered(const TRule& rule) {
+  const vector<WordID>& target = rule.e_;
+  for (vector<WordID>::const_iterator it = target.begin(); it != target.end(); ++it) {
+    if (*it > 0) continue;  // skip entries that are strictly positive
+    return *it < 0;
+  }
+  cerr << "IsArity2RuleReordered failed on:\n" << rule.AsString() << endl;
+  abort();
+}
+
+// Chiang, Marton, Resnik 2008 "fine-grained" reordering features.
+//
+// param: must be empty. A non-empty value requests the "collapsed" feature
+//        variant, which is not implemented yet (TODO). That request is now
+//        reported on stderr and aborts unconditionally; the previous
+//        assert() vanished under NDEBUG and left an object that silently
+//        produced no features.
+//
+// With an empty param this looks up the unconditioned feature ids
+// (CMRMono / CMRReorder) and one CMRMono_<bin> / CMRReorder_<bin> pair for
+// every span size 1..kMaxSpanSize, where <bin> = SpanSizeTransform(span size).
+// Several span sizes may share a bin and therefore a feature name.
+CMR2008ReorderingFeatures::CMR2008ReorderingFeatures(const std::string& param) :
+    kS(TD::Convert("S") * -1),
+    use_collapsed_features_(false),
+    fid_reorder_(-1) {  // only meaningful in collapsed mode; never leave it indeterminate
+  if (!param.empty()) {
+    cerr << "CMR2008ReorderingFeatures: collapsed features are not implemented (param='"
+         << param << "')" << endl;  // TODO
+    abort();
+  }
+
+  unconditioned_fids_.first = FD::Convert("CMRMono");
+  unconditioned_fids_.second = FD::Convert("CMRReorder");
+
+  // since I use a log transform, I go a bit higher than David, who bins everything > 10
+  const int kMaxSpanSize = 15;
+  fids_.resize(kMaxSpanSize + 1);
+  fids_[0].first = fids_[0].second = -1;  // slot 0 gets no feature name
+  for (int span_size = 1; span_size <= kMaxSpanSize; ++span_size) {
+    ostringstream m, r;
+    m << "CMRMono_" << SpanSizeTransform(span_size);
+    fids_[span_size].first = FD::Convert(m.str());
+    r << "CMRReorder_" << SpanSizeTransform(span_size);
+    fids_[span_size].second = FD::Convert(r.str());
+  }
+}
+
+// Maps a span size onto a logarithmic bin number.
+//
+// Returns 0 for a span size of 0. Otherwise returns
+// trunc(log(span_size + 1) / log(1.39)) - 1, i.e. the base-1.39 logarithm of
+// (span_size + 1), truncated toward zero and shifted down by one.
+int CMR2008ReorderingFeatures::SpanSizeTransform(unsigned span_size) {
+  return span_size == 0
+      ? 0
+      : static_cast<int>(log(static_cast<double>(span_size + 1)) / log(1.39)) - 1;
+}
+
+// Sets the reordering indicator features for a single hypergraph edge.
+//
+// Edges whose arity is not 2, or whose rule has kS as its left-hand side,
+// are left untouched. For every other edge the rule is classified with
+// IsArity2RuleReordered and two features are set to 1.0 in *features:
+//   - the unconditioned monotone / reordered indicator, and
+//   - the monotone / reordered indicator for the edge's span size
+//     (edge.j_ - edge.i_).
+//
+// fids_ only has entries up to a fixed maximum span size, so larger spans
+// are clamped onto the last entry. Indexing with the raw span size read past
+// the end of the vector for any span longer than that maximum.
+//
+// smeta, ant_contexts, estimated_features and context are not used.
+void CMR2008ReorderingFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                                      const Hypergraph::Edge& edge,
+                                                      const vector<const void*>& ant_contexts,
+                                                      SparseVector<double>* features,
+                                                      SparseVector<double>* estimated_features,
+                                                      void* context) const {
+  if (edge.Arity() != 2) return;
+  if (edge.rule_->lhs_ == kS) return;
+  assert(edge.i_ >= 0);
+  assert(edge.j_ > edge.i_);
+  const bool is_reordered = IsArity2RuleReordered(*edge.rule_);
+  if (use_collapsed_features_) {
+    assert(!"not impl"); // TODO
+    return;
+  }
+
+  // Clamp the span size to the last bin that has feature ids.
+  assert(!fids_.empty());
+  const unsigned max_span_size = static_cast<unsigned>(fids_.size() - 1);
+  unsigned span_size = edge.j_ - edge.i_;
+  if (span_size > max_span_size) span_size = max_span_size;
+
+  if (is_reordered) {
+    features->set_value(unconditioned_fids_.second, 1.0);
+    features->set_value(fids_[span_size].second, 1.0);
+  } else {
+    features->set_value(unconditioned_fids_.first, 1.0);
+    features->set_value(fids_[span_size].first, 1.0);
+  }
+}
+
diff --git a/decoder/ff_spans.h b/decoder/ff_spans.h
index b93faec5..9928d70f 100644
--- a/decoder/ff_spans.h
+++ b/decoder/ff_spans.h
@@ -41,4 +41,29 @@ class SpanFeatures : public FeatureFunction {
WordID oov_;
};
+// Feature function implementing the Chiang, Marton, Resnik 2008
+// "fine-grained" reordering features: indicator features that record whether
+// an arity-2 rule application is monotone or reordered, both unconditioned
+// and per span size.
+class CMR2008ReorderingFeatures : public FeatureFunction {
+ public:
+  // param: an empty string selects the per-span-size indicator features.
+  //        Any non-empty value selects the "collapsed" variant, which is
+  //        not implemented yet.
+  CMR2008ReorderingFeatures(const std::string& param);
+ protected:
+  // Sets the reordering features for one edge in *features.
+  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                     const Hypergraph::Edge& edge,
+                                     const std::vector<const void*>& ant_contexts,
+                                     SparseVector<double>* features,
+                                     SparseVector<double>* estimated_features,
+                                     void* context) const;
+ private:
+  // Maps a span size to the bin number used in the feature names.
+  static int SpanSizeTransform(unsigned span_size);
+
+  const int kS;  // negated TD id of "S"; edges whose rule has this lhs are skipped
+  std::pair<int, int> unconditioned_fids_;  // first = monotone
+                                            // second = inverse
+  std::vector<std::pair<int, int> > fids_;  // index=(j-i)
+
+  // collapsed feature values (collapsed mode is not implemented yet, so the
+  // members below are currently unused)
+  bool use_collapsed_features_;
+  int fid_reorder_;
+  std::pair<double, double> unconditioned_vals_;
+  std::vector<std::pair<double, double> > fvals_;
+};
+
#endif