From 6ce05aeddfc4e57e943a016e10a3c158a5c449bb Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Sat, 26 Feb 2011 17:21:21 -0500
Subject: chiang, marton, resnik fine reordering features

---
Reviewer notes (ignored by git-am):
 * This copy was re-flowed from a whitespace-collapsed export that had dropped
   all text between angle brackets. The <...> includes and the template
   arguments below were restored from context; check them against the tree
   before applying.
 * The added code differs from the original commit (the span index is clamped
   to the size of fids_, comments were added, the spelling of
   unconditioned_vals_ was fixed), so the commit id above and the post-image
   blob ids on the "index" lines no longer match.

 decoder/cdec_ff.cc  |  1 +
 decoder/ff_spans.cc | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 decoder/ff_spans.h  | 30 ++++++++++++++++++++
 3 files changed, 113 insertions(+)

diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 7bcee6b8..7ec54a5a 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -51,6 +51,7 @@ void register_feature_functions() {
   ff_registry.Register("RandLM", new FFFactory<LanguageModelRandLM>);
 #endif
   ff_registry.Register("SpanFeatures", new FFFactory<SpanFeatures>());
+  ff_registry.Register("CMR2008ReorderingFeatures", new FFFactory<CMR2008ReorderingFeatures>());
   ff_registry.Register("KLanguageModel", new FFFactory<KLanguageModel<lm::ngram::ProbingModel> >());
   ff_registry.Register("KLanguageModel_Sorted", new FFFactory<KLanguageModel<lm::ngram::SortedModel> >());
   ff_registry.Register("KLanguageModel_Trie", new FFFactory<KLanguageModel<lm::ngram::TrieModel> >());
diff --git a/decoder/ff_spans.cc b/decoder/ff_spans.cc
index b473c8a4..1cf72be9 100644
--- a/decoder/ff_spans.cc
+++ b/decoder/ff_spans.cc
@@ -2,6 +2,7 @@
 
 #include <sstream>
 #include <cassert>
+#include <cmath>
 
 #include "filelib.h"
 #include "stringlib.h"
@@ -155,3 +156,84 @@ void SpanFeatures::PrepareForInput(const SentenceMetadata& smeta) {
   }
 }
 
+// Reports whether the first element of rule.e_ that is <= 0 is strictly
+// negative, i.e. (presumably) whether the second nonterminal comes first on
+// the target side -- confirm the variable encoding against TRule. Aborts if
+// e_ contains no element <= 0.
+inline bool IsArity2RuleReordered(const TRule& rule) {
+  const vector<WordID>& e = rule.e_;
+  for (size_t i = 0; i < e.size(); ++i) {
+    if (e[i] <= 0) { return e[i] < 0; }
+  }
+  cerr << "IsArity2RuleReordered failed on:\n" << rule.AsString() << endl;
+  abort();
+}
+
+// Chiang, Marton, Resnik 2008 "fine-grained" reordering features
+//
+// An empty param registers CMRMono / CMRReorder plus one mono/reorder pair
+// per span size 1..15, named after the bin given by SpanSizeTransform. A
+// non-empty param selects the collapsed-feature mode, which is still a TODO.
+CMR2008ReorderingFeatures::CMR2008ReorderingFeatures(const std::string& param) :
+    kS(TD::Convert("S") * -1),
+    use_collapsed_features_(false) {
+  if (param.size() > 0) {
+    use_collapsed_features_ = true;
+    assert(!"not implemented"); // TODO
+  } else {
+    unconditioned_fids_.first = FD::Convert("CMRMono");
+    unconditioned_fids_.second = FD::Convert("CMRReorder");
+    // slot 0 is a placeholder: TraversalFeaturesImpl asserts edge.j_ > edge.i_
+    fids_.resize(16);
+    fids_[0].first = fids_[0].second = -1;
+    // since I use a log transform, I go a bit higher than David, who bins everything > 10
+    for (int span_size = 1; span_size <= 15; ++span_size) {
+      ostringstream m, r;
+      m << "CMRMono_" << SpanSizeTransform(span_size);
+      fids_[span_size].first = FD::Convert(m.str());
+      r << "CMRReorder_" << SpanSizeTransform(span_size);
+      fids_[span_size].second = FD::Convert(r.str());
+    }
+  }
+}
+
+// Maps a span length to a log-scale bin: 0 for an empty span, otherwise
+// log(span_size + 1) / log(1.39), truncated to an int, minus 1.
+int CMR2008ReorderingFeatures::SpanSizeTransform(unsigned span_size) {
+  if (!span_size) return 0;
+  // add 1.0 so the sum is computed in double and cannot wrap around
+  return static_cast<int>(log(span_size + 1.0) / log(1.39)) - 1;
+}
+
+// Fires the reordering indicators for one edge. Edges that are not binary, or
+// whose rule has LHS kS, get no features; every other edge gets the
+// unconditioned mono/reorder feature plus the one for its span size.
+void CMR2008ReorderingFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+    const Hypergraph::Edge& edge,
+    const vector<const void*>& ant_contexts,
+    SparseVector<double>* features,
+    SparseVector<double>* estimated_features,
+    void* context) const {
+  if (edge.Arity() != 2) return;
+  if (edge.rule_->lhs_ == kS) return;
+  assert(edge.i_ >= 0);
+  assert(edge.j_ > edge.i_);
+  const bool is_reordered = IsArity2RuleReordered(*edge.rule_);
+  const unsigned span_size = edge.j_ - edge.i_;
+  if (use_collapsed_features_) {
+    assert(!"not impl"); // TODO
+  } else {
+    // fids_ stops at span 15; longer spans share the last slot rather than
+    // indexing past the end of the vector
+    const unsigned max_bin = static_cast<unsigned>(fids_.size()) - 1;
+    const unsigned bin = span_size < max_bin ? span_size : max_bin;
+    if (is_reordered) {
+      features->set_value(unconditioned_fids_.second, 1.0);
+      features->set_value(fids_[bin].second, 1.0);
+    } else {
+      features->set_value(unconditioned_fids_.first, 1.0);
+      features->set_value(fids_[bin].first, 1.0);
+    }
+  }
+}
+
diff --git a/decoder/ff_spans.h b/decoder/ff_spans.h
index b93faec5..9928d70f 100644
--- a/decoder/ff_spans.h
+++ b/decoder/ff_spans.h
@@ -41,4 +41,34 @@ class SpanFeatures : public FeatureFunction {
   WordID oov_;
 };
 
+// Chiang, Marton & Resnik (2008) fine-grained reordering features: indicator
+// features for whether an arity-2 rule is monotone or inverted, both
+// unconditioned and keyed by the width (j-i) of the span the rule covers.
+class CMR2008ReorderingFeatures : public FeatureFunction {
+ public:
+  // A non-empty param requests collapsed features (not implemented yet).
+  CMR2008ReorderingFeatures(const std::string& param);
+ protected:
+  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                     const Hypergraph::Edge& edge,
+                                     const std::vector<const void*>& ant_contexts,
+                                     SparseVector<double>* features,
+                                     SparseVector<double>* estimated_features,
+                                     void* context) const;
+ private:
+  // log-scale bin used to name the span-conditioned features
+  static int SpanSizeTransform(unsigned span_size);
+
+  const int kS;  // TD::Convert("S") * -1; rules with this LHS are skipped
+  std::pair<int, int> unconditioned_fids_;  // first = monotone
+                                            // second = inverse
+  std::vector<std::pair<int, int> > fids_;  // index=(j-i)
+
+  // collapsed feature values
+  bool use_collapsed_features_;
+  int fid_reorder_;
+  std::pair<double, double> unconditioned_vals_;
+  std::vector<std::pair<double, double> > fvals_;
+};
+
 #endif
-- 
cgit v1.2.3