diff options
field | value | date |
---|---|---|
author | Chris Dyer <cdyer@cs.cmu.edu> | 2011-02-26 17:21:21 -0500 |
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2011-02-26 17:21:21 -0500 |
commit | 6ce05aeddfc4e57e943a016e10a3c158a5c449bb (patch) | |
tree | 15a5c41306b5ac596c74eb80c9c69c2bf8c5ccda /decoder | |
parent | bebc061c3ac9799a0c91abc1a1fff1f57f5a3522 (diff) | |
Chiang, Marton, Resnik (2008) fine-grained reordering features
Diffstat (limited to 'decoder')
-rw-r--r-- | decoder/cdec_ff.cc | 1 |
-rw-r--r-- | decoder/ff_spans.cc | 62 |
-rw-r--r-- | decoder/ff_spans.h | 25 |
3 files changed, 88 insertions, 0 deletions
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index 7bcee6b8..7ec54a5a 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -51,6 +51,7 @@ void register_feature_functions() { ff_registry.Register("RandLM", new FFFactory<LanguageModelRandLM>); #endif ff_registry.Register("SpanFeatures", new FFFactory<SpanFeatures>()); + ff_registry.Register("CMR2008ReorderingFeatures", new FFFactory<CMR2008ReorderingFeatures>()); ff_registry.Register("KLanguageModel", new FFFactory<KLanguageModel<lm::ngram::ProbingModel> >()); ff_registry.Register("KLanguageModel_Sorted", new FFFactory<KLanguageModel<lm::ngram::SortedModel> >()); ff_registry.Register("KLanguageModel_Trie", new FFFactory<KLanguageModel<lm::ngram::TrieModel> >()); diff --git a/decoder/ff_spans.cc b/decoder/ff_spans.cc index b473c8a4..1cf72be9 100644 --- a/decoder/ff_spans.cc +++ b/decoder/ff_spans.cc @@ -2,6 +2,7 @@ #include <sstream> #include <cassert> +#include <cmath> #include "filelib.h" #include "stringlib.h" @@ -155,3 +156,64 @@ void SpanFeatures::PrepareForInput(const SentenceMetadata& smeta) { } } +inline bool IsArity2RuleReordered(const TRule& rule) { + const vector<WordID>& e = rule.e_; + for (int i = 0; i < e.size(); ++i) { + if (e[i] <= 0) { return e[i] < 0; } + } + cerr << "IsArity2RuleReordered failed on:\n" << rule.AsString() << endl; + abort(); +} + +// Chiang, Marton, Resnik 2008 "fine-grained" reordering features +CMR2008ReorderingFeatures::CMR2008ReorderingFeatures(const std::string& param) : + kS(TD::Convert("S") * -1), + use_collapsed_features_(false) { + if (param.size() > 0) { + use_collapsed_features_ = true; + assert(!"not implemented"); // TODO + } else { + unconditioned_fids_.first = FD::Convert("CMRMono"); + unconditioned_fids_.second = FD::Convert("CMRReorder"); + fids_.resize(16); fids_[0].first = fids_[0].second = -1; + // since I use a log transform, I go a bit higher than David, who bins everything > 10 + for (int span_size = 1; span_size <= 15; ++span_size) { + 
ostringstream m, r; + m << "CMRMono_" << SpanSizeTransform(span_size); + fids_[span_size].first = FD::Convert(m.str()); + r << "CMRReorder_" << SpanSizeTransform(span_size); + fids_[span_size].second = FD::Convert(r.str()); + } + } +} + +int CMR2008ReorderingFeatures::SpanSizeTransform(unsigned span_size) { + if (!span_size) return 0; + return static_cast<int>(log(span_size+1) / log(1.39)) - 1; +} + +void CMR2008ReorderingFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const { + if (edge.Arity() != 2) return; + if (edge.rule_->lhs_ == kS) return; + assert(edge.i_ >= 0); + assert(edge.j_ > edge.i_); + const bool is_reordered = IsArity2RuleReordered(*edge.rule_); + const unsigned span_size = edge.j_ - edge.i_; + if (use_collapsed_features_) { + assert(!"not impl"); // TODO + } else { + if (is_reordered) { + features->set_value(unconditioned_fids_.second, 1.0); + features->set_value(fids_[span_size].second, 1.0); + } else { + features->set_value(unconditioned_fids_.first, 1.0); + features->set_value(fids_[span_size].first, 1.0); + } + } +} + diff --git a/decoder/ff_spans.h b/decoder/ff_spans.h index b93faec5..9928d70f 100644 --- a/decoder/ff_spans.h +++ b/decoder/ff_spans.h @@ -41,4 +41,29 @@ class SpanFeatures : public FeatureFunction { WordID oov_; }; +class CMR2008ReorderingFeatures : public FeatureFunction { + public: + CMR2008ReorderingFeatures(const std::string& param); + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const; + private: + static int SpanSizeTransform(unsigned span_size); + + const int kS; + std::pair<int, int> unconditioned_fids_; // first = monotone + // second 
= inverse + std::vector<std::pair<int, int> > fids_; // index=(j-i) + + // collapsed feature values + bool use_collapsed_features_; + int fid_reorder_; + std::pair<double, double> uncoditioned_vals_; + std::vector<std::pair<double, double> > fvals_; +}; + #endif |