diff options
| -rw-r--r-- | decoder/cdec_ff.cc | 1 | ||||
| -rw-r--r-- | decoder/ff_spans.cc | 87 | ||||
| -rw-r--r-- | decoder/ff_spans.h | 28 | 
3 files changed, 116 insertions, 0 deletions
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 7bcee6b8..7ec54a5a 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -51,6 +51,7 @@ void register_feature_functions() {
   ff_registry.Register("RandLM", new FFFactory<LanguageModelRandLM>);
 #endif
   ff_registry.Register("SpanFeatures", new FFFactory<SpanFeatures>());
+  ff_registry.Register("CMR2008ReorderingFeatures", new FFFactory<CMR2008ReorderingFeatures>());
   ff_registry.Register("KLanguageModel", new FFFactory<KLanguageModel<lm::ngram::ProbingModel> >());
   ff_registry.Register("KLanguageModel_Sorted", new FFFactory<KLanguageModel<lm::ngram::SortedModel> >());
   ff_registry.Register("KLanguageModel_Trie", new FFFactory<KLanguageModel<lm::ngram::TrieModel> >());
diff --git a/decoder/ff_spans.cc b/decoder/ff_spans.cc
index b473c8a4..1cf72be9 100644
--- a/decoder/ff_spans.cc
+++ b/decoder/ff_spans.cc
@@ -2,6 +2,8 @@
 
 #include <sstream>
 #include <cassert>
+#include <cmath>
+#include <cstdlib>
 
 #include "filelib.h"
 #include "stringlib.h"
@@ -155,3 +157,88 @@ void SpanFeatures::PrepareForInput(const SentenceMetadata& smeta) {
   }
  }
 
+// Returns true iff the first entry <= 0 on the target side (rule.e_) is
+// negative.  Such entries are presumably the rule's nonterminal slots, so a
+// negative first slot would mean the two children appear in swapped order
+// (TODO confirm against TRule's encoding).  Logs the rule and aborts when the
+// target side contains no entry <= 0.
+inline bool IsArity2RuleReordered(const TRule& rule) {
+  const vector<WordID>& e = rule.e_;
+  for (size_t i = 0; i < e.size(); ++i) {
+    if (e[i] <= 0) { return e[i] < 0; }
+  }
+  cerr << "IsArity2RuleReordered failed on:\n" << rule.AsString() << endl;
+  abort();
+}
+
+// Chiang, Marton, Resnik 2008 "fine-grained" reordering features
+//
+// With an empty param this registers CMRMono / CMRReorder plus one
+// CMRMono_<bin> / CMRReorder_<bin> pair for each span size 1..15, where <bin>
+// is SpanSizeTransform(span size).  A non-empty param selects the collapsed
+// features, which are not implemented: the constructor reports that and aborts.
+CMR2008ReorderingFeatures::CMR2008ReorderingFeatures(const std::string& param) :
+    kS(TD::Convert("S") * -1),
+    use_collapsed_features_(false),
+    fid_reorder_(-1) {
+  if (param.size() > 0) {
+    use_collapsed_features_ = true;
+    // TODO: implement.  A bare assert() vanishes under NDEBUG and would leave
+    // every feature table empty, so fail unconditionally instead.
+    cerr << "CMR2008ReorderingFeatures: collapsed features are not implemented\n";
+    abort();
+  } else {
+    unconditioned_fids_.first = FD::Convert("CMRMono");
+    unconditioned_fids_.second = FD::Convert("CMRReorder");
+    fids_.resize(16); fids_[0].first = fids_[0].second = -1;
+    // since I use a log transform, I go a bit higher than David, who bins everything > 10
+    for (int span_size = 1; span_size <= 15; ++span_size) {
+      ostringstream m, r;
+      m << "CMRMono_" << SpanSizeTransform(span_size);
+      fids_[span_size].first = FD::Convert(m.str());
+      r << "CMRReorder_" << SpanSizeTransform(span_size);
+      fids_[span_size].second = FD::Convert(r.str());
+    }
+  }
+}
+
+// Log-scale binning of a span size: 0 for an empty span, otherwise
+// log(span_size + 1) / log(1.39), truncated to an int, minus one.
+int CMR2008ReorderingFeatures::SpanSizeTransform(unsigned span_size) {
+  if (!span_size) return 0;
+  // Add 1 in floating point so that a huge span_size cannot wrap around to 0.
+  return static_cast<int>(log(static_cast<double>(span_size) + 1.0) / log(1.39)) - 1;
+}
+
+// Sets two indicator features for every arity-2 edge whose rule's lhs_ is not
+// kS: the unconditioned monotone/reordered feature and the one selected by the
+// edge's span size (j_ - i_).  Spans longer than the table share its last entry.
+void CMR2008ReorderingFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                         const Hypergraph::Edge& edge,
+                                         const vector<const void*>& ant_contexts,
+                                         SparseVector<double>* features,
+                                         SparseVector<double>* estimated_features,
+                                         void* context) const {
+  if (edge.Arity() != 2) return;
+  if (edge.rule_->lhs_ == kS) return;
+  assert(edge.i_ >= 0);
+  assert(edge.j_ > edge.i_);
+  const bool is_reordered = IsArity2RuleReordered(*edge.rule_);
+  if (use_collapsed_features_) {
+    assert(!"not impl"); // TODO
+  } else {
+    // fids_ has entries for span sizes 0..fids_.size()-1 only; clamp so that a
+    // longer span cannot index past the end of the table.
+    const unsigned max_span = static_cast<unsigned>(fids_.size()) - 1;
+    const unsigned raw_span = edge.j_ - edge.i_;
+    const unsigned span_size = raw_span < max_span ? raw_span : max_span;
+    if (is_reordered) {
+      features->set_value(unconditioned_fids_.second, 1.0);
+      features->set_value(fids_[span_size].second, 1.0);
+    } else {
+      features->set_value(unconditioned_fids_.first, 1.0);
+      features->set_value(fids_[span_size].first, 1.0);
+    }
+  }
+}
+
diff --git a/decoder/ff_spans.h b/decoder/ff_spans.h
index b93faec5..9928d70f 100644
--- a/decoder/ff_spans.h
+++ b/decoder/ff_spans.h
@@ -41,4 +41,32 @@ class SpanFeatures : public FeatureFunction {
   WordID oov_;
 };
 
+// Indicator features in the style of Chiang, Marton & Resnik (2008) that mark
+// each binary rule application as monotone or reordered, both unconditionally
+// and conditioned on a log-binned span size.
+class CMR2008ReorderingFeatures : public FeatureFunction {
+ public:
+  CMR2008ReorderingFeatures(const std::string& param);
+ protected:
+  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                     const Hypergraph::Edge& edge,
+                                     const std::vector<const void*>& ant_contexts,
+                                     SparseVector<double>* features,
+                                     SparseVector<double>* estimated_features,
+                                     void* context) const;
+ private:
+  static int SpanSizeTransform(unsigned span_size);
+
+  const int kS;
+  std::pair<int, int> unconditioned_fids_;  // first = monotone
+                                            // second = inverse
+  std::vector<std::pair<int, int> > fids_;  // index=(j-i), capped at the last entry
+
+  // collapsed feature values
+  bool use_collapsed_features_;
+  int fid_reorder_;
+  std::pair<double, double> unconditioned_vals_;
+  std::vector<std::pair<double, double> > fvals_;
+};
+
 #endif
