diff options
Diffstat (limited to 'decoder')
| -rw-r--r-- | decoder/cdec_ff.cc | 3 | ||||
| -rw-r--r-- | decoder/ff_rules.cc | 48 | ||||
| -rw-r--r-- | decoder/ff_rules.h | 19 | 
3 files changed, 62 insertions, 8 deletions
| diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index b516c386..d64bdada 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -47,8 +47,9 @@ void register_feature_functions() {    ff_registry.Register("RuleIdentityFeatures", new FFFactory<RuleIdentityFeatures>());    ff_registry.Register("SourceSyntaxFeatures", new FFFactory<SourceSyntaxFeatures>);    ff_registry.Register("SourceSpanSizeFeatures", new FFFactory<SourceSpanSizeFeatures>); -  ff_registry.Register("RuleNgramFeatures", new FFFactory<RuleNgramFeatures>());    ff_registry.Register("CMR2008ReorderingFeatures", new FFFactory<CMR2008ReorderingFeatures>()); +  ff_registry.Register("RuleSourceBigramFeatures", new FFFactory<RuleSourceBigramFeatures>()); +  ff_registry.Register("RuleTargetBigramFeatures", new FFFactory<RuleTargetBigramFeatures>());    ff_registry.Register("KLanguageModel", new KLanguageModelFactory());    ff_registry.Register("NonLatinCount", new FFFactory<NonLatinCount>);    ff_registry.Register("RuleShape", new FFFactory<RuleShapeFeatures>); diff --git a/decoder/ff_rules.cc b/decoder/ff_rules.cc index bd4c4cc0..3d0e514a 100644 --- a/decoder/ff_rules.cc +++ b/decoder/ff_rules.cc @@ -66,15 +66,15 @@ void RuleIdentityFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,    features->add_value(it->second, 1);  } -RuleNgramFeatures::RuleNgramFeatures(const std::string& param) { +RuleSourceBigramFeatures::RuleSourceBigramFeatures(const std::string& param) {  } -void RuleNgramFeatures::PrepareForInput(const SentenceMetadata& smeta) { +void RuleSourceBigramFeatures::PrepareForInput(const SentenceMetadata& smeta) {  //  std::map<const TRule*, SparseVector<double> >    rule2_feats_.clear();  } -void RuleNgramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, +void RuleSourceBigramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,                                           const Hypergraph::Edge& edge,                                           const vector<const void*>& ant_contexts,                                           SparseVector<double>* features, @@ -92,14 +92,52 @@ void RuleNgramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,        assert(w > 0);        const string& cur = TD::Convert(w);        ostringstream os; -      os << "RB:" << prev << '_' << cur; +      os << "RBS:" << prev << '_' << cur;        const int fid = FD::Convert(Escape(os.str()));        if (fid <= 0) return;        f.add_value(fid, 1.0);        prev = cur;      }      ostringstream os; -    os << "RB:" << prev << '_' << "</r>"; +    os << "RBS:" << prev << '_' << "</r>"; +    f.set_value(FD::Convert(Escape(os.str())), 1.0); +  } +  (*features) += it->second; +} + +RuleTargetBigramFeatures::RuleTargetBigramFeatures(const std::string& param) { +} + +void RuleTargetBigramFeatures::PrepareForInput(const SentenceMetadata& smeta) { +  rule2_feats_.clear(); +} + +void RuleTargetBigramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, +                                         const Hypergraph::Edge& edge, +                                         const vector<const void*>& ant_contexts, +                                         SparseVector<double>* features, +                                         SparseVector<double>* estimated_features, +                                         void* context) const { +  map<const TRule*, SparseVector<double> >::iterator it = rule2_feats_.find(edge.rule_.get()); +  if (it == rule2_feats_.end()) { +    const TRule& rule = *edge.rule_; +    it = rule2_feats_.insert(make_pair(&rule, SparseVector<double>())).first; +    SparseVector<double>& f = it->second; +    string prev = "<r>"; +    for (int i = 0; i < rule.e_.size(); ++i) { +      WordID w = rule.e_[i]; +      if (w < 0) w = -w; +      if (w == 0) return; +      const string& cur = TD::Convert(w); +      ostringstream os; +      os << "RBT:" << prev << '_' << cur; +      const int fid = FD::Convert(Escape(os.str())); +      if (fid <= 0) return; +      f.add_value(fid, 1.0); +      prev = cur; +    } +    ostringstream os; +    os << "RBT:" << prev << '_' << "</r>";      f.set_value(FD::Convert(Escape(os.str())), 1.0);    }    (*features) += it->second; diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h index 48d8bd05..08b168b0 100644 --- a/decoder/ff_rules.h +++ b/decoder/ff_rules.h @@ -22,9 +22,24 @@ class RuleIdentityFeatures : public FeatureFunction {    mutable std::map<const TRule*, int> rule2_fid_;  }; -class RuleNgramFeatures : public FeatureFunction { +class RuleSourceBigramFeatures : public FeatureFunction {   public: -  RuleNgramFeatures(const std::string& param); +  RuleSourceBigramFeatures(const std::string& param); + protected: +  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, +                                     const Hypergraph::Edge& edge, +                                     const std::vector<const void*>& ant_contexts, +                                     SparseVector<double>* features, +                                     SparseVector<double>* estimated_features, +                                     void* context) const; +  virtual void PrepareForInput(const SentenceMetadata& smeta); + private: +  mutable std::map<const TRule*, SparseVector<double> > rule2_feats_; +}; + +class RuleTargetBigramFeatures : public FeatureFunction { + public: +  RuleTargetBigramFeatures(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,                                       const Hypergraph::Edge& edge, | 
