#ifndef FF_LEXICAL_H_ #define FF_LEXICAL_H_ #include #include #include "trule.h" #include "ff.h" #include "hg.h" #include "array2d.h" #include "wordid.h" #include #include #include #include "filelib.h" #include "stringlib.h" #include "sentence_metadata.h" #include "lattice.h" #include "fdict.h" #include "verbose.h" #include "tdict.h" #include "hg.h" using namespace std; namespace { string Escape(const string& x) { string y = x; for (int i = 0; i < y.size(); ++i) { if (y[i] == '=') y[i]='_'; if (y[i] == ';') y[i]='_'; } return y; } } class LexicalFeatures : public FeatureFunction { public: LexicalFeatures(const std::string& param) { if (param.empty()) { cerr << "LexicalFeatures: using T,D,I\n"; T_ = true; I_ = true; D_ = true; } else { const vector argv = SplitOnWhitespace(param); assert(argv.size() == 3); T_ = (bool) atoi(argv[0].c_str()); I_ = (bool) atoi(argv[1].c_str()); D_ = (bool) atoi(argv[2].c_str()); cerr << "T=" << T_ << " I=" << I_ << " D=" << D_ << endl; } }; static std::string usage(bool p,bool d) { return usage_helper("LexicalFeatures","[0/1 0/1 0/1]","Sparse lexical word translation indicator features. If arguments are supplied, specify like this: translations insertions deletions",p,d); } protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, void* context) const; virtual void PrepareForInput(const SentenceMetadata& smeta); private: mutable std::map > rule2feats_; bool T_; bool I_; bool D_; }; void LexicalFeatures::PrepareForInput(const SentenceMetadata& smeta) { rule2feats_.clear(); // std::map > } void LexicalFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, void* context) const { map >::iterator it = rule2feats_.find(edge.rule_.get()); if (it == rule2feats_.end()) { const TRule& rule = *edge.rule_; it = rule2feats_.insert(make_pair(&rule, SparseVector())).first; SparseVector& f = it->second; std::vector sf(edge.rule_->FLength(),false); // stores if source tokens are visited by alignment points std::vector se(edge.rule_->ELength(),false); // stores if target tokens are visited by alignment points int fid = 0; // translations for (unsigned i=0;i 0) {// if not visited and is terminal ostringstream os; os << "LD:" << Escape(TD::Convert(rule.f_[i])); fid = FD::Convert(os.str()); if (fid <= 0) continue; if (D_) f.add_value(fid, 1.0); } } // word insertions for (unsigned i=0;i= 1) {// if not visited and is terminal ostringstream os; os << "LI:" << Escape(TD::Convert(rule.e_[i])); fid = FD::Convert(os.str()); if (fid <= 0) continue; if (I_) f.add_value(fid, 1.0); } } } (*features) += it->second; } #endif