From 35142ef52f15d610ca08fa622b83594cf111ce4a Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 9 Dec 2010 17:04:29 -0500 Subject: major refactor of markov features for word alignment --- decoder/ff_wordalign.h | 100 +++++++++++-------------------------------------- 1 file changed, 22 insertions(+), 78 deletions(-) (limited to 'decoder/ff_wordalign.h') diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h index 418c8768..a1ffd9ca 100644 --- a/decoder/ff_wordalign.h +++ b/decoder/ff_wordalign.h @@ -3,7 +3,9 @@ #include "ff.h" #include "array2d.h" +#include "factored_lexicon_helper.h" +#include #include class RelativeSentencePosition : public FeatureFunction { @@ -23,64 +25,6 @@ class RelativeSentencePosition : public FeatureFunction { std::map fids_; // fclass -> fid }; -class Model2BinaryFeatures : public FeatureFunction { - public: - Model2BinaryFeatures(const std::string& param); - protected: - virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector& ant_contexts, - SparseVector* features, - SparseVector* estimated_features, - void* out_context) const; - private: - boost::multi_array fids_; -}; - -class MarkovJump : public FeatureFunction { - public: - MarkovJump(const std::string& param); - protected: - virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector& ant_contexts, - SparseVector* features, - SparseVector* estimated_features, - void* out_context) const; - private: - const int fid_; - const int fid_lex_null_; - const int fid_null_lex_; - const int fid_null_null_; - const int fid_lex_lex_; - - bool binary_params_; - std::vector > flen2jump2fid_; -}; - -class MarkovJumpFClass : public FeatureFunction { - public: - MarkovJumpFClass(const std::string& param); - virtual void FinalTraversalFeatures(const void* context, - SparseVector* features) const; - protected: - virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector& ant_contexts, - SparseVector* features, - SparseVector* estimated_features, - void* context) const; - - void FireFeature(const SentenceMetadata& smeta, - int prev_src_pos, - int cur_src_pos, - SparseVector* features) const; - - private: - std::vector > > fids_; // flen -> fclass -> jumpsize -> fid - std::vector > pos_; -}; - typedef std::map Class2FID; typedef std::map Class2Class2FID; typedef std::map Class2Class2Class2FID; @@ -89,6 +33,7 @@ class SourceBigram : public FeatureFunction { SourceBigram(const std::string& param); virtual void FinalTraversalFeatures(const void* context, SparseVector* features) const; + void PrepareForInput(const SentenceMetadata& smeta); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, @@ -100,7 +45,9 @@ class SourceBigram : public FeatureFunction { void FireFeature(WordID src, WordID trg, SparseVector* features) const; + std::string fid_str_; mutable Class2Class2FID fmap_; + boost::scoped_ptr lexmap_; // different view (stemmed, etc) of source }; class LexNullJump : public FeatureFunction { @@ -136,30 +83,27 @@ class NewJump : public FeatureFunction { const int cur_src_index, SparseVector* features) const; + WordID GetSourceWord(int sentence_id, int index) const { + if (index < 0) return kBOS_; + assert(src_.size() > sentence_id); + const std::vector& v = src_[sentence_id]; + if (index >= v.size()) return kEOS_; + return v[index]; + } + + const WordID kBOS_; + const WordID kEOS_; bool use_binned_log_lengths_; + bool flen_; + bool elen_; + bool f0_; + bool fm1_; + bool fp1_; + bool fprev_; + std::vector > src_; std::string fid_str_; // identifies configuration uniquely }; -class SourcePOSBigram : public FeatureFunction { - public: - SourcePOSBigram(const std::string& param); - virtual void FinalTraversalFeatures(const void* context, - SparseVector* features) const; - protected: - virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector& ant_contexts, - SparseVector* features, - SparseVector* estimated_features, - void* context) const; - private: - void FireFeature(WordID src, - WordID trg, - SparseVector* features) const; - mutable Class2Class2FID fmap_; - std::vector > pos_; -}; - class LexicalTranslationTrigger : public FeatureFunction { public: LexicalTranslationTrigger(const std::string& param); -- cgit v1.2.3