diff options
Diffstat (limited to 'decoder')
-rw-r--r-- | decoder/Makefile.am | 2 | ||||
-rw-r--r-- | decoder/cdec_ff.cc | 2 | ||||
-rw-r--r-- | decoder/ff_source_path.cc | 42 | ||||
-rw-r--r-- | decoder/ff_source_path.h | 26 |
4 files changed, 72 insertions, 0 deletions
diff --git a/decoder/Makefile.am b/decoder/Makefile.am index 6499b38b..82b50f19 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -60,6 +60,7 @@ libcdec_a_SOURCES = \ ff_rules.h \ ff_ruleshape.h \ ff_sample_fsa.h \ + ff_source_path.h \ ff_source_syntax.h \ ff_spans.h \ ff_tagger.h \ @@ -140,6 +141,7 @@ libcdec_a_SOURCES = \ ff_wordalign.cc \ ff_csplit.cc \ ff_tagger.cc \ + ff_source_path.cc \ ff_source_syntax.cc \ ff_bleu.cc \ ff_factory.cc \ diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index 3ab0f9f6..0bf441d4 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -14,6 +14,7 @@ #include "ff_rules.h" #include "ff_ruleshape.h" #include "ff_bleu.h" +#include "ff_source_path.h" #include "ff_source_syntax.h" #include "ff_register.h" #include "ff_charset.h" @@ -70,6 +71,7 @@ void register_feature_functions() { ff_registry.Register("InputIndicator", new FFFactory<InputIndicator>); ff_registry.Register("LexicalTranslationTrigger", new FFFactory<LexicalTranslationTrigger>); ff_registry.Register("WordPairFeatures", new FFFactory<WordPairFeatures>); + ff_registry.Register("SourcePathFeatures", new FFFactory<SourcePathFeatures>); ff_registry.Register("WordSet", new FFFactory<WordSet>); ff_registry.Register("Dwarf", new FFFactory<Dwarf>); ff_registry.Register("External", new FFFactory<ExternalFeature>); diff --git a/decoder/ff_source_path.cc b/decoder/ff_source_path.cc new file mode 100644 index 00000000..2a3bee2e --- /dev/null +++ b/decoder/ff_source_path.cc @@ -0,0 +1,42 @@ +#include "ff_source_path.h" + +#include "hg.h" + +using namespace std; + +SourcePathFeatures::SourcePathFeatures(const string& param) : FeatureFunction(sizeof(int)) {} + +void SourcePathFeatures::FireBigramFeature(WordID prev, WordID cur, SparseVector<double>* features) const { + int& fid = bigram_fids[prev][cur]; + if (!fid) fid = FD::Convert("SB:"+TD::Convert(prev) + "_" + TD::Convert(cur)); + if (fid) features->add_value(fid, 1.0); +} + +void SourcePathFeatures::FireUnigramFeature(WordID cur, SparseVector<double>* features) const { + int& fid = unigram_fids[cur]; + if (!fid) fid = FD::Convert("SU:" + TD::Convert(cur)); + if (fid) features->add_value(fid, 1.0); +} + +void SourcePathFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const HG::Edge& edge, + const vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const { + WordID* res = reinterpret_cast<WordID*>(context); + const vector<int>& f = edge.rule_->f(); + int prev = 0; + unsigned ntc = 0; + for (unsigned i = 0; i < f.size(); ++i) { + int cur = f[i]; + if (cur < 0) + cur = *reinterpret_cast<const WordID*>(ant_contexts[ntc++]); + else + FireUnigramFeature(cur, features); + if (prev) FireBigramFeature(prev, cur, features); + prev = cur; + } + *res = prev; +} + diff --git a/decoder/ff_source_path.h b/decoder/ff_source_path.h new file mode 100644 index 00000000..03126412 --- /dev/null +++ b/decoder/ff_source_path.h @@ -0,0 +1,26 @@ +#ifndef _FF_SOURCE_PATH_H_ +#define _FF_SOURCE_PATH_H_ + +#include <vector> +#include <map> +#include "ff.h" + +class SourcePathFeatures : public FeatureFunction { + public: + SourcePathFeatures(const std::string& param); + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const HG::Edge& edge, + const std::vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const; + + private: + void FireBigramFeature(WordID prev, WordID cur, SparseVector<double>* features) const; + void FireUnigramFeature(WordID cur, SparseVector<double>* features) const; + mutable std::map<WordID, std::map<WordID, int> > bigram_fids; + mutable std::map<WordID, int> unigram_fids; +}; + +#endif |