summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Chris Dyer <cdyer@Chriss-MacBook-Air.local> 2013-03-14 23:38:41 -0400
committer Chris Dyer <cdyer@Chriss-MacBook-Air.local> 2013-03-14 23:38:41 -0400
commit 5f680cc45266112c0a6f51f8d9eeb99732a6e2d7 (patch)
tree 5bcfdab1ae30d8fb09dcf705aeebad86fab80a4c
parent 7cc5d9c568890be7fe01363fdf52912d8c6d5665 (diff)
source path features
-rw-r--r-- decoder/Makefile.am 2
-rw-r--r-- decoder/cdec_ff.cc 2
-rw-r--r-- decoder/ff_source_path.cc 40
-rw-r--r-- decoder/ff_source_path.h 26
4 files changed, 70 insertions, 0 deletions
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index 6499b38b..82b50f19 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -60,6 +60,7 @@ libcdec_a_SOURCES = \
ff_rules.h \
ff_ruleshape.h \
ff_sample_fsa.h \
+ ff_source_path.h \
ff_source_syntax.h \
ff_spans.h \
ff_tagger.h \
@@ -140,6 +141,7 @@ libcdec_a_SOURCES = \
ff_wordalign.cc \
ff_csplit.cc \
ff_tagger.cc \
+ ff_source_path.cc \
ff_source_syntax.cc \
ff_bleu.cc \
ff_factory.cc \
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 3ab0f9f6..a60f2c33 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -14,6 +14,7 @@
#include "ff_rules.h"
#include "ff_ruleshape.h"
#include "ff_bleu.h"
+#include "ff_source_path.h"
#include "ff_source_syntax.h"
#include "ff_register.h"
#include "ff_charset.h"
@@ -70,6 +71,7 @@ void register_feature_functions() {
ff_registry.Register("InputIndicator", new FFFactory<InputIndicator>);
ff_registry.Register("LexicalTranslationTrigger", new FFFactory<LexicalTranslationTrigger>);
ff_registry.Register("WordPairFeatures", new FFFactory<WordPairFeatures>);
+ // Source-side unigram/bigram path features; the registry key matches the class name.
+ ff_registry.Register("SourcePathFeatures", new FFFactory<SourcePathFeatures>);
ff_registry.Register("WordSet", new FFFactory<WordSet>);
ff_registry.Register("Dwarf", new FFFactory<Dwarf>);
ff_registry.Register("External", new FFFactory<ExternalFeature>);
diff --git a/decoder/ff_source_path.cc b/decoder/ff_source_path.cc
new file mode 100644
index 00000000..d5fa6bb3
--- /dev/null
+++ b/decoder/ff_source_path.cc
@@ -0,0 +1,40 @@
+#include "ff_source_path.h"
+
+#include "hg.h"
+
+using namespace std;
+
+// Constructs the feature function. `param` is accepted but not used by this
+// implementation.
+//
+// The value handed to the FeatureFunction base is derived from WordID rather
+// than hard-coded, because TraversalFeaturesImpl stores exactly one WordID in
+// the `context` buffer it is given.
+// NOTE(review): presumably the base-class argument is the per-node state size
+// in bytes — confirm against ff.h.
+SourcePathFeatures::SourcePathFeatures(const string& param)
+    : FeatureFunction(sizeof(WordID)) {}
+
+// Adds 1.0 to the sparse feature named "SB:<prev>_<cur>", where <prev> and
+// <cur> are whatever TD::Convert yields for the two word ids.
+//
+// Feature ids are memoized in bigram_fids. A stored 0 means "not resolved
+// yet", so the name is built and handed to FD::Convert only while the cached
+// id is 0. If the id is still 0 after that lookup, nothing is added.
+void SourcePathFeatures::FireBigramFeature(WordID prev, WordID cur, SparseVector<double>* features) const {
+  // std::map::operator[] value-initializes missing entries, i.e. to 0.
+  int& cached_id = bigram_fids[prev][cur];
+  if (cached_id == 0) {
+    cached_id = FD::Convert("SB:"+TD::Convert(prev) + "_" + TD::Convert(cur));
+  }
+  if (cached_id == 0) return;
+  features->add_value(cached_id, 1.0);
+}
+
+// Adds 1.0 to the sparse feature named "SU:<cur>", where <cur> is whatever
+// TD::Convert yields for the word id.
+//
+// Feature ids are memoized in unigram_fids. A stored 0 means "not resolved
+// yet", so FD::Convert is consulted only while the cached id is 0. If the id
+// is still 0 after that lookup, nothing is added.
+void SourcePathFeatures::FireUnigramFeature(WordID cur, SparseVector<double>* features) const {
+  // std::map::operator[] value-initializes missing entries, i.e. to 0.
+  int& cached_id = unigram_fids[cur];
+  if (cached_id == 0) {
+    cached_id = FD::Convert("SU:" + TD::Convert(cur));
+  }
+  if (cached_id == 0) return;
+  features->add_value(cached_id, 1.0);
+}
+
+// Fires source-side unigram and bigram features for one hyperedge.
+//
+// Walks the source side of the edge's rule (edge.rule_->f()) left to right:
+//   * a positive entry is treated as a terminal word id, and a unigram
+//     feature is fired for it;
+//   * a non-positive entry is treated as a nonterminal, and is replaced by
+//     the WordID read from the state of an antecedent.
+// Whenever the previous symbol is non-zero, a bigram feature (prev, cur) is
+// fired. The last symbol seen (0 when the source side is empty) is written
+// to `context` as a single WordID. `smeta` and `estimated_features` are not
+// used.
+//
+// Antecedent states are consumed with a running counter, one per nonterminal
+// encountered. The non-positive symbol value itself must not be used as the
+// index into ant_contexts: any negative value would read out of bounds.
+// NOTE(review): assumes ant_contexts is ordered like the nonterminals on the
+// source side of the rule — confirm against the hypergraph conventions.
+void SourcePathFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                               const HG::Edge& edge,
+                                               const vector<const void*>& ant_contexts,
+                                               SparseVector<double>* features,
+                                               SparseVector<double>* estimated_features,
+                                               void* context) const {
+  const vector<int>& src = edge.rule_->f();
+  int prev = 0;
+  unsigned next_ant = 0;  // index of the next antecedent state to consume
+  for (unsigned i = 0; i < src.size(); ++i) {
+    int cur = src[i];
+    if (cur <= 0) {
+      // Nonterminal: continue the path from the antecedent's stored symbol.
+      cur = *static_cast<const WordID*>(ant_contexts[next_ant]);
+      ++next_ant;
+    } else {
+      FireUnigramFeature(cur, features);
+    }
+    if (prev) FireBigramFeature(prev, cur, features);
+    prev = cur;
+  }
+  // Publish the last symbol as this node's state.
+  *static_cast<WordID*>(context) = prev;
+}
diff --git a/decoder/ff_source_path.h b/decoder/ff_source_path.h
new file mode 100644
index 00000000..03126412
--- /dev/null
+++ b/decoder/ff_source_path.h
@@ -0,0 +1,26 @@
+#ifndef _FF_SOURCE_PATH_H_
+#define _FF_SOURCE_PATH_H_
+
+#include <vector>
+#include <map>
+#include "ff.h"
+
+// Feature function that fires sparse indicator features over source-side
+// symbols of a rule: a unigram feature per terminal and a bigram feature per
+// adjacent pair of symbols (implemented in ff_source_path.cc).
+class SourcePathFeatures : public FeatureFunction {
+ public:
+  // `param` is the configuration string passed at construction time.
+  SourcePathFeatures(const std::string& param);
+
+ protected:
+  // Computes the features for `edge` and writes this node's state to
+  // `context`; `ant_contexts` holds the states of the antecedent nodes.
+  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                     const HG::Edge& edge,
+                                     const std::vector<const void*>& ant_contexts,
+                                     SparseVector<double>* features,
+                                     SparseVector<double>* estimated_features,
+                                     void* context) const;
+
+ private:
+  // Cache type: word id -> feature id.
+  typedef std::map<WordID, int> FidByWord;
+
+  // Add 1.0 to the bigram / unigram feature for the given word id(s).
+  void FireBigramFeature(WordID prev, WordID cur, SparseVector<double>* features) const;
+  void FireUnigramFeature(WordID cur, SparseVector<double>* features) const;
+
+  // Feature-id caches. Declared mutable because they are filled lazily from
+  // the const Fire*Feature methods.
+  mutable std::map<WordID, FidByWord> bigram_fids;  // prev -> (cur -> feature id)
+  mutable FidByWord unigram_fids;                   // cur -> feature id
+};
+
+#endif