summaryrefslogtreecommitdiff
path: root/decoder
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2013-03-15 11:39:18 +0100
committerPatrick Simianer <p@simianer.de>2013-03-15 11:39:18 +0100
commit3d8aa307d233f58cfe9ddcc616a8297551a738e3 (patch)
treeb9556fe62c7ac0d7b48d92481acdd06a546b1dc2 /decoder
parentcf67d34738e1487f75739dc1e027b1864a06513b (diff)
parent9f1d72fa4dc231eb8cdb737becfc10452b5daef4 (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'decoder')
-rw-r--r--decoder/Makefile.am2
-rw-r--r--decoder/cdec_ff.cc2
-rw-r--r--decoder/ff_source_path.cc42
-rw-r--r--decoder/ff_source_path.h26
4 files changed, 72 insertions, 0 deletions
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index 6499b38b..82b50f19 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -60,6 +60,7 @@ libcdec_a_SOURCES = \
ff_rules.h \
ff_ruleshape.h \
ff_sample_fsa.h \
+ ff_source_path.h \
ff_source_syntax.h \
ff_spans.h \
ff_tagger.h \
@@ -140,6 +141,7 @@ libcdec_a_SOURCES = \
ff_wordalign.cc \
ff_csplit.cc \
ff_tagger.cc \
+ ff_source_path.cc \
ff_source_syntax.cc \
ff_bleu.cc \
ff_factory.cc \
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 3ab0f9f6..0bf441d4 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -14,6 +14,7 @@
#include "ff_rules.h"
#include "ff_ruleshape.h"
#include "ff_bleu.h"
+#include "ff_source_path.h"
#include "ff_source_syntax.h"
#include "ff_register.h"
#include "ff_charset.h"
@@ -70,6 +71,7 @@ void register_feature_functions() {
ff_registry.Register("InputIndicator", new FFFactory<InputIndicator>);
ff_registry.Register("LexicalTranslationTrigger", new FFFactory<LexicalTranslationTrigger>);
ff_registry.Register("WordPairFeatures", new FFFactory<WordPairFeatures>);
+ ff_registry.Register("SourcePathFeatures", new FFFactory<SourcePathFeatures>);
ff_registry.Register("WordSet", new FFFactory<WordSet>);
ff_registry.Register("Dwarf", new FFFactory<Dwarf>);
ff_registry.Register("External", new FFFactory<ExternalFeature>);
diff --git a/decoder/ff_source_path.cc b/decoder/ff_source_path.cc
new file mode 100644
index 00000000..2a3bee2e
--- /dev/null
+++ b/decoder/ff_source_path.cc
@@ -0,0 +1,42 @@
+#include "ff_source_path.h"
+
+#include "hg.h"
+
+using namespace std;
+
+SourcePathFeatures::SourcePathFeatures(const string& param) : FeatureFunction(sizeof(int)) {}
+
+void SourcePathFeatures::FireBigramFeature(WordID prev, WordID cur, SparseVector<double>* features) const {
+ int& fid = bigram_fids[prev][cur];
+ if (!fid) fid = FD::Convert("SB:"+TD::Convert(prev) + "_" + TD::Convert(cur));
+ if (fid) features->add_value(fid, 1.0);
+}
+
+void SourcePathFeatures::FireUnigramFeature(WordID cur, SparseVector<double>* features) const {
+ int& fid = unigram_fids[cur];
+ if (!fid) fid = FD::Convert("SU:" + TD::Convert(cur));
+ if (fid) features->add_value(fid, 1.0);
+}
+
+void SourcePathFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const HG::Edge& edge,
+ const vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const {
+ WordID* res = reinterpret_cast<WordID*>(context);
+ const vector<int>& f = edge.rule_->f();
+ int prev = 0;
+ unsigned ntc = 0;
+ for (unsigned i = 0; i < f.size(); ++i) {
+ int cur = f[i];
+ if (cur < 0)
+ cur = *reinterpret_cast<const WordID*>(ant_contexts[ntc++]);
+ else
+ FireUnigramFeature(cur, features);
+ if (prev) FireBigramFeature(prev, cur, features);
+ prev = cur;
+ }
+ *res = prev;
+}
+
diff --git a/decoder/ff_source_path.h b/decoder/ff_source_path.h
new file mode 100644
index 00000000..03126412
--- /dev/null
+++ b/decoder/ff_source_path.h
@@ -0,0 +1,26 @@
+#ifndef _FF_SOURCE_PATH_H_
+#define _FF_SOURCE_PATH_H_
+
+#include <vector>
+#include <map>
+#include "ff.h"
+
+class SourcePathFeatures : public FeatureFunction {
+ public:
+ SourcePathFeatures(const std::string& param);
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const HG::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+
+ private:
+ void FireBigramFeature(WordID prev, WordID cur, SparseVector<double>* features) const;
+ void FireUnigramFeature(WordID cur, SparseVector<double>* features) const;
+ mutable std::map<WordID, std::map<WordID, int> > bigram_fids;
+ mutable std::map<WordID, int> unigram_fids;
+};
+
+#endif