summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Chris Dyer <redpony@gmail.com> 2010-01-27 12:34:28 +0000
committer Chris Dyer <redpony@gmail.com> 2010-01-27 12:34:28 +0000
commit ee4383b3bc67e2d8ce113fce716050dc2e1b8572 (patch)
tree aa4b9fbc1f6e397352a54ae98f70b26a8f91040d
parent 9e7d60da4421074d279a91cb6e4e67438add4645 (diff)
add binary model2-like features
-rw-r--r-- decoder/cdec_ff.cc 1
-rw-r--r-- decoder/ff_wordalign.cc 37
-rw-r--r-- decoder/ff_wordalign.h 16
3 files changed, 54 insertions, 0 deletions
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 437de428..34499398 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -14,6 +14,7 @@ void register_feature_functions() {
global_ff_registry->Register("WordPenalty", new FFFactory<WordPenalty>);
global_ff_registry->Register("SourceWordPenalty", new FFFactory<SourceWordPenalty>);
global_ff_registry->Register("RelativeSentencePosition", new FFFactory<RelativeSentencePosition>);
+ global_ff_registry->Register("Model2BinaryFeatures", new FFFactory<Model2BinaryFeatures>);
global_ff_registry->Register("MarkovJump", new FFFactory<MarkovJump>);
global_ff_registry->Register("SourcePOSBigram", new FFFactory<SourcePOSBigram>);
global_ff_registry->Register("BlunsomSynchronousParseHack", new FFFactory<BlunsomSynchronousParseHack>);
diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc
index c9f90541..c1b66a5e 100644
--- a/decoder/ff_wordalign.cc
+++ b/decoder/ff_wordalign.cc
@@ -12,8 +12,45 @@
#include "tdict.h" // Blunsom hack
#include "filelib.h" // Blunsom hack
+// Extent of each of the three dimensions of the feature id table that the
+// Model2BinaryFeatures constructor allocates and fills.
+static const size_t MAX_SENTENCE_SIZE = 100;
+
using namespace std;
+// Allocates the MAX_SENTENCE_SIZE^3 feature id table and fills every cell.
+//
+// Cell [i][j][k] receives the id FD::Convert returns for the feature name
+// "M2_<i>_<j>:<k>" when j < i, and the sentinel -1 otherwise.
+// TraversalFeaturesImpl reads the table as
+// [source length][edge.i_][edge.prev_i_].
+//
+// The constructor argument is accepted but not used.
+Model2BinaryFeatures::Model2BinaryFeatures(const string& /*param*/) :
+    fids_(boost::extents[MAX_SENTENCE_SIZE][MAX_SENTENCE_SIZE][MAX_SENTENCE_SIZE]) {
+  // Signed copy of the bound so the int loop indices are not compared
+  // against an unsigned size_t.
+  const int limit = static_cast<int>(MAX_SENTENCE_SIZE);
+  for (int i = 0; i < limit; ++i) {
+    for (int j = 0; j < limit; ++j) {
+      // Whether this (i, j) pair gets features does not depend on k, so
+      // decide once per pair instead of once per cell.
+      const bool has_feature = j < i;
+      for (int k = 0; k < limit; ++k) {
+        int fid = -1;
+        if (has_feature) {
+          ostringstream os;
+          os << "M2_" << i << '_' << j << ':' << k;
+          fid = FD::Convert(os.str());
+        }
+        fids_[i][j][k] = fid;
+      }
+    }
+  }
+}
+
+// Sets one binary feature (value 1.0) on `features` for the given edge.
+//
+// The feature id is looked up in fids_ at
+// [smeta.GetSourceLength()][edge.i_][edge.prev_i_]. Nothing is set when:
+//   - edge.i_ or edge.prev_i_ is -1,
+//   - any of the three indices falls outside [0, MAX_SENTENCE_SIZE), or
+//   - the selected cell holds the -1 sentinel (no feature was created for it).
+//
+// ant_states, estimated_features and state are not used.
+void Model2BinaryFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                                 const Hypergraph::Edge& edge,
+                                                 const vector<const void*>& ant_states,
+                                                 SparseVector<double>* features,
+                                                 SparseVector<double>* estimated_features,
+                                                 void* state) const {
+  // if the source word is either null or the generated word
+  // has no position in the reference
+  if (edge.i_ == -1 || edge.prev_i_ == -1)
+    return;
+
+  assert(smeta.GetTargetLength() > 0);
+
+  // The table has a fixed extent in every dimension; indexing past it would
+  // read outside the array, so longer sentences simply get no feature here.
+  const int limit = static_cast<int>(MAX_SENTENCE_SIZE);
+  const int src_len = static_cast<int>(smeta.GetSourceLength());
+  const int pos = edge.i_;
+  const int prev_pos = edge.prev_i_;
+  if (src_len < 0 || src_len >= limit ||
+      pos < 0 || pos >= limit ||
+      prev_pos < 0 || prev_pos >= limit)
+    return;
+
+  const int fid = fids_[src_len][pos][prev_pos];
+  // Cells with edge.i_ >= source length were never given a feature id.
+  if (fid < 0)
+    return;
+  features->set_value(fid, 1.0);
+}
+
+
RelativeSentencePosition::RelativeSentencePosition(const string& param) :
fid_(FD::Convert("RelativeSentencePosition")) {
if (!param.empty()) {
diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h
index aea4c950..582e8c9f 100644
--- a/decoder/ff_wordalign.h
+++ b/decoder/ff_wordalign.h
@@ -4,6 +4,8 @@
#include "ff.h"
#include "array2d.h"
+#include <boost/multi_array.hpp>
+
class RelativeSentencePosition : public FeatureFunction {
public:
RelativeSentencePosition(const std::string& param);
@@ -20,6 +22,20 @@ class RelativeSentencePosition : public FeatureFunction {
std::string template_;
};
+// Feature function that sets a single binary feature per edge, with the
+// feature id taken from a precomputed three-dimensional table.
+class Model2BinaryFeatures : public FeatureFunction {
+ public:
+  // Builds the feature id table.
+  Model2BinaryFeatures(const std::string& param);
+
+ protected:
+  // Looks up the feature id for `edge` and sets it to 1.0 in `features`.
+  virtual void TraversalFeaturesImpl(
+      const SentenceMetadata& smeta,
+      const Hypergraph::Edge& edge,
+      const std::vector<const void*>& ant_contexts,
+      SparseVector<double>* features,
+      SparseVector<double>* estimated_features,
+      void* out_context) const;
+
+ private:
+  typedef boost::multi_array<int, 3> FeatureIdTable;
+  // Feature ids; the implementation indexes this as
+  // [source length][edge.i_][edge.prev_i_].
+  FeatureIdTable fids_;
+};
+
class MarkovJump : public FeatureFunction {
public:
MarkovJump(const std::string& param);