author     redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>  2010-09-29 20:45:48 +0000
committer  redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>  2010-09-29 20:45:48 +0000
commit     f412aaab3d10fb82b20a2190f2cb1424959c599a (patch)
tree       1942e2a05777694cc81724f3206c8972813b4224 /decoder
parent     7f56dd65ee706683444b012d0afcfff3e376bfff (diff)
another feature, another POS
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@664 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder')
-rw-r--r--  decoder/cdec_ff.cc        1
-rw-r--r--  decoder/ff.cc             4
-rw-r--r--  decoder/ff_wordalign.cc  66
-rw-r--r--  decoder/ff_wordalign.h   20
4 files changed, 90 insertions(+), 1 deletion(-)
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 84ba19fa..c0c595a5 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -46,6 +46,7 @@ void register_feature_functions() {
ff_registry.Register("Model2BinaryFeatures", new FFFactory<Model2BinaryFeatures>);
ff_registry.Register("MarkovJump", new FFFactory<MarkovJump>);
ff_registry.Register("MarkovJumpFClass", new FFFactory<MarkovJumpFClass>);
+ ff_registry.Register("SourceBigram", new FFFactory<SourceBigram>);
ff_registry.Register("SourcePOSBigram", new FFFactory<SourcePOSBigram>);
ff_registry.Register("BlunsomSynchronousParseHack", new FFFactory<BlunsomSynchronousParseHack>);
ff_registry.Register("AlignerResults", new FFFactory<AlignerResults>);
diff --git a/decoder/ff.cc b/decoder/ff.cc
index 7bdd21e3..a32c0dcb 100644
--- a/decoder/ff.cc
+++ b/decoder/ff.cc
@@ -171,7 +171,9 @@ void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta,
prob_t* combination_cost_estimate) const {
edge->reset_info();
context->resize(state_size_);
- memset(&(*context)[0], 0, state_size_);
+ if (state_size_ > 0) {
+ memset(&(*context)[0], 0, state_size_);
+ }
SparseVector<double> est_vals; // only computed if combination_cost_estimate is non-NULL
if (combination_cost_estimate) *combination_cost_estimate = prob_t::One();
for (int i = 0; i < models_.size(); ++i) {
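The guard added above matters when no stateful feature function is loaded: state_size_ is then 0, the context buffer is resized to length zero, and taking the address of its first element is undefined behavior. A minimal standalone sketch of the pattern, assuming a std::vector<char> state buffer (cdec's actual state type may differ):

    #include <cstring>
    #include <vector>

    // Zero a per-node state buffer only when there is state to zero;
    // &(*buf)[0] on an empty vector is undefined behavior.
    void reset_state(std::vector<char>* buf, size_t state_size) {
      buf->resize(state_size);
      if (state_size > 0) {
        std::memset(&(*buf)[0], 0, state_size);
      }
    }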
diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc
index a1968159..da86b714 100644
--- a/decoder/ff_wordalign.cc
+++ b/decoder/ff_wordalign.cc
@@ -266,6 +266,72 @@ void MarkovJump::TraversalFeaturesImpl(const SentenceMetadata& smeta,
}
}
+// state: src word used, number of trg words generated
+SourceBigram::SourceBigram(const std::string& param) :
+ FeatureFunction(sizeof(WordID) + sizeof(int)) {
+}
+
+void SourceBigram::FinalTraversalFeatures(const void* context,
+ SparseVector<double>* features) const {
+ WordID left = *static_cast<const WordID*>(context);
+ int left_wc = *(static_cast<const int*>(context) + 1);
+ if (left_wc == 1)
+ FireFeature(-1, left, features);
+ FireFeature(left, -1, features);
+}
+
+void SourceBigram::FireFeature(WordID left,
+ WordID right,
+ SparseVector<double>* features) const {
+ int& fid = fmap_[left][right];
+ // TODO (important!): escape strings before using them as feature names
+ if (!fid) {
+ ostringstream os;
+ os << "SB:";
+ if (left < 0) { os << "BOS"; } else { os << TD::Convert(left); }
+ os << '_';
+ if (right < 0) { os << "EOS"; } else { os << TD::Convert(right); }
+ fid = FD::Convert(os.str());
+ if (fid == 0) fid = -1;
+ }
+ if (fid > 0) features->set_value(fid, 1.0);
+ int& ufid = ufmap_[left];
+ if (!ufid) {
+ ostringstream os;
+ os << "SU:";
+ if (left < 0) { os << "BOS"; } else { os << TD::Convert(left); }
+ ufid = FD::Convert(os.str());
+ if (ufid == 0) ufid = -1;
+ }
+ if (ufid > 0) features->set_value(ufid, 1.0);
+}
+
+void SourceBigram::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* /* estimated_features */,
+ void* context) const {
+ WordID& out_context = *static_cast<WordID*>(context);
+ int& out_word_count = *(static_cast<int*>(context) + 1);
+ const int arity = edge.Arity();
+ if (arity == 0) {
+ out_context = edge.rule_->f()[0];
+ out_word_count = edge.rule_->EWords();
+ assert(out_word_count == 1); // this is only defined for lex translation!
+ // revisit this if you want to translate into null words
+ } else if (arity == 2) {
+ WordID left = *static_cast<const WordID*>(ant_contexts[0]);
+ WordID right = *static_cast<const WordID*>(ant_contexts[1]);
+ int left_wc = *(static_cast<const int*>(ant_contexts[0]) + 1);
+ int right_wc = *(static_cast<const int*>(ant_contexts[1]) + 1);
+ if (left_wc == 1 && right_wc == 1)
+ FireFeature(-1, left, features);
+ FireFeature(left, right, features);
+ out_word_count = left_wc + right_wc;
+ out_context = right;
+ }
+}
// state: POS of src word used, number of trg words generated
SourcePOSBigram::SourcePOSBigram(const std::string& param) :
FeatureFunction(sizeof(WordID) + sizeof(int)) {
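SourceBigram packs two fields into its fixed-size per-node state: the source word exposed at the edge of the span and the number of target words generated so far (the constructor reserves sizeof(WordID) + sizeof(int) bytes, and the pointer arithmetic above assumes the two fields are the same size, which holds because WordID is an integer id). The helpers below are hypothetical, not part of the commit; they only restate the layout the code reads and writes:

    // Illustrative only: mirrors SourceBigram's state blob layout.
    typedef int WordID;  // stand-in for cdec's integer word id

    // Field 0: source word at the span edge; field 1: target words generated.
    inline WordID state_word(const void* state) {
      return *static_cast<const WordID*>(state);
    }
    inline int state_word_count(const void* state) {
      return *(static_cast<const int*>(state) + 1);
    }
    inline void write_state(void* state, WordID w, int wc) {
      *static_cast<WordID*>(state) = w;
      *(static_cast<int*>(state) + 1) = wc;
    }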
diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h
index c44ad26b..ebbecfea 100644
--- a/decoder/ff_wordalign.h
+++ b/decoder/ff_wordalign.h
@@ -78,6 +78,26 @@ class MarkovJumpFClass : public FeatureFunction {
typedef std::map<WordID, int> Class2FID;
typedef std::map<WordID, Class2FID> Class2Class2FID;
+class SourceBigram : public FeatureFunction {
+ public:
+ SourceBigram(const std::string& param);
+ virtual void FinalTraversalFeatures(const void* context,
+ SparseVector<double>* features) const;
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+ private:
+ void FireFeature(WordID src,
+ WordID trg,
+ SparseVector<double>* features) const;
+ mutable Class2Class2FID fmap_;
+ mutable Class2FID ufmap_;
+};
+
class SourcePOSBigram : public FeatureFunction {
public:
SourcePOSBigram(const std::string& param);
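The two mutable maps in the new class (fmap_ for bigram features, ufmap_ for unigram features) let the const traversal methods cache feature ids: each feature name goes through FD::Convert at most once, with -1 recorded when no id could be obtained. A generic sketch of that lazy-caching idiom, using a placeholder convert function rather than the real FD::Convert:

    #include <map>
    #include <string>

    // Lazy feature-id cache: 0 = not looked up yet, -1 = lookup failed,
    // positive = usable id.
    int cached_fid(std::map<std::string, int>& cache, const std::string& name,
                   int (*convert)(const std::string&)) {
      int& fid = cache[name];
      if (fid == 0) {
        fid = convert(name);
        if (fid == 0) fid = -1;  // remember the failure so we never retry
      }
      return fid;
    }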