summaryrefslogtreecommitdiff
path: root/decoder
diff options
context:
space:
mode:
Diffstat (limited to 'decoder')
-rw-r--r--decoder/Makefile.am1
-rw-r--r--decoder/cdec_ff.cc3
-rw-r--r--decoder/ff_lexical.h128
-rw-r--r--decoder/ff_rules.cc22
-rw-r--r--decoder/ff_rules.h13
5 files changed, 131 insertions, 36 deletions
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index c85f17ed..8e61c13e 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -50,6 +50,7 @@ libcdec_a_SOURCES = \
ff_external.h \
ff_factory.h \
ff_klm.h \
+ ff_lexical.h \
ff_lm.h \
ff_ngrams.h \
ff_parse_match.h \
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 0411908f..7f7e075b 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -24,6 +24,7 @@
#include "ff_charset.h"
#include "ff_wordset.h"
#include "ff_external.h"
+#include "ff_lexical.h"
void register_feature_functions() {
@@ -39,13 +40,13 @@ void register_feature_functions() {
RegisterFF<SourceWordPenalty>();
RegisterFF<ArityPenalty>();
RegisterFF<BLEUModel>();
+ RegisterFF<LexicalFeatures>();
//TODO: use for all features the new Register which requires static FF::usage(false,false) give name
ff_registry.Register("SpanFeatures", new FFFactory<SpanFeatures>());
ff_registry.Register("NgramFeatures", new FFFactory<NgramDetector>());
ff_registry.Register("RuleContextFeatures", new FFFactory<RuleContextFeatures>());
ff_registry.Register("RuleIdentityFeatures", new FFFactory<RuleIdentityFeatures>());
- ff_registry.Register("RuleWordAlignmentFeatures", new FFFactory<RuleWordAlignmentFeatures>());
ff_registry.Register("ParseMatchFeatures", new FFFactory<ParseMatchFeatures>);
ff_registry.Register("SoftSyntaxFeatures", new FFFactory<SoftSyntaxFeatures>);
ff_registry.Register("SoftSyntaxFeaturesMindist", new FFFactory<SoftSyntaxFeaturesMindist>);
diff --git a/decoder/ff_lexical.h b/decoder/ff_lexical.h
new file mode 100644
index 00000000..21c85b27
--- /dev/null
+++ b/decoder/ff_lexical.h
@@ -0,0 +1,128 @@
+#ifndef FF_LEXICAL_H_
+#define FF_LEXICAL_H_
+
+#include <vector>
+#include <map>
+#include "trule.h"
+#include "ff.h"
+#include "hg.h"
+#include "array2d.h"
+#include "wordid.h"
+#include <sstream>
+#include <cassert>
+#include <cmath>
+
+#include "filelib.h"
+#include "stringlib.h"
+#include "sentence_metadata.h"
+#include "lattice.h"
+#include "fdict.h"
+#include "verbose.h"
+#include "tdict.h"
+#include "hg.h"
+
+using namespace std;
+
+namespace {
+ string Escape(const string& x) {
+ string y = x;
+ for (int i = 0; i < y.size(); ++i) {
+ if (y[i] == '=') y[i]='_';
+ if (y[i] == ';') y[i]='_';
+ }
+ return y;
+ }
+}
+
+class LexicalFeatures : public FeatureFunction {
+public:
+ LexicalFeatures(const std::string& param) {
+ if (param.empty()) {
+ cerr << "LexicalFeatures: using T,D,I\n";
+ T_ = true; I_ = true; D_ = true;
+ } else {
+ const vector<string> argv = SplitOnWhitespace(param);
+ assert(argv.size() == 3);
+ T_ = (bool) atoi(argv[0].c_str());
+ I_ = (bool) atoi(argv[1].c_str());
+ D_ = (bool) atoi(argv[2].c_str());
+ cerr << "T=" << T_ << " I=" << I_ << " D=" << D_ << endl;
+ }
+ };
+ static std::string usage(bool p,bool d) {
+ return usage_helper("LexicalFeatures","[0/1 0/1 0/1]","Sparse lexical word translation indicator features. If arguments are supplied, specify like this: translations insertions deletions",p,d);
+ }
+protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const HG::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+ virtual void PrepareForInput(const SentenceMetadata& smeta);
+private:
+ mutable std::map<const TRule*, SparseVector<double> > rule2feats_;
+ bool T_;
+ bool I_;
+ bool D_;
+};
+
+void LexicalFeatures::PrepareForInput(const SentenceMetadata& smeta) {
+ rule2feats_.clear(); // std::map<const TRule*, SparseVector<double> >
+}
+
+void LexicalFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const HG::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const {
+
+ map<const TRule*, SparseVector<double> >::iterator it = rule2feats_.find(edge.rule_.get());
+ if (it == rule2feats_.end()) {
+ const TRule& rule = *edge.rule_;
+ it = rule2feats_.insert(make_pair(&rule, SparseVector<double>())).first;
+ SparseVector<double>& f = it->second;
+ std::vector<bool> sf(edge.rule_->FLength(),false); // stores if source tokens are visited by alignment points
+ std::vector<bool> se(edge.rule_->ELength(),false); // stores if target tokens are visited by alignment points
+ int fid = 0;
+ // translations
+ for (unsigned i=0;i<rule.a_.size();++i) {
+ const AlignmentPoint& ap = rule.a_[i];
+ sf[ap.s_] = true; // mark index as seen
+ se[ap.t_] = true; // mark index as seen
+ ostringstream os;
+ os << "LT:" << Escape(TD::Convert(rule.f_[ap.s_])) << ":" << Escape(TD::Convert(rule.e_[ap.t_]));
+ fid = FD::Convert(os.str());
+ if (fid <= 0) continue;
+ if (T_)
+ f.add_value(fid, 1.0);
+ }
+ // word deletions
+ for (unsigned i=0;i<sf.size();++i) {
+ if (!sf[i] && rule.f_[i] > 0) {// if not visited and is terminal
+ ostringstream os;
+ os << "LD:" << Escape(TD::Convert(rule.f_[i]));
+ fid = FD::Convert(os.str());
+ if (fid <= 0) continue;
+ if (D_)
+ f.add_value(fid, 1.0);
+ }
+ }
+ // word insertions
+ for (unsigned i=0;i<se.size();++i) {
+ if (!se[i] && rule.e_[i] >= 1) {// if not visited and is terminal
+ ostringstream os;
+ os << "LI:" << Escape(TD::Convert(rule.e_[i]));
+ fid = FD::Convert(os.str());
+ if (fid <= 0) continue;
+ if (I_)
+ f.add_value(fid, 1.0);
+ }
+ }
+ }
+ (*features) += it->second;
+}
+
+
+#endif
diff --git a/decoder/ff_rules.cc b/decoder/ff_rules.cc
index 7bccf084..9533caed 100644
--- a/decoder/ff_rules.cc
+++ b/decoder/ff_rules.cc
@@ -69,28 +69,6 @@ void RuleIdentityFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
features->add_value(it->second, 1);
}
-RuleWordAlignmentFeatures::RuleWordAlignmentFeatures(const std::string& param) {
-}
-
-void RuleWordAlignmentFeatures::PrepareForInput(const SentenceMetadata& smeta) {
-}
-
-void RuleWordAlignmentFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
- const vector<const void*>& ant_contexts,
- SparseVector<double>* features,
- SparseVector<double>* estimated_features,
- void* context) const {
- const TRule& rule = *edge.rule_;
- ostringstream os;
- vector<AlignmentPoint> als = rule.als();
- std::vector<AlignmentPoint>::const_iterator xx = als.begin();
- for (; xx != als.end(); ++xx) {
- os << "WA:" << TD::Convert(rule.f_[xx->s_]) << ":" << TD::Convert(rule.e_[xx->t_]);
- }
- features->add_value(FD::Convert(Escape(os.str())), 1);
-}
-
RuleSourceBigramFeatures::RuleSourceBigramFeatures(const std::string& param) {
}
diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h
index 324d7a39..f210dc65 100644
--- a/decoder/ff_rules.h
+++ b/decoder/ff_rules.h
@@ -24,19 +24,6 @@ class RuleIdentityFeatures : public FeatureFunction {
mutable std::map<const TRule*, int> rule2_fid_;
};
-class RuleWordAlignmentFeatures : public FeatureFunction {
- public:
- RuleWordAlignmentFeatures(const std::string& param);
- protected:
- virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const HG::Edge& edge,
- const std::vector<const void*>& ant_contexts,
- SparseVector<double>* features,
- SparseVector<double>* estimated_features,
- void* context) const;
- virtual void PrepareForInput(const SentenceMetadata& smeta);
-};
-
class RuleSourceBigramFeatures : public FeatureFunction {
public:
RuleSourceBigramFeatures(const std::string& param);