summaryrefslogtreecommitdiff
path: root/decoder/ff_lexical.h
blob: 21c85b27511eb438dbfb4523332eab934be22b20 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#ifndef FF_LEXICAL_H_
#define FF_LEXICAL_H_

#include <cassert>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

#include "trule.h"
#include "ff.h"
#include "hg.h"
#include "array2d.h"
#include "wordid.h"
#include "filelib.h"
#include "stringlib.h"
#include "sentence_metadata.h"
#include "lattice.h"
#include "fdict.h"
#include "verbose.h"
#include "tdict.h"
#include "hg.h"

using namespace std;

namespace {
  string Escape(const string& x) {
    string y = x;
    for (int i = 0; i < y.size(); ++i) {
      if (y[i] == '=') y[i]='_';
      if (y[i] == ';') y[i]='_';
    }
    return y;
  }
}

class LexicalFeatures : public FeatureFunction {
public:
	LexicalFeatures(const std::string& param) {
		if (param.empty()) {
			cerr << "LexicalFeatures: using T,D,I\n";
			T_ = true; I_ = true; D_ = true;
		} else {
			const vector<string> argv = SplitOnWhitespace(param);
			assert(argv.size() == 3);
			T_ = (bool) atoi(argv[0].c_str());
			I_ = (bool) atoi(argv[1].c_str());
			D_ = (bool) atoi(argv[2].c_str());
			cerr << "T=" << T_ << " I=" << I_ << " D=" << D_ << endl;
		}
	};
	static std::string usage(bool p,bool d) {
	    return usage_helper("LexicalFeatures","[0/1 0/1 0/1]","Sparse lexical word translation indicator features. If arguments are supplied, specify like this: translations insertions deletions",p,d);
	}
protected:
	virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
			const HG::Edge& edge,
			const std::vector<const void*>& ant_contexts,
			SparseVector<double>* features,
			SparseVector<double>* estimated_features,
			void* context) const;
	virtual void PrepareForInput(const SentenceMetadata& smeta);
private:
	mutable std::map<const TRule*, SparseVector<double> > rule2feats_;
	bool T_;
	bool I_;
	bool D_;
};

void LexicalFeatures::PrepareForInput(const SentenceMetadata& smeta) {
  rule2feats_.clear(); //  std::map<const TRule*, SparseVector<double> >
}

void LexicalFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
	const HG::Edge& edge,
	const std::vector<const void*>& ant_contexts,
	SparseVector<double>* features,
	SparseVector<double>* estimated_features,
	void* context) const {
	
	map<const TRule*, SparseVector<double> >::iterator it = rule2feats_.find(edge.rule_.get());	
	if (it == rule2feats_.end()) {
		const TRule& rule = *edge.rule_;
	    it = rule2feats_.insert(make_pair(&rule, SparseVector<double>())).first;
	    SparseVector<double>& f = it->second;
	    std::vector<bool> sf(edge.rule_->FLength(),false); // stores if source tokens are visited by alignment points
		std::vector<bool> se(edge.rule_->ELength(),false); // stores if target tokens are visited by alignment points
		int fid = 0;
	    // translations
	    for (unsigned i=0;i<rule.a_.size();++i) {
	    	const AlignmentPoint& ap = rule.a_[i];
	    	sf[ap.s_] = true; // mark index as seen
	    	se[ap.t_] = true; // mark index as seen
	    	ostringstream os;
			os << "LT:" << Escape(TD::Convert(rule.f_[ap.s_])) << ":" << Escape(TD::Convert(rule.e_[ap.t_]));
			fid = FD::Convert(os.str());
			if (fid <= 0) continue;
			if (T_)
				f.add_value(fid, 1.0);
	    }
	    // word deletions
	    for (unsigned i=0;i<sf.size();++i) {
	    	if (!sf[i] && rule.f_[i] > 0) {// if not visited and is terminal
	    		ostringstream os;
	    		os << "LD:" << Escape(TD::Convert(rule.f_[i]));
	    		fid = FD::Convert(os.str());
	    		if (fid <= 0) continue;
	    		if (D_)
		    		f.add_value(fid, 1.0);
	    	}
	    }
	    // word insertions
	    for (unsigned i=0;i<se.size();++i) {
	    	if (!se[i] && rule.e_[i] >= 1) {// if not visited and is terminal
	    		ostringstream os;
	    		os << "LI:" << Escape(TD::Convert(rule.e_[i]));
	    		fid = FD::Convert(os.str());
	    		if (fid <= 0) continue;
	    		if (I_)
		    		f.add_value(fid, 1.0);
	    	}
	    }
	}
	(*features) += it->second;
}


#endif