summary | refs | log | tree | commit | diff
path: root/decoder/ff_soft_syntax.cc
diff options
context:
space:
mode:
author Chris Dyer <redpony@gmail.com> 2013-11-13 11:22:24 -0800
committer Chris Dyer <redpony@gmail.com> 2013-11-13 11:22:24 -0800
commit 1b39e848903743990ca16e2323235b31db20178c (patch)
tree 19fceda02d09c9d4990aba692ed97193020d1e86 /decoder/ff_soft_syntax.cc
parent f83186887c94b2ff8b17aefcd0b395f116c09eb6 (diff)
parent 4c7d24c9357f500839f04c7c8a8cfa0472801e18 (diff)
Merge pull request #27 from pks/master
Tidying (soft) syntax features; loo for C++ extractor; updates for dtrain
Diffstat (limited to 'decoder/ff_soft_syntax.cc')
-rw-r--r-- decoder/ff_soft_syntax.cc 49
1 files changed, 25 insertions, 24 deletions
diff --git a/decoder/ff_soft_syntax.cc b/decoder/ff_soft_syntax.cc
index 9981fa45..23fe87bd 100644
--- a/decoder/ff_soft_syntax.cc
+++ b/decoder/ff_soft_syntax.cc
@@ -13,16 +13,15 @@
using namespace std;
-// Implements the soft syntactic features described in
+// Implements the soft syntactic features described in
// Marton and Resnik (2008): "Soft Syntactic Constraints for Hierarchical Phrase-Based Translation".
// Source trees must be represented in Penn Treebank format,
// e.g. (S (NP John) (VP (V left))).
-struct SoftSyntacticFeaturesImpl {
- SoftSyntacticFeaturesImpl(const string& param) {
+struct SoftSyntaxFeaturesImpl {
+ SoftSyntaxFeaturesImpl(const string& param) {
vector<string> labels = SplitOnWhitespace(param);
- for (unsigned int i = 0; i < labels.size(); i++)
- //cerr << "Labels: " << labels.at(i) << endl;
+ //for (unsigned int i = 0; i < labels.size(); i++) { cerr << "Labels: " << labels.at(i) << endl; }
for (unsigned int i = 0; i < labels.size(); i++) {
string label = labels.at(i);
pair<string, string> feat_label;
@@ -34,10 +33,8 @@ struct SoftSyntacticFeaturesImpl {
void InitializeGrids(const string& tree, unsigned src_len) {
assert(tree.size() > 0);
- //fids_cat.clear();
fids_ef.clear();
src_tree.clear();
- //fids_cat.resize(src_len, src_len + 1);
fids_ef.resize(src_len, src_len + 1);
src_tree.resize(src_len, src_len + 1, TD::Convert("XX"));
ParseTreeString(tree, src_len);
@@ -99,7 +96,7 @@ struct SoftSyntacticFeaturesImpl {
const WordID lhs = src_tree(i,j);
string lhs_str = TD::Convert(lhs);
//cerr << "LHS: " << lhs_str << " from " << i << " to " << j << endl;
- //cerr << "RULE :"<< rule << endl;
+ //cerr << "RULE :"<< rule << endl;
int& fid_ef = fids_ef(i,j)[&rule];
for (unsigned int i = 0; i < feat_labels.size(); i++) {
ostringstream os;
@@ -110,10 +107,10 @@ struct SoftSyntacticFeaturesImpl {
switch(feat_type) {
case '2':
if (lhs_str.compare(label) == 0) {
- os << "SYN:" << label << "_conform";
+ os << "SOFT:" << label << "_conform";
}
else {
- os << "SYN:" << label << "_cross";
+ os << "SOFT:" << label << "_cross";
}
fid_ef = FD::Convert(os.str());
if (fid_ef > 0) {
@@ -122,11 +119,11 @@ struct SoftSyntacticFeaturesImpl {
}
break;
case '_':
- os << "SYN:" << label;
+ os << "SOFT:" << label;
fid_ef = FD::Convert(os.str());
if (lhs_str.compare(label) == 0) {
if (fid_ef > 0) {
- //cerr << "Feature: " << os.str() << endl;
+ //cerr << "Feature: " << os.str() << endl;
feats->set_value(fid_ef, 1.0);
}
}
@@ -139,7 +136,7 @@ struct SoftSyntacticFeaturesImpl {
break;
case '+':
if (lhs_str.compare(label) == 0) {
- os << "SYN:" << label << "_conform";
+ os << "SOFT:" << label << "_conform";
fid_ef = FD::Convert(os.str());
if (fid_ef > 0) {
//cerr << "Feature: " << os.str() << endl;
@@ -147,10 +144,10 @@ struct SoftSyntacticFeaturesImpl {
}
}
break;
- case '-':
- //cerr << "-" << endl;
+ case '-':
+ //cerr << "-" << endl;
if (lhs_str.compare(label) != 0) {
- os << "SYN:" << label << "_cross";
+ os << "SOFT:" << label << "_cross";
fid_ef = FD::Convert(os.str());
if (fid_ef > 0) {
//cerr << "Feature :" << os.str() << endl;
@@ -167,22 +164,22 @@ struct SoftSyntacticFeaturesImpl {
return lhs;
}
- Array2D<WordID> src_tree; // src_tree(i,j) NT = type
- mutable Array2D<map<const TRule*, int> > fids_ef; // fires for fully lexicalized
+ Array2D<WordID> src_tree; // src_tree(i,j) NT = type
+ mutable Array2D<map<const TRule*, int> > fids_ef; // fires for fully lexicalized
vector<pair<string, string> > feat_labels;
};
-SoftSyntacticFeatures::SoftSyntacticFeatures(const string& param) :
+SoftSyntaxFeatures::SoftSyntaxFeatures(const string& param) :
FeatureFunction(sizeof(WordID)) {
- impl = new SoftSyntacticFeaturesImpl(param);
+ impl = new SoftSyntaxFeaturesImpl(param);
}
-SoftSyntacticFeatures::~SoftSyntacticFeatures() {
+SoftSyntaxFeatures::~SoftSyntaxFeatures() {
delete impl;
impl = NULL;
}
-void SoftSyntacticFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+void SoftSyntaxFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
const Hypergraph::Edge& edge,
const vector<const void*>& ant_contexts,
SparseVector<double>* features,
@@ -196,6 +193,10 @@ void SoftSyntacticFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
impl->FireFeatures(*edge.rule_, edge.i_, edge.j_, ants, features);
}
-void SoftSyntacticFeatures::PrepareForInput(const SentenceMetadata& smeta) {
- impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength());
+void SoftSyntaxFeatures::PrepareForInput(const SentenceMetadata& smeta) {
+ ReadFile f = ReadFile(smeta.GetSGMLValue("src_tree"));
+ string tree;
+ f.ReadAll(tree);
+ impl->InitializeGrids(tree, smeta.GetSourceLength());
}
+