summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Patrick Simianer <p@simianer.de> 2013-11-03 21:56:06 +0100
committer: Patrick Simianer <p@simianer.de> 2013-11-03 21:56:06 +0100
commit: a9171fa0aa0ad6d7611fe079ecee464bc5f78231 (patch)
tree: 340dd0d409a8ac21dc4e886fbf8797bf42aecc85
parent: 12577135f7504a3909111479c9053410bfed8354 (diff)
cleaned up parsematch features
-rw-r--r-- decoder/ff_parse_match.cc 17
-rw-r--r-- decoder/ff_parse_match.h 1
2 files changed, 11 insertions, 7 deletions
diff --git a/decoder/ff_parse_match.cc b/decoder/ff_parse_match.cc
index ed556b91..94634b27 100644
--- a/decoder/ff_parse_match.cc
+++ b/decoder/ff_parse_match.cc
@@ -13,6 +13,10 @@ using namespace std;
// implements the parse match features as described in Vilar et al. (2008)
// source trees must be represented in Penn Treebank format, e.g.
// (S (NP John) (VP (V left)))
+//
+// Annotate source sentences with <seg id="..." grammar="..." src_tree="(S ...)">...</seg>
+// Note: You need to escape quite a lot of stuff in all your models!
+//
struct ParseMatchFeaturesImpl {
ParseMatchFeaturesImpl(const string& param) {
@@ -42,10 +46,8 @@ struct ParseMatchFeaturesImpl {
void InitializeGrids(const string& tree, unsigned src_len) {
assert(tree.size() > 0);
- //fids_cat.clear();
fids_ef.clear();
src_tree.clear();
- //fids_cat.resize(src_len, src_len + 1);
fids_ef.resize(src_len, src_len + 1);
src_tree.resize(src_len, src_len + 1, TD::Convert("XX"));
ParseTreeString(tree, src_len);
@@ -112,7 +114,7 @@ struct ParseMatchFeaturesImpl {
int fid_ef = FD::Convert("PM");
int min_dist; // minimal distance to next syntactic constituent of this rule's LHS
int summed_min_dists; // minimal distances of LHS and NTs summed up
- if (TD::Convert(lhs).compare("XX") != 0)
+ if (TD::Convert(lhs).compare("XX") != 0)
min_dist= 0;
// compute the distance to the next syntactical constituent
else {
@@ -131,7 +133,7 @@ struct ParseMatchFeaturesImpl {
ok = 1;
break;
}
- // check if removing k words from the rule span will
+ // check if removing k words from the rule span will
// lead to a syntactical constituent
else {
//cerr << "Hilfe...!" << endl;
@@ -144,7 +146,7 @@ struct ParseMatchFeaturesImpl {
ok = 1;
break;
}
- }
+ }
}
if (ok) break;
}
@@ -183,9 +185,9 @@ struct ParseMatchFeaturesImpl {
return min_dist;
}
- Array2D<WordID> src_tree; // src_tree(i,j) NT = type
+ Array2D<WordID> src_tree; // src_tree(i,j) NT = type
unsigned int src_sent_len;
- mutable Array2D<map<const TRule*, int> > fids_ef; // fires for fully lexicalized
+ mutable Array2D<map<const TRule*, int> > fids_ef; // fires for fully lexicalized
int scoring_method;
};
@@ -216,3 +218,4 @@ void ParseMatchFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
void ParseMatchFeatures::PrepareForInput(const SentenceMetadata& smeta) {
impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength());
}
+
diff --git a/decoder/ff_parse_match.h b/decoder/ff_parse_match.h
index fa73481a..7820b418 100644
--- a/decoder/ff_parse_match.h
+++ b/decoder/ff_parse_match.h
@@ -23,3 +23,4 @@ class ParseMatchFeatures : public FeatureFunction {
};
#endif
+