diff options
author | Patrick Simianer <p@simianer.de> | 2013-11-03 21:56:06 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2013-11-03 21:56:06 +0100 |
commit | a9171fa0aa0ad6d7611fe079ecee464bc5f78231 (patch) | |
tree | 340dd0d409a8ac21dc4e886fbf8797bf42aecc85 /decoder | |
parent | 12577135f7504a3909111479c9053410bfed8354 (diff) |
cleaned up parsematch features
Diffstat (limited to 'decoder')
-rw-r--r-- | decoder/ff_parse_match.cc | 17 | ||||
-rw-r--r-- | decoder/ff_parse_match.h | 1 |
2 files changed, 11 insertions, 7 deletions
diff --git a/decoder/ff_parse_match.cc b/decoder/ff_parse_match.cc index ed556b91..94634b27 100644 --- a/decoder/ff_parse_match.cc +++ b/decoder/ff_parse_match.cc @@ -13,6 +13,10 @@ using namespace std; // implements the parse match features as described in Vilar et al. (2008) // source trees must be represented in Penn Treebank format, e.g. // (S (NP John) (VP (V left))) +// +// Annotate source sentences with <seg id="..." grammar="..." src_tree="(S ...)">...</seg>" +// Note: You need to escape quite a lot of stuff in all your models! +// struct ParseMatchFeaturesImpl { ParseMatchFeaturesImpl(const string& param) { @@ -42,10 +46,8 @@ struct ParseMatchFeaturesImpl { void InitializeGrids(const string& tree, unsigned src_len) { assert(tree.size() > 0); - //fids_cat.clear(); fids_ef.clear(); src_tree.clear(); - //fids_cat.resize(src_len, src_len + 1); fids_ef.resize(src_len, src_len + 1); src_tree.resize(src_len, src_len + 1, TD::Convert("XX")); ParseTreeString(tree, src_len); @@ -112,7 +114,7 @@ struct ParseMatchFeaturesImpl { int fid_ef = FD::Convert("PM"); int min_dist; // minimal distance to next syntactic constituent of this rule's LHS int summed_min_dists; // minimal distances of LHS and NTs summed up - if (TD::Convert(lhs).compare("XX") != 0) + if (TD::Convert(lhs).compare("XX") != 0) min_dist= 0; // compute the distance to the next syntactical constituent else { @@ -131,7 +133,7 @@ struct ParseMatchFeaturesImpl { ok = 1; break; } - // check if removing k words from the rule span will + // check if removing k words from the rule span will // lead to a syntactical constituent else { //cerr << "Hilfe...!" << endl; @@ -144,7 +146,7 @@ struct ParseMatchFeaturesImpl { ok = 1; break; } - } + } } if (ok) break; } @@ -183,9 +185,9 @@ struct ParseMatchFeaturesImpl { return min_dist; } - Array2D<WordID> src_tree; // src_tree(i,j) NT = type + Array2D<WordID> src_tree; // src_tree(i,j) NT = type unsigned int src_sent_len; - mutable Array2D<map<const TRule*, int> > fids_ef; // fires for fully lexicalized + mutable Array2D<map<const TRule*, int> > fids_ef; // fires for fully lexicalized int scoring_method; }; @@ -216,3 +218,4 @@ void ParseMatchFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, void ParseMatchFeatures::PrepareForInput(const SentenceMetadata& smeta) { impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength()); } + diff --git a/decoder/ff_parse_match.h b/decoder/ff_parse_match.h index fa73481a..7820b418 100644 --- a/decoder/ff_parse_match.h +++ b/decoder/ff_parse_match.h @@ -23,3 +23,4 @@ class ParseMatchFeatures : public FeatureFunction { }; #endif + |