diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2011-09-13 18:46:33 +0100 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2011-09-13 18:46:33 +0100 |
commit | ddc38ce211d4b38f66e56dfa072856a4e9de2c17 (patch) | |
tree | 5b29232adc859781972975904dd54b9800e01b09 | |
parent | dffebff1a33e581a4a36ba060faf5a2ba8e87faa (diff) |
remove features that are overfitting
-rw-r--r-- | decoder/ff_source_syntax.cc | 25 |
1 files changed, 9 insertions, 16 deletions
diff --git a/decoder/ff_source_syntax.cc b/decoder/ff_source_syntax.cc index 5b7c16f6..ffe07f03 100644 --- a/decoder/ff_source_syntax.cc +++ b/decoder/ff_source_syntax.cc @@ -25,12 +25,10 @@ struct SourceSyntaxFeaturesImpl { void InitializeGrids(const string& tree, unsigned src_len) { assert(tree.size() > 0); - fids_cat.clear(); - fids_fonly.clear(); + //fids_cat.clear(); fids_ef.clear(); src_tree.clear(); - fids_cat.resize(src_len, src_len + 1); - fids_fonly.resize(src_len, src_len + 1); + //fids_cat.resize(src_len, src_len + 1); fids_ef.resize(src_len, src_len + 1); src_tree.resize(src_len, src_len + 1, TD::Convert("XX")); ParseTreeString(tree, src_len); @@ -89,15 +87,14 @@ struct SourceSyntaxFeaturesImpl { WordID FireFeatures(const TRule& rule, const int i, const int j, const WordID* ants, SparseVector<double>* feats) { //cerr << "fire features: " << rule.AsString() << " for " << i << "," << j << endl; const WordID lhs = src_tree(i,j); - int& fid_cat = fids_cat(i,j); - int& fid_fonly = fids_fonly(i,j)[&rule]; + //int& fid_cat = fids_cat(i,j); int& fid_ef = fids_ef(i,j)[&rule]; if (fid_ef <= 0) { ostringstream os; - ostringstream os2; + //ostringstream os2; os << "SYN:" << TD::Convert(lhs); - os2 << "SYN:" << TD::Convert(lhs) << '_' << SpanSizeTransform(j - i); - fid_cat = FD::Convert(os2.str()); + //os2 << "SYN:" << TD::Convert(lhs) << '_' << SpanSizeTransform(j - i); + //fid_cat = FD::Convert(os2.str()); os << ':'; unsigned ntc = 0; for (unsigned k = 0; k < rule.f_.size(); ++k) { @@ -109,7 +106,6 @@ struct SourceSyntaxFeaturesImpl { os << TD::Convert(fj); } } - fid_fonly = FD::Convert(os.str()); os << ':'; for (unsigned k = 0; k < rule.e_.size(); ++k) { const int ei = rule.e_[k]; @@ -121,18 +117,15 @@ struct SourceSyntaxFeaturesImpl { } fid_ef = FD::Convert(os.str()); } - if (fid_cat > 0) - feats->set_value(fid_cat, 1.0); - if (fid_fonly > 0) - feats->set_value(fid_fonly, 1.0); + //if (fid_cat > 0) + // feats->set_value(fid_cat, 1.0); if (fid_ef > 0) feats->set_value(fid_ef, 1.0); return lhs; } Array2D<WordID> src_tree; // src_tree(i,j) NT = type - mutable Array2D<int> fids_cat; // fires for an LHS match - mutable Array2D<map<const TRule*, int> > fids_fonly; // fires for an f-string + // mutable Array2D<int> fids_cat; // this tends to overfit baddly mutable Array2D<map<const TRule*, int> > fids_ef; // fires for fully lexicalized }; |