diff options
author | Paul Baltescu <pauldb89@gmail.com> | 2013-11-23 17:33:47 +0000 |
---|---|---|
committer | Paul Baltescu <pauldb89@gmail.com> | 2013-11-23 17:33:47 +0000 |
commit | cc6313b23cac25eb05976b6cf64f96faf1ed4163 (patch) | |
tree | 3dc28060ad25b43773e875bea7388ab1cefcd927 /decoder/ff_source_syntax.cc | |
parent | 7990c750829af93f0a1e0fc14534582f52ee9e8c (diff) | |
parent | f2fb69b10a897e8beb4e6e6d6cbb4327096235ef (diff) |
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'decoder/ff_source_syntax.cc')
-rw-r--r-- | decoder/ff_source_syntax.cc | 49 |
1 files changed, 37 insertions, 12 deletions
diff --git a/decoder/ff_source_syntax.cc b/decoder/ff_source_syntax.cc index a1997695..6b183863 100644 --- a/decoder/ff_source_syntax.cc +++ b/decoder/ff_source_syntax.cc @@ -2,8 +2,13 @@ #include <sstream> #include <stack> +#ifndef HAVE_OLD_CPP +# include <unordered_set> +#else +# include <tr1/unordered_set> +namespace std { using std::tr1::unordered_set; } +#endif -#include "hg.h" #include "sentence_metadata.h" #include "array2d.h" #include "filelib.h" @@ -24,6 +29,17 @@ inline int SpanSizeTransform(unsigned span_size) { struct SourceSyntaxFeaturesImpl { SourceSyntaxFeaturesImpl() {} + SourceSyntaxFeaturesImpl(const string& param) { + if (!(param.compare("") == 0)) { + string triggered_features_fn = param; + ReadFile triggered_features(triggered_features_fn); + string in; + while(getline(*triggered_features, in)) { + feature_filter.insert(FD::Convert(in)); + } + } + } + void InitializeGrids(const string& tree, unsigned src_len) { assert(tree.size() > 0); //fids_cat.clear(); @@ -93,7 +109,7 @@ struct SourceSyntaxFeaturesImpl { if (fid_ef <= 0) { ostringstream os; //ostringstream os2; - os << "SYN:" << TD::Convert(lhs); + os << "SSYN:" << TD::Convert(lhs); //os2 << "SYN:" << TD::Convert(lhs) << '_' << SpanSizeTransform(j - i); //fid_cat = FD::Convert(os2.str()); os << ':'; @@ -118,21 +134,28 @@ struct SourceSyntaxFeaturesImpl { } fid_ef = FD::Convert(os.str()); } - //if (fid_cat > 0) - // feats->set_value(fid_cat, 1.0); - if (fid_ef > 0) - feats->set_value(fid_ef, 1.0); + if (fid_ef > 0) { + if (feature_filter.size()>0) { + if (feature_filter.find(fid_ef) != feature_filter.end()) { + feats->set_value(fid_ef, 1.0); + } + } else { + feats->set_value(fid_ef, 1.0); + } + } + cerr << FD::Convert(fid_ef) << endl; return lhs; } - Array2D<WordID> src_tree; // src_tree(i,j) NT = type - // mutable Array2D<int> fids_cat; // this tends to overfit baddly - mutable Array2D<map<const TRule*, int> > fids_ef; // fires for fully lexicalized + Array2D<WordID> src_tree; // src_tree(i,j) NT = type + // mutable Array2D<int> fids_cat; // this tends to overfit baddly + mutable Array2D<map<const TRule*, int> > fids_ef; // fires for fully lexicalized + unordered_set<int> feature_filter; }; SourceSyntaxFeatures::SourceSyntaxFeatures(const string& param) : FeatureFunction(sizeof(WordID)) { - impl = new SourceSyntaxFeaturesImpl; + impl = new SourceSyntaxFeaturesImpl(param); } SourceSyntaxFeatures::~SourceSyntaxFeatures() { @@ -155,7 +178,10 @@ void SourceSyntaxFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, } void SourceSyntaxFeatures::PrepareForInput(const SentenceMetadata& smeta) { - impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength()); + ReadFile f = ReadFile(smeta.GetSGMLValue("src_tree")); + string tree; + f.ReadAll(tree); + impl->InitializeGrids(tree, smeta.GetSourceLength()); } struct SourceSpanSizeFeaturesImpl { @@ -230,4 +256,3 @@ void SourceSpanSizeFeatures::PrepareForInput(const SentenceMetadata& smeta) { impl->InitializeGrids(smeta.GetSourceLength()); } - |