From decd2c4b1d4fb42a73a3217f347ea8f317e50869 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Tue, 5 Nov 2013 18:15:18 +0100 Subject: syntax features now read trees from files -- no more escaping! --- decoder/ff_parse_match.cc | 5 ++++- decoder/ff_soft_syntax.cc | 15 +++++++++------ decoder/ff_soft_syntax_mindist.cc | 15 +++++++++------ decoder/ff_source_syntax.cc | 7 +++++-- decoder/ff_source_syntax2.cc | 7 +++++-- utils/filelib.h | 5 ++++- 6 files changed, 36 insertions(+), 18 deletions(-) diff --git a/decoder/ff_parse_match.cc b/decoder/ff_parse_match.cc index 7c79302b..58026975 100644 --- a/decoder/ff_parse_match.cc +++ b/decoder/ff_parse_match.cc @@ -212,6 +212,9 @@ void ParseMatchFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, } void ParseMatchFeatures::PrepareForInput(const SentenceMetadata& smeta) { - impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength()); + ReadFile f = ReadFile(smeta.GetSGMLValue("src_tree")); + string tree; + f.ReadAll(tree); + impl->InitializeGrids(tree, smeta.GetSourceLength()); } diff --git a/decoder/ff_soft_syntax.cc b/decoder/ff_soft_syntax.cc index a3d26135..23fe87bd 100644 --- a/decoder/ff_soft_syntax.cc +++ b/decoder/ff_soft_syntax.cc @@ -107,10 +107,10 @@ struct SoftSyntaxFeaturesImpl { switch(feat_type) { case '2': if (lhs_str.compare(label) == 0) { - os << "SYN:" << label << "_conform"; + os << "SOFT:" << label << "_conform"; } else { - os << "SYN:" << label << "_cross"; + os << "SOFT:" << label << "_cross"; } fid_ef = FD::Convert(os.str()); if (fid_ef > 0) { @@ -119,7 +119,7 @@ struct SoftSyntaxFeaturesImpl { } break; case '_': - os << "SYN:" << label; + os << "SOFT:" << label; fid_ef = FD::Convert(os.str()); if (lhs_str.compare(label) == 0) { if (fid_ef > 0) { @@ -136,7 +136,7 @@ struct SoftSyntaxFeaturesImpl { break; case '+': if (lhs_str.compare(label) == 0) { - os << "SYN:" << label << "_conform"; + os << "SOFT:" << label << "_conform"; fid_ef = FD::Convert(os.str()); if (fid_ef > 0) { //cerr << "Feature: " << os.str() << endl; @@ -147,7 +147,7 @@ struct SoftSyntaxFeaturesImpl { case '-': //cerr << "-" << endl; if (lhs_str.compare(label) != 0) { - os << "SYN:" << label << "_cross"; + os << "SOFT:" << label << "_cross"; fid_ef = FD::Convert(os.str()); if (fid_ef > 0) { //cerr << "Feature :" << os.str() << endl; @@ -194,6 +194,9 @@ void SoftSyntaxFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, } void SoftSyntaxFeatures::PrepareForInput(const SentenceMetadata& smeta) { - impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength()); + ReadFile f = ReadFile(smeta.GetSGMLValue("src_tree")); + string tree; + f.ReadAll(tree); + impl->InitializeGrids(tree, smeta.GetSourceLength()); } diff --git a/decoder/ff_soft_syntax_mindist.cc b/decoder/ff_soft_syntax_mindist.cc index 3f531986..a23f70f8 100644 --- a/decoder/ff_soft_syntax_mindist.cc +++ b/decoder/ff_soft_syntax_mindist.cc @@ -146,10 +146,10 @@ struct SoftSyntaxFeaturesMindistImpl { case '2': if (min_dist_label.compare(label) == 0) { if (min_dist == 0) { - os << "SYN:" << label << "_conform"; + os << "SOFTM:" << label << "_conform"; } else { - os << "SYN:" << label << "_cross"; + os << "SOFTM:" << label << "_cross"; } fid_ef = FD::Convert(os.str()); //cerr << "Feature :" << os.str() << endl; @@ -157,7 +157,7 @@ struct SoftSyntaxFeaturesMindistImpl { } break; case '_': - os << "SYN:" << label; + os << "SOFTM:" << label; fid_ef = FD::Convert(os.str()); if (min_dist_label.compare(label) == 0) { //cerr << "Feature: " << os.str() << endl; @@ -172,7 +172,7 @@ struct SoftSyntaxFeaturesMindistImpl { break; case '+': if (min_dist_label.compare(label) == 0) { - os << "SYN:" << label << "_conform"; + os << "SOFTM:" << label << "_conform"; fid_ef = FD::Convert(os.str()); if (min_dist == 0) { //cerr << "Feature: " << os.str() << endl; @@ -183,7 +183,7 @@ struct SoftSyntaxFeaturesMindistImpl { case '-': //cerr << "-" << endl; if (min_dist_label.compare(label) != 0) { - os << "SYN:" << label << "_cross"; + os << "SOFTM:" << label << "_cross"; fid_ef = FD::Convert(os.str()); if (min_dist > 0) { //cerr << "Feature :" << os.str() << endl; @@ -230,6 +230,9 @@ void SoftSyntaxFeaturesMindist::TraversalFeaturesImpl(const SentenceMetadata& sm } void SoftSyntaxFeaturesMindist::PrepareForInput(const SentenceMetadata& smeta) { - impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength()); + ReadFile f = ReadFile(smeta.GetSGMLValue("src_tree")); + string tree; + f.ReadAll(tree); + impl->InitializeGrids(tree, smeta.GetSourceLength()); } diff --git a/decoder/ff_source_syntax.cc b/decoder/ff_source_syntax.cc index 34e7ab69..4879ca1d 100644 --- a/decoder/ff_source_syntax.cc +++ b/decoder/ff_source_syntax.cc @@ -104,7 +104,7 @@ struct SourceSyntaxFeaturesImpl { if (fid_ef <= 0) { ostringstream os; //ostringstream os2; - os << "SYN:" << TD::Convert(lhs); + os << "SSYN:" << TD::Convert(lhs); //os2 << "SYN:" << TD::Convert(lhs) << '_' << SpanSizeTransform(j - i); //fid_cat = FD::Convert(os2.str()); os << ':'; @@ -173,7 +173,10 @@ void SourceSyntaxFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, } void SourceSyntaxFeatures::PrepareForInput(const SentenceMetadata& smeta) { - impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength()); + ReadFile f = ReadFile(smeta.GetSGMLValue("src_tree")); + string tree; + f.ReadAll(tree); + impl->InitializeGrids(tree, smeta.GetSourceLength()); } struct SourceSpanSizeFeaturesImpl { diff --git a/decoder/ff_source_syntax2.cc b/decoder/ff_source_syntax2.cc index 63736342..9d0bc33f 100644 --- a/decoder/ff_source_syntax2.cc +++ b/decoder/ff_source_syntax2.cc @@ -90,7 +90,7 @@ struct SourceSyntaxFeatures2Impl { const WordID lhs = src_tree(i,j); int& fid_ef = fids_ef(i,j)[&rule]; ostringstream os; - os << "SYN:" << TD::Convert(lhs); + os << "SSYN2:" << TD::Convert(lhs); os << ':'; unsigned ntc = 0; for (unsigned k = 0; k < rule.f_.size(); ++k) { @@ -159,6 +159,9 @@ void SourceSyntaxFeatures2::TraversalFeaturesImpl(const SentenceMetadata& smeta, } void SourceSyntaxFeatures2::PrepareForInput(const SentenceMetadata& smeta) { - impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength()); + ReadFile f = ReadFile(smeta.GetSGMLValue("src_tree")); + string tree; + f.ReadAll(tree); + impl->InitializeGrids(tree, smeta.GetSourceLength()); } diff --git a/utils/filelib.h b/utils/filelib.h index b9ea3940..4fa69760 100644 --- a/utils/filelib.h +++ b/utils/filelib.h @@ -75,7 +75,10 @@ class ReadFile : public BaseFile { } } } - + void ReadAll(std::string& s) { + getline(*stream(), s, (char) EOF); + if (s.size() > 0) s.resize(s.size()-1); + } }; class WriteFile : public BaseFile { -- cgit v1.2.3