From decd2c4b1d4fb42a73a3217f347ea8f317e50869 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Tue, 5 Nov 2013 18:15:18 +0100
Subject: syntax features now read trees from files -- no more escaping!
---
decoder/ff_parse_match.cc | 5 ++++-
decoder/ff_soft_syntax.cc | 15 +++++++++------
decoder/ff_soft_syntax_mindist.cc | 15 +++++++++------
decoder/ff_source_syntax.cc | 7 +++++--
decoder/ff_source_syntax2.cc | 7 +++++--
utils/filelib.h | 5 ++++-
6 files changed, 36 insertions(+), 18 deletions(-)
diff --git a/decoder/ff_parse_match.cc b/decoder/ff_parse_match.cc
index 7c79302b..58026975 100644
--- a/decoder/ff_parse_match.cc
+++ b/decoder/ff_parse_match.cc
@@ -212,6 +212,9 @@ void ParseMatchFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
}
void ParseMatchFeatures::PrepareForInput(const SentenceMetadata& smeta) {
- impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength());
+ ReadFile f = ReadFile(smeta.GetSGMLValue("src_tree"));
+ string tree;
+ f.ReadAll(tree);
+ impl->InitializeGrids(tree, smeta.GetSourceLength());
}
diff --git a/decoder/ff_soft_syntax.cc b/decoder/ff_soft_syntax.cc
index a3d26135..23fe87bd 100644
--- a/decoder/ff_soft_syntax.cc
+++ b/decoder/ff_soft_syntax.cc
@@ -107,10 +107,10 @@ struct SoftSyntaxFeaturesImpl {
switch(feat_type) {
case '2':
if (lhs_str.compare(label) == 0) {
- os << "SYN:" << label << "_conform";
+ os << "SOFT:" << label << "_conform";
}
else {
- os << "SYN:" << label << "_cross";
+ os << "SOFT:" << label << "_cross";
}
fid_ef = FD::Convert(os.str());
if (fid_ef > 0) {
@@ -119,7 +119,7 @@ struct SoftSyntaxFeaturesImpl {
}
break;
case '_':
- os << "SYN:" << label;
+ os << "SOFT:" << label;
fid_ef = FD::Convert(os.str());
if (lhs_str.compare(label) == 0) {
if (fid_ef > 0) {
@@ -136,7 +136,7 @@ struct SoftSyntaxFeaturesImpl {
break;
case '+':
if (lhs_str.compare(label) == 0) {
- os << "SYN:" << label << "_conform";
+ os << "SOFT:" << label << "_conform";
fid_ef = FD::Convert(os.str());
if (fid_ef > 0) {
//cerr << "Feature: " << os.str() << endl;
@@ -147,7 +147,7 @@ struct SoftSyntaxFeaturesImpl {
case '-':
//cerr << "-" << endl;
if (lhs_str.compare(label) != 0) {
- os << "SYN:" << label << "_cross";
+ os << "SOFT:" << label << "_cross";
fid_ef = FD::Convert(os.str());
if (fid_ef > 0) {
//cerr << "Feature :" << os.str() << endl;
@@ -194,6 +194,9 @@ void SoftSyntaxFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
}
void SoftSyntaxFeatures::PrepareForInput(const SentenceMetadata& smeta) {
- impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength());
+ ReadFile f = ReadFile(smeta.GetSGMLValue("src_tree"));
+ string tree;
+ f.ReadAll(tree);
+ impl->InitializeGrids(tree, smeta.GetSourceLength());
}
diff --git a/decoder/ff_soft_syntax_mindist.cc b/decoder/ff_soft_syntax_mindist.cc
index 3f531986..a23f70f8 100644
--- a/decoder/ff_soft_syntax_mindist.cc
+++ b/decoder/ff_soft_syntax_mindist.cc
@@ -146,10 +146,10 @@ struct SoftSyntaxFeaturesMindistImpl {
case '2':
if (min_dist_label.compare(label) == 0) {
if (min_dist == 0) {
- os << "SYN:" << label << "_conform";
+ os << "SOFTM:" << label << "_conform";
}
else {
- os << "SYN:" << label << "_cross";
+ os << "SOFTM:" << label << "_cross";
}
fid_ef = FD::Convert(os.str());
//cerr << "Feature :" << os.str() << endl;
@@ -157,7 +157,7 @@ struct SoftSyntaxFeaturesMindistImpl {
}
break;
case '_':
- os << "SYN:" << label;
+ os << "SOFTM:" << label;
fid_ef = FD::Convert(os.str());
if (min_dist_label.compare(label) == 0) {
//cerr << "Feature: " << os.str() << endl;
@@ -172,7 +172,7 @@ struct SoftSyntaxFeaturesMindistImpl {
break;
case '+':
if (min_dist_label.compare(label) == 0) {
- os << "SYN:" << label << "_conform";
+ os << "SOFTM:" << label << "_conform";
fid_ef = FD::Convert(os.str());
if (min_dist == 0) {
//cerr << "Feature: " << os.str() << endl;
@@ -183,7 +183,7 @@ struct SoftSyntaxFeaturesMindistImpl {
case '-':
//cerr << "-" << endl;
if (min_dist_label.compare(label) != 0) {
- os << "SYN:" << label << "_cross";
+ os << "SOFTM:" << label << "_cross";
fid_ef = FD::Convert(os.str());
if (min_dist > 0) {
//cerr << "Feature :" << os.str() << endl;
@@ -230,6 +230,9 @@ void SoftSyntaxFeaturesMindist::TraversalFeaturesImpl(const SentenceMetadata& sm
}
void SoftSyntaxFeaturesMindist::PrepareForInput(const SentenceMetadata& smeta) {
- impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength());
+ ReadFile f = ReadFile(smeta.GetSGMLValue("src_tree"));
+ string tree;
+ f.ReadAll(tree);
+ impl->InitializeGrids(tree, smeta.GetSourceLength());
}
diff --git a/decoder/ff_source_syntax.cc b/decoder/ff_source_syntax.cc
index 34e7ab69..4879ca1d 100644
--- a/decoder/ff_source_syntax.cc
+++ b/decoder/ff_source_syntax.cc
@@ -104,7 +104,7 @@ struct SourceSyntaxFeaturesImpl {
if (fid_ef <= 0) {
ostringstream os;
//ostringstream os2;
- os << "SYN:" << TD::Convert(lhs);
+ os << "SSYN:" << TD::Convert(lhs);
//os2 << "SYN:" << TD::Convert(lhs) << '_' << SpanSizeTransform(j - i);
//fid_cat = FD::Convert(os2.str());
os << ':';
@@ -173,7 +173,10 @@ void SourceSyntaxFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
}
void SourceSyntaxFeatures::PrepareForInput(const SentenceMetadata& smeta) {
- impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength());
+ ReadFile f = ReadFile(smeta.GetSGMLValue("src_tree"));
+ string tree;
+ f.ReadAll(tree);
+ impl->InitializeGrids(tree, smeta.GetSourceLength());
}
struct SourceSpanSizeFeaturesImpl {
diff --git a/decoder/ff_source_syntax2.cc b/decoder/ff_source_syntax2.cc
index 63736342..9d0bc33f 100644
--- a/decoder/ff_source_syntax2.cc
+++ b/decoder/ff_source_syntax2.cc
@@ -90,7 +90,7 @@ struct SourceSyntaxFeatures2Impl {
const WordID lhs = src_tree(i,j);
int& fid_ef = fids_ef(i,j)[&rule];
ostringstream os;
- os << "SYN:" << TD::Convert(lhs);
+ os << "SSYN2:" << TD::Convert(lhs);
os << ':';
unsigned ntc = 0;
for (unsigned k = 0; k < rule.f_.size(); ++k) {
@@ -159,6 +159,9 @@ void SourceSyntaxFeatures2::TraversalFeaturesImpl(const SentenceMetadata& smeta,
}
void SourceSyntaxFeatures2::PrepareForInput(const SentenceMetadata& smeta) {
- impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength());
+ ReadFile f = ReadFile(smeta.GetSGMLValue("src_tree"));
+ string tree;
+ f.ReadAll(tree);
+ impl->InitializeGrids(tree, smeta.GetSourceLength());
}
diff --git a/utils/filelib.h b/utils/filelib.h
index b9ea3940..4fa69760 100644
--- a/utils/filelib.h
+++ b/utils/filelib.h
@@ -75,7 +75,10 @@ class ReadFile : public BaseFile