From 5a23ee2ae792b629e0f52b9c7fdf293de60a0ca1 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Tue, 5 Nov 2013 16:29:03 +0100
Subject: cleaning up syntax features
---
decoder/Makefile.am | 5 +-
decoder/cdec_ff.cc | 25 +---
decoder/ff_parse_match.cc | 4 -
decoder/ff_soft_syntax.cc | 34 +++---
decoder/ff_soft_syntax.h | 16 +--
decoder/ff_soft_syntax2.cc | 234 ------------------------------------
decoder/ff_soft_syntax2.h | 27 -----
decoder/ff_soft_syntax_mindist.cc | 235 ++++++++++++++++++++++++++++++++++++
decoder/ff_soft_syntax_mindist.h | 27 +++++
decoder/ff_source_syntax.cc | 37 ++++--
decoder/ff_source_syntax.h | 10 +-
decoder/ff_source_syntax2.cc | 25 ++--
decoder/ff_source_syntax2.h | 5 +-
decoder/ff_source_syntax2_p.cc | 166 --------------------------
decoder/ff_source_syntax2_p.h | 25 ----
decoder/ff_source_syntax_p.cc | 245 --------------------------------------
decoder/ff_source_syntax_p.h | 42 -------
17 files changed, 342 insertions(+), 820 deletions(-)
delete mode 100644 decoder/ff_soft_syntax2.cc
delete mode 100644 decoder/ff_soft_syntax2.h
create mode 100644 decoder/ff_soft_syntax_mindist.cc
create mode 100644 decoder/ff_soft_syntax_mindist.h
delete mode 100644 decoder/ff_source_syntax2_p.cc
delete mode 100644 decoder/ff_source_syntax2_p.h
delete mode 100644 decoder/ff_source_syntax_p.cc
delete mode 100644 decoder/ff_source_syntax_p.h
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index 914faaea..e7ebe840 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -62,7 +62,6 @@ libcdec_a_SOURCES = \
ff_ruleshape.h \
ff_sample_fsa.h \
ff_source_path.h \
- ff_source_syntax.h \
ff_spans.h \
ff_tagger.h \
ff_wordalign.h \
@@ -145,11 +144,9 @@ libcdec_a_SOURCES = \
ff_source_path.cc \
ff_parse_match.cc \
ff_soft_syntax.cc \
- ff_soft_syntax2.cc \
+ ff_soft_syntax_mindist.cc \
ff_source_syntax.cc \
- ff_source_syntax_p.cc \
ff_source_syntax2.cc \
- ff_source_syntax2_p.cc \
ff_bleu.cc \
ff_factory.cc \
incremental.cc \
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index e7b31f50..a36a0f5f 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -15,17 +15,11 @@
#include "ff_ruleshape.h"
#include "ff_bleu.h"
#include "ff_soft_syntax.h"
-#include "ff_soft_syntax2.h"
+#include "ff_soft_syntax_mindist.h"
#include "ff_source_path.h"
-
-
#include "ff_parse_match.h"
#include "ff_source_syntax.h"
-#include "ff_source_syntax_p.h"
#include "ff_source_syntax2.h"
-#include "ff_source_syntax2_p.h"
-
-
#include "ff_register.h"
#include "ff_charset.h"
#include "ff_wordset.h"
@@ -58,23 +52,12 @@ void register_feature_functions() {
ff_registry.Register("NgramFeatures", new FFFactory());
ff_registry.Register("RuleContextFeatures", new FFFactory());
ff_registry.Register("RuleIdentityFeatures", new FFFactory());
-
-
ff_registry.Register("ParseMatchFeatures", new FFFactory);
-
- ff_registry.Register("SoftSyntacticFeatures", new FFFactory);
- ff_registry.Register("SoftSyntacticFeatures2", new FFFactory);
-
+ ff_registry.Register("SoftSyntaxFeatures", new FFFactory);
+ ff_registry.Register("SoftSyntaxFeaturesMindist", new FFFactory);
ff_registry.Register("SourceSyntaxFeatures", new FFFactory);
- ff_registry.Register("SourceSyntaxFeatures2", new FFFactory);
-
ff_registry.Register("SourceSpanSizeFeatures", new FFFactory);
-
- //ff_registry.Register("PSourceSyntaxFeatures", new FFFactory);
- //ff_registry.Register("PSourceSpanSizeFeatures", new FFFactory);
- //ff_registry.Register("PSourceSyntaxFeatures2", new FFFactory);
-
-
+ ff_registry.Register("SourceSyntaxFeatures2", new FFFactory);
ff_registry.Register("CMR2008ReorderingFeatures", new FFFactory());
ff_registry.Register("RuleSourceBigramFeatures", new FFFactory());
ff_registry.Register("RuleTargetBigramFeatures", new FFFactory());
diff --git a/decoder/ff_parse_match.cc b/decoder/ff_parse_match.cc
index 94634b27..7c79302b 100644
--- a/decoder/ff_parse_match.cc
+++ b/decoder/ff_parse_match.cc
@@ -13,10 +13,6 @@ using namespace std;
// implements the parse match features as described in Vilar et al. (2008)
// source trees must be represented in Penn Treebank format, e.g.
// (S (NP John) (VP (V left)))
-//
-// Annotate source sentences with ..."
-// Note: You need to escape quite a lot of stuff in all your models!
-//
struct ParseMatchFeaturesImpl {
ParseMatchFeaturesImpl(const string& param) {
diff --git a/decoder/ff_soft_syntax.cc b/decoder/ff_soft_syntax.cc
index d84f2e6d..a3d26135 100644
--- a/decoder/ff_soft_syntax.cc
+++ b/decoder/ff_soft_syntax.cc
@@ -13,16 +13,15 @@
using namespace std;
-// Implements the soft syntactic features described in
+// Implements the soft syntactic features described in
// Marton and Resnik (2008): "Soft Syntactic Constraints for Hierarchical Phrase-Based Translation".
// Source trees must be represented in Penn Treebank format,
// e.g. (S (NP John) (VP (V left))).
-struct SoftSyntacticFeaturesImpl {
- SoftSyntacticFeaturesImpl(const string& param) {
+struct SoftSyntaxFeaturesImpl {
+ SoftSyntaxFeaturesImpl(const string& param) {
vector labels = SplitOnWhitespace(param);
- //for (unsigned int i = 0; i < labels.size(); i++)
- //cerr << "Labels: " << labels.at(i) << endl;
+ //for (unsigned int i = 0; i < labels.size(); i++) { cerr << "Labels: " << labels.at(i) << endl; }
for (unsigned int i = 0; i < labels.size(); i++) {
string label = labels.at(i);
pair feat_label;
@@ -34,10 +33,8 @@ struct SoftSyntacticFeaturesImpl {
void InitializeGrids(const string& tree, unsigned src_len) {
assert(tree.size() > 0);
- //fids_cat.clear();
fids_ef.clear();
src_tree.clear();
- //fids_cat.resize(src_len, src_len + 1);
fids_ef.resize(src_len, src_len + 1);
src_tree.resize(src_len, src_len + 1, TD::Convert("XX"));
ParseTreeString(tree, src_len);
@@ -99,7 +96,7 @@ struct SoftSyntacticFeaturesImpl {
const WordID lhs = src_tree(i,j);
string lhs_str = TD::Convert(lhs);
//cerr << "LHS: " << lhs_str << " from " << i << " to " << j << endl;
- //cerr << "RULE :"<< rule << endl;
+ //cerr << "RULE :"<< rule << endl;
int& fid_ef = fids_ef(i,j)[&rule];
for (unsigned int i = 0; i < feat_labels.size(); i++) {
ostringstream os;
@@ -126,7 +123,7 @@ struct SoftSyntacticFeaturesImpl {
fid_ef = FD::Convert(os.str());
if (lhs_str.compare(label) == 0) {
if (fid_ef > 0) {
- //cerr << "Feature: " << os.str() << endl;
+ //cerr << "Feature: " << os.str() << endl;
feats->set_value(fid_ef, 1.0);
}
}
@@ -147,8 +144,8 @@ struct SoftSyntacticFeaturesImpl {
}
}
break;
- case '-':
- //cerr << "-" << endl;
+ case '-':
+ //cerr << "-" << endl;
if (lhs_str.compare(label) != 0) {
os << "SYN:" << label << "_cross";
fid_ef = FD::Convert(os.str());
@@ -167,22 +164,22 @@ struct SoftSyntacticFeaturesImpl {
return lhs;
}
- Array2D src_tree; // src_tree(i,j) NT = type
- mutable Array2D