From 7cdaf68c0189b9ad0e4ba4482e4c75d7643ba982 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Tue, 29 Jul 2014 15:02:22 +0200
Subject: make PassThrough_1..N length features optional
---
decoder/scfg_translator.cc | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
(limited to 'decoder/scfg_translator.cc')
diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc
index 88f62769..57ad85e2 100644
--- a/decoder/scfg_translator.cc
+++ b/decoder/scfg_translator.cc
@@ -28,7 +28,7 @@ struct GlueGrammar : public TextGrammar {
};
struct PassThroughGrammar : public TextGrammar {
- PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0);
+ PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0, const unsigned int num_pt_features=0);
virtual bool HasRuleForSpan(int i, int j, int distance) const;
};
@@ -56,7 +56,7 @@ bool GlueGrammar::HasRuleForSpan(int i, int /* j */, int /* distance */) const {
return (i == 0);
}
-PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) {
+PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level, const unsigned num_pt_features) {
unordered_set ss;
for (int i = 0; i < input.size(); ++i) {
const vector& alts = input[i];
@@ -64,12 +64,17 @@ PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat,
const int j = alts[k].dist2next + i;
const string& src = TD::Convert(alts[k].label);
if (ss.count(alts[k].label) == 0) {
- int length = static_cast(log(UTF8StringLen(src)) / log(1.6)) + 1;
- if (length > 6) length = 6;
- string len_feat = "PassThrough_0=1";
- len_feat[12] += length;
- TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 " + len_feat));
- pt->a_.push_back(AlignmentPoint(0,0));
+ TRulePtr pt;
+ if (num_pt_features > 0) {
+ int length = static_cast(log(UTF8StringLen(src)) / log(1.6)) + 1;
+ if (length > num_pt_features) length = num_pt_features;
+ string len_feat = "PassThrough_0=1";
+ len_feat[12] += length;
+ TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 " + len_feat));
+ } else {
+ TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 "));
+ pt->a_.push_back(AlignmentPoint(0,0));
+ }
AddRule(pt);
RefineRule(pt, ctf_level);
ss.insert(alts[k].label);
@@ -86,6 +91,7 @@ struct SCFGTranslatorImpl {
SCFGTranslatorImpl(const boost::program_options::variables_map& conf) :
max_span_limit(conf["scfg_max_span_limit"].as()),
add_pass_through_rules(conf.count("add_pass_through_rules")),
+ num_pt_features(conf["add_pass_through_features"].as()),
goal(conf["goal"].as()),
default_nt(conf["scfg_default_nt"].as()),
use_ctf_(conf.count("coarse_to_fine_beam_prune"))
@@ -140,6 +146,7 @@ struct SCFGTranslatorImpl {
const int max_span_limit;
const bool add_pass_through_rules;
+ const unsigned int num_pt_features;
const string goal;
const string default_nt;
const bool use_ctf_;
@@ -187,7 +194,7 @@ struct SCFGTranslatorImpl {
smeta->SetSourceLength(lattice.size());
if (add_pass_through_rules){
if (!SILENT) cerr << "Adding pass through grammar" << endl;
- PassThroughGrammar* g = new PassThroughGrammar(lattice, default_nt, ctf_iterations_);
+ PassThroughGrammar* g = new PassThroughGrammar(lattice, default_nt, ctf_iterations_, num_pt_features);
g->SetGrammarName("PassThrough");
glist.push_back(GrammarPtr(g));
}
--
cgit v1.2.3
From 014714dfd7d19a9d84bfccf48f71502ba0e7024d Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Sat, 2 Aug 2014 14:11:14 +0200
Subject: fix: read the option under its registered name, add_extra_pass_through_features
---
decoder/decoder.cc | 2 +-
decoder/scfg_translator.cc | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
(limited to 'decoder/scfg_translator.cc')
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index 2c044192..081da8d6 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -366,7 +366,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
("beam_prune3", po::value(), "Optional pass 3")
("add_pass_through_rules,P","Add rules to translate OOV words as themselves")
- ("add_extra_pass_through_features,Q",po::value()->default_value(6), "Add PassThrough{1..N} features, capped at N.")
+ ("add_extra_pass_through_features,Q", po::value()->default_value(6), "Add PassThrough{1..N} features, capped at N.")
("k_best,k",po::value(),"Extract the k best derivations")
("unique_k_best,r", "Unique k-best translation list")
("aligner,a", "Run as a word/phrase aligner (src & ref required)")
diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc
index 57ad85e2..0c3e4cf7 100644
--- a/decoder/scfg_translator.cc
+++ b/decoder/scfg_translator.cc
@@ -91,7 +91,7 @@ struct SCFGTranslatorImpl {
SCFGTranslatorImpl(const boost::program_options::variables_map& conf) :
max_span_limit(conf["scfg_max_span_limit"].as()),
add_pass_through_rules(conf.count("add_pass_through_rules")),
- num_pt_features(conf["add_pass_through_features"].as()),
+ num_pt_features(conf["add_extra_pass_through_features"].as()),
goal(conf["goal"].as()),
default_nt(conf["scfg_default_nt"].as()),
use_ctf_(conf.count("coarse_to_fine_beam_prune"))
--
cgit v1.2.3
From f33ab2ff3f1cf135ffb80721e1f4d71d124bc8f9 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Sat, 2 Aug 2014 14:34:57 +0200
Subject: finally: add pass-through rules inside each branch; default add_extra_pass_through_features to 0
---
decoder/decoder.cc | 2 +-
decoder/scfg_translator.cc | 8 +++++---
training/dtrain/examples/toy/cdec.ini | 1 +
training/dtrain/examples/toy/dtrain.ini | 2 +-
4 files changed, 8 insertions(+), 5 deletions(-)
(limited to 'decoder/scfg_translator.cc')
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index 081da8d6..c384c33f 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -366,7 +366,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
("beam_prune3", po::value(), "Optional pass 3")
("add_pass_through_rules,P","Add rules to translate OOV words as themselves")
- ("add_extra_pass_through_features,Q", po::value()->default_value(6), "Add PassThrough{1..N} features, capped at N.")
+ ("add_extra_pass_through_features,Q", po::value()->default_value(0), "Add PassThrough{1..N} features, capped at N.")
("k_best,k",po::value(),"Extract the k best derivations")
("unique_k_best,r", "Unique k-best translation list")
("aligner,a", "Run as a word/phrase aligner (src & ref required)")
diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc
index 0c3e4cf7..c3cfcaad 100644
--- a/decoder/scfg_translator.cc
+++ b/decoder/scfg_translator.cc
@@ -64,19 +64,21 @@ PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat,
const int j = alts[k].dist2next + i;
const string& src = TD::Convert(alts[k].label);
if (ss.count(alts[k].label) == 0) {
- TRulePtr pt;
if (num_pt_features > 0) {
int length = static_cast(log(UTF8StringLen(src)) / log(1.6)) + 1;
if (length > num_pt_features) length = num_pt_features;
string len_feat = "PassThrough_0=1";
len_feat[12] += length;
TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 " + len_feat));
+ pt->a_.push_back(AlignmentPoint(0,0));
+ AddRule(pt);
+ RefineRule(pt, ctf_level);
} else {
TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 "));
pt->a_.push_back(AlignmentPoint(0,0));
+ AddRule(pt);
+ RefineRule(pt, ctf_level);
}
- AddRule(pt);
- RefineRule(pt, ctf_level);
ss.insert(alts[k].label);
}
}
diff --git a/training/dtrain/examples/toy/cdec.ini b/training/dtrain/examples/toy/cdec.ini
index b14f4819..e6c19abe 100644
--- a/training/dtrain/examples/toy/cdec.ini
+++ b/training/dtrain/examples/toy/cdec.ini
@@ -1,3 +1,4 @@
formalism=scfg
add_pass_through_rules=true
grammar=grammar.gz
+#add_extra_pass_through_features=6
diff --git a/training/dtrain/examples/toy/dtrain.ini b/training/dtrain/examples/toy/dtrain.ini
index cd715f26..ef956df7 100644
--- a/training/dtrain/examples/toy/dtrain.ini
+++ b/training/dtrain/examples/toy/dtrain.ini
@@ -2,7 +2,7 @@ decoder_config=cdec.ini
input=src
refs=tgt
output=-
-print_weights=logp shell_rule house_rule small_rule little_rule PassThrough
+print_weights=logp shell_rule house_rule small_rule little_rule PassThrough PassThrough_1 PassThrough_2 PassThrough_3 PassThrough_4 PassThrough_5 PassThrough_6
k=4
N=4
epochs=2
--
cgit v1.2.3