From 7cdaf68c0189b9ad0e4ba4482e4c75d7643ba982 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Tue, 29 Jul 2014 15:02:22 +0200 Subject: make PassThrough1..N optional --- decoder/scfg_translator.cc | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'decoder/scfg_translator.cc') diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index 88f62769..57ad85e2 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -28,7 +28,7 @@ struct GlueGrammar : public TextGrammar { }; struct PassThroughGrammar : public TextGrammar { - PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0); + PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0, const unsigned int num_pt_features=0); virtual bool HasRuleForSpan(int i, int j, int distance) const; }; @@ -56,7 +56,7 @@ bool GlueGrammar::HasRuleForSpan(int i, int /* j */, int /* distance */) const { return (i == 0); } -PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) { +PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level, const unsigned num_pt_features) { unordered_set ss; for (int i = 0; i < input.size(); ++i) { const vector& alts = input[i]; @@ -64,12 +64,17 @@ PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const int j = alts[k].dist2next + i; const string& src = TD::Convert(alts[k].label); if (ss.count(alts[k].label) == 0) { - int length = static_cast(log(UTF8StringLen(src)) / log(1.6)) + 1; - if (length > 6) length = 6; - string len_feat = "PassThrough_0=1"; - len_feat[12] += length; - TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 " + len_feat)); - pt->a_.push_back(AlignmentPoint(0,0)); + TRulePtr pt; + if (num_pt_features > 0) { + int length = static_cast(log(UTF8StringLen(src)) / log(1.6)) + 1; + if (length > num_pt_features) length = num_pt_features; + string len_feat = "PassThrough_0=1"; + len_feat[12] += length; + TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 " + len_feat)); + } else { + TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 ")); + pt->a_.push_back(AlignmentPoint(0,0)); + } AddRule(pt); RefineRule(pt, ctf_level); ss.insert(alts[k].label); @@ -86,6 +91,7 @@ struct SCFGTranslatorImpl { SCFGTranslatorImpl(const boost::program_options::variables_map& conf) : max_span_limit(conf["scfg_max_span_limit"].as()), add_pass_through_rules(conf.count("add_pass_through_rules")), + num_pt_features(conf["add_pass_through_features"].as()), goal(conf["goal"].as()), default_nt(conf["scfg_default_nt"].as()), use_ctf_(conf.count("coarse_to_fine_beam_prune")) @@ -140,6 +146,7 @@ struct SCFGTranslatorImpl { const int max_span_limit; const bool add_pass_through_rules; + const unsigned int num_pt_features; const string goal; const string default_nt; const bool use_ctf_; @@ -187,7 +194,7 @@ struct SCFGTranslatorImpl { smeta->SetSourceLength(lattice.size()); if (add_pass_through_rules){ if (!SILENT) cerr << "Adding pass through grammar" << endl; - PassThroughGrammar* g = new PassThroughGrammar(lattice, default_nt, ctf_iterations_); + PassThroughGrammar* g = new PassThroughGrammar(lattice, default_nt, ctf_iterations_, num_pt_features); g->SetGrammarName("PassThrough"); glist.push_back(GrammarPtr(g)); } -- cgit v1.2.3 From 014714dfd7d19a9d84bfccf48f71502ba0e7024d Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sat, 2 Aug 2014 14:11:14 +0200 Subject: fix --- decoder/decoder.cc | 2 +- decoder/scfg_translator.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'decoder/scfg_translator.cc') diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 2c044192..081da8d6 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -366,7 +366,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream ("beam_prune3", po::value(), "Optional pass 3") ("add_pass_through_rules,P","Add rules to translate OOV words as themselves") - ("add_extra_pass_through_features,Q",po::value()->default_value(6), "Add PassThrough{1..N} features, capped at N.") + ("add_extra_pass_through_features,Q", po::value()->default_value(6), "Add PassThrough{1..N} features, capped at N.") ("k_best,k",po::value(),"Extract the k best derivations") ("unique_k_best,r", "Unique k-best translation list") ("aligner,a", "Run as a word/phrase aligner (src & ref required)") diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index 57ad85e2..0c3e4cf7 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -91,7 +91,7 @@ struct SCFGTranslatorImpl { SCFGTranslatorImpl(const boost::program_options::variables_map& conf) : max_span_limit(conf["scfg_max_span_limit"].as()), add_pass_through_rules(conf.count("add_pass_through_rules")), - num_pt_features(conf["add_pass_through_features"].as()), + num_pt_features(conf["add_extra_pass_through_features"].as()), goal(conf["goal"].as()), default_nt(conf["scfg_default_nt"].as()), use_ctf_(conf.count("coarse_to_fine_beam_prune")) -- cgit v1.2.3 From f33ab2ff3f1cf135ffb80721e1f4d71d124bc8f9 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sat, 2 Aug 2014 14:34:57 +0200 Subject: finally --- decoder/decoder.cc | 2 +- decoder/scfg_translator.cc | 8 +++++--- training/dtrain/examples/toy/cdec.ini | 1 + training/dtrain/examples/toy/dtrain.ini | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) (limited to 'decoder/scfg_translator.cc') diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 081da8d6..c384c33f 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -366,7 +366,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream ("beam_prune3", po::value(), "Optional pass 3") ("add_pass_through_rules,P","Add rules to translate OOV words as themselves") - ("add_extra_pass_through_features,Q", po::value()->default_value(6), "Add PassThrough{1..N} features, capped at N.") + ("add_extra_pass_through_features,Q", po::value()->default_value(0), "Add PassThrough{1..N} features, capped at N.") ("k_best,k",po::value(),"Extract the k best derivations") ("unique_k_best,r", "Unique k-best translation list") ("aligner,a", "Run as a word/phrase aligner (src & ref required)") diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index 0c3e4cf7..c3cfcaad 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -64,19 +64,21 @@ PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const int j = alts[k].dist2next + i; const string& src = TD::Convert(alts[k].label); if (ss.count(alts[k].label) == 0) { - TRulePtr pt; if (num_pt_features > 0) { int length = static_cast(log(UTF8StringLen(src)) / log(1.6)) + 1; if (length > num_pt_features) length = num_pt_features; string len_feat = "PassThrough_0=1"; len_feat[12] += length; TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 " + len_feat)); + pt->a_.push_back(AlignmentPoint(0,0)); + AddRule(pt); + RefineRule(pt, ctf_level); } else { TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 ")); pt->a_.push_back(AlignmentPoint(0,0)); + AddRule(pt); + RefineRule(pt, ctf_level); } - AddRule(pt); - RefineRule(pt, ctf_level); ss.insert(alts[k].label); } } diff --git a/training/dtrain/examples/toy/cdec.ini b/training/dtrain/examples/toy/cdec.ini index b14f4819..e6c19abe 100644 --- a/training/dtrain/examples/toy/cdec.ini +++ b/training/dtrain/examples/toy/cdec.ini @@ -1,3 +1,4 @@ formalism=scfg add_pass_through_rules=true grammar=grammar.gz +#add_extra_pass_through_features=6 diff --git a/training/dtrain/examples/toy/dtrain.ini b/training/dtrain/examples/toy/dtrain.ini index cd715f26..ef956df7 100644 --- a/training/dtrain/examples/toy/dtrain.ini +++ b/training/dtrain/examples/toy/dtrain.ini @@ -2,7 +2,7 @@ decoder_config=cdec.ini input=src refs=tgt output=- -print_weights=logp shell_rule house_rule small_rule little_rule PassThrough +print_weights=logp shell_rule house_rule small_rule little_rule PassThrough PassThrough_1 PassThrough_2 PassThrough_3 PassThrough_4 PassThrough_5 PassThrough_6 k=4 N=4 epochs=2 -- cgit v1.2.3