diff options
author | Chris Dyer <redpony@gmail.com> | 2014-09-07 13:57:52 -0400 |
---|---|---|
committer | Chris Dyer <redpony@gmail.com> | 2014-09-07 13:57:52 -0400 |
commit | 2bc24dd0f10e2acbad118d5fce5aecdff6a90764 (patch) | |
tree | 99a79b38f1c293f299522c0ff080c045b346b179 /decoder/scfg_translator.cc | |
parent | b40c064987b1fb188daf040a068a459711385eac (diff) | |
parent | f33ab2ff3f1cf135ffb80721e1f4d71d124bc8f9 (diff) |
Merge pull request #50 from pks/master
alignment features, PassThroughN features, dtrain update, mira qsub, and pro fix
Diffstat (limited to 'decoder/scfg_translator.cc')
-rw-r--r-- | decoder/scfg_translator.cc | 31 |
1 files changed, 20 insertions, 11 deletions
diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index 88f62769..c3cfcaad 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -28,7 +28,7 @@ struct GlueGrammar : public TextGrammar { }; struct PassThroughGrammar : public TextGrammar { - PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0); + PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0, const unsigned int num_pt_features=0); virtual bool HasRuleForSpan(int i, int j, int distance) const; }; @@ -56,7 +56,7 @@ bool GlueGrammar::HasRuleForSpan(int i, int /* j */, int /* distance */) const { return (i == 0); } -PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) { +PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level, const unsigned num_pt_features) { unordered_set<WordID> ss; for (int i = 0; i < input.size(); ++i) { const vector<LatticeArc>& alts = input[i]; @@ -64,14 +64,21 @@ PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const int j = alts[k].dist2next + i; const string& src = TD::Convert(alts[k].label); if (ss.count(alts[k].label) == 0) { - int length = static_cast<int>(log(UTF8StringLen(src)) / log(1.6)) + 1; - if (length > 6) length = 6; - string len_feat = "PassThrough_0=1"; - len_feat[12] += length; - TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 " + len_feat)); - pt->a_.push_back(AlignmentPoint(0,0)); - AddRule(pt); - RefineRule(pt, ctf_level); + if (num_pt_features > 0) { + int length = static_cast<int>(log(UTF8StringLen(src)) / log(1.6)) + 1; + if (length > num_pt_features) length = num_pt_features; + string len_feat = "PassThrough_0=1"; + len_feat[12] += length; + TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 " + len_feat)); + pt->a_.push_back(AlignmentPoint(0,0)); + AddRule(pt); + RefineRule(pt, ctf_level); + } else { + TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 ")); + pt->a_.push_back(AlignmentPoint(0,0)); + AddRule(pt); + RefineRule(pt, ctf_level); + } ss.insert(alts[k].label); } } @@ -86,6 +93,7 @@ struct SCFGTranslatorImpl { SCFGTranslatorImpl(const boost::program_options::variables_map& conf) : max_span_limit(conf["scfg_max_span_limit"].as<int>()), add_pass_through_rules(conf.count("add_pass_through_rules")), + num_pt_features(conf["add_extra_pass_through_features"].as<unsigned int>()), goal(conf["goal"].as<string>()), default_nt(conf["scfg_default_nt"].as<string>()), use_ctf_(conf.count("coarse_to_fine_beam_prune")) @@ -140,6 +148,7 @@ struct SCFGTranslatorImpl { const int max_span_limit; const bool add_pass_through_rules; + const unsigned int num_pt_features; const string goal; const string default_nt; const bool use_ctf_; @@ -187,7 +196,7 @@ struct SCFGTranslatorImpl { smeta->SetSourceLength(lattice.size()); if (add_pass_through_rules){ if (!SILENT) cerr << "Adding pass through grammar" << endl; - PassThroughGrammar* g = new PassThroughGrammar(lattice, default_nt, ctf_iterations_); + PassThroughGrammar* g = new PassThroughGrammar(lattice, default_nt, ctf_iterations_, num_pt_features); g->SetGrammarName("PassThrough"); glist.push_back(GrammarPtr(g)); } |