diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-11-05 15:29:46 +0100 |
---|---|---|
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-11-05 15:29:46 +0100 |
commit | 6f29f345dc06c1a1033475eac1d1340781d1d603 (patch) | |
tree | 6fa4cdd7aefd7d54c9585c2c6274db61bb8b159a /decoder/scfg_translator.cc | |
parent | b510da2e562c695c90d565eb295c749569c59be8 (diff) | |
parent | c615c37501fa8576584a510a9d2bfe2fdd5bace7 (diff) |
merge upstream/master
Diffstat (limited to 'decoder/scfg_translator.cc')
-rw-r--r-- | decoder/scfg_translator.cc | 60 |
1 files changed, 60 insertions, 0 deletions
diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index a978cfc2..3b43b586 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -15,13 +15,73 @@ #include "tdict.h" #include "viterbi.h" #include "verbose.h" +#include <tr1/unordered_map> #define foreach BOOST_FOREACH #define reverse_foreach BOOST_REVERSE_FOREACH using namespace std; +using namespace std::tr1; static bool printGrammarsUsed = false; +struct GlueGrammar : public TextGrammar { + // read glue grammar from file + explicit GlueGrammar(const std::string& file); + GlueGrammar(const std::string& goal_nt, const std::string& default_nt, const unsigned int ctf_level=0); // "S", "X" + virtual bool HasRuleForSpan(int i, int j, int distance) const; +}; + +struct PassThroughGrammar : public TextGrammar { + PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0); + virtual bool HasRuleForSpan(int i, int j, int distance) const; +}; + +GlueGrammar::GlueGrammar(const string& file) : TextGrammar(file) {} + +static void RefineRule(TRulePtr pt, const unsigned int ctf_level){ + for (unsigned int i=0; i<ctf_level; ++i){ + TRulePtr r(new TRule(*pt)); + pt->fine_rules_.reset(new vector<TRulePtr>); + pt->fine_rules_->push_back(r); + pt = r; + } +} + +GlueGrammar::GlueGrammar(const string& goal_nt, const string& default_nt, const unsigned int ctf_level) { + TRulePtr stop_glue(new TRule("[" + goal_nt + "] ||| [" + default_nt + ",1] ||| [1]")); + AddRule(stop_glue); + RefineRule(stop_glue, ctf_level); + TRulePtr glue(new TRule("[" + goal_nt + "] ||| [" + goal_nt + ",1] ["+ default_nt + ",2] ||| [1] [2] ||| Glue=1")); + AddRule(glue); + RefineRule(glue, ctf_level); +} + +bool GlueGrammar::HasRuleForSpan(int i, int /* j */, int /* distance */) const { + return (i == 0); +} + +PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) { + unordered_set<WordID> ss; + for (int i = 0; i < input.size(); ++i) { + const vector<LatticeArc>& alts = input[i]; + for (int k = 0; k < alts.size(); ++k) { + const int j = alts[k].dist2next + i; + const string& src = TD::Convert(alts[k].label); + if (ss.count(alts[k].label) == 0) { + TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1")); + pt->a_.push_back(AlignmentPoint(0,0)); + AddRule(pt); + RefineRule(pt, ctf_level); + ss.insert(alts[k].label); + } + } + } +} + +bool PassThroughGrammar::HasRuleForSpan(int, int, int distance) const { + return (distance < 2); +} + struct SCFGTranslatorImpl { SCFGTranslatorImpl(const boost::program_options::variables_map& conf) : max_span_limit(conf["scfg_max_span_limit"].as<int>()), |