diff options
author | Chris Dyer <cdyer@cab.ark.cs.cmu.edu> | 2012-10-25 23:29:39 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@cab.ark.cs.cmu.edu> | 2012-10-25 23:29:39 -0400 |
commit | 6bbf03ac46bd57400aa9e65a321a304a234af935 (patch) | |
tree | b7c4ff48209115fd40ec4fdebee155f45a43e2ad | |
parent | 17abaf1a6c1523f4d53a11c5d940ab59dcbc63b4 (diff) | |
parent | 8f5efe046c0a14aaa3ea5925fafdf71c26309b68 (diff) |
Merge branch 'master' of github.com:redpony/cdec
-rw-r--r-- | decoder/grammar.cc | 45 | ||||
-rw-r--r-- | decoder/grammar.h | 14 | ||||
-rw-r--r-- | decoder/scfg_translator.cc | 60 |
3 files changed, 60 insertions, 59 deletions
diff --git a/decoder/grammar.cc b/decoder/grammar.cc index d1fe53af..ee43f537 100644 --- a/decoder/grammar.cc +++ b/decoder/grammar.cc @@ -127,48 +127,3 @@ bool TextGrammar::HasRuleForSpan(int /* i */, int /* j */, int distance) const { return (max_span_ >= distance); } -GlueGrammar::GlueGrammar(const string& file) : TextGrammar(file) {} - -void RefineRule(TRulePtr pt, const unsigned int ctf_level){ - for (unsigned int i=0; i<ctf_level; ++i){ - TRulePtr r(new TRule(*pt)); - pt->fine_rules_.reset(new vector<TRulePtr>); - pt->fine_rules_->push_back(r); - pt = r; - } -} - -GlueGrammar::GlueGrammar(const string& goal_nt, const string& default_nt, const unsigned int ctf_level) { - TRulePtr stop_glue(new TRule("[" + goal_nt + "] ||| [" + default_nt + ",1] ||| [1]")); - AddRule(stop_glue); - RefineRule(stop_glue, ctf_level); - TRulePtr glue(new TRule("[" + goal_nt + "] ||| [" + goal_nt + ",1] ["+ default_nt + ",2] ||| [1] [2] ||| Glue=1")); - AddRule(glue); - RefineRule(glue, ctf_level); -} - -bool GlueGrammar::HasRuleForSpan(int i, int /* j */, int /* distance */) const { - return (i == 0); -} - -PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) { - unordered_set<WordID> ss; - for (int i = 0; i < input.size(); ++i) { - const vector<LatticeArc>& alts = input[i]; - for (int k = 0; k < alts.size(); ++k) { - const int j = alts[k].dist2next + i; - const string& src = TD::Convert(alts[k].label); - if (ss.count(alts[k].label) == 0) { - TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1")); - pt->a_.push_back(AlignmentPoint(0,0)); - AddRule(pt); - RefineRule(pt, ctf_level); - ss.insert(alts[k].label); - } - } - } -} - -bool PassThroughGrammar::HasRuleForSpan(int, int, int distance) const { - return (distance < 2); -} diff --git a/decoder/grammar.h b/decoder/grammar.h index e6a15a69..add1a235 100644 --- a/decoder/grammar.h +++ b/decoder/grammar.h @@ -81,18 +81,4 @@ struct TextGrammar : public Grammar { }; -struct GlueGrammar : public TextGrammar { - // read glue grammar from file - explicit GlueGrammar(const std::string& file); - GlueGrammar(const std::string& goal_nt, const std::string& default_nt, const unsigned int ctf_level=0); // "S", "X" - virtual bool HasRuleForSpan(int i, int j, int distance) const; -}; - -struct PassThroughGrammar : public TextGrammar { - PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0); - virtual bool HasRuleForSpan(int i, int j, int distance) const; -}; - -void RefineRule(TRulePtr pt, const unsigned int ctf_level); - #endif diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index a978cfc2..3b43b586 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -15,13 +15,73 @@ #include "tdict.h" #include "viterbi.h" #include "verbose.h" +#include <tr1/unordered_map> #define foreach BOOST_FOREACH #define reverse_foreach BOOST_REVERSE_FOREACH using namespace std; +using namespace std::tr1; static bool printGrammarsUsed = false; +struct GlueGrammar : public TextGrammar { + // read glue grammar from file + explicit GlueGrammar(const std::string& file); + GlueGrammar(const std::string& goal_nt, const std::string& default_nt, const unsigned int ctf_level=0); // "S", "X" + virtual bool HasRuleForSpan(int i, int j, int distance) const; +}; + +struct PassThroughGrammar : public TextGrammar { + PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0); + virtual bool HasRuleForSpan(int i, int j, int distance) const; +}; + +GlueGrammar::GlueGrammar(const string& file) : TextGrammar(file) {} + +static void RefineRule(TRulePtr pt, const unsigned int ctf_level){ + for (unsigned int i=0; i<ctf_level; ++i){ + TRulePtr r(new TRule(*pt)); + pt->fine_rules_.reset(new vector<TRulePtr>); + pt->fine_rules_->push_back(r); + pt = r; + } +} + +GlueGrammar::GlueGrammar(const string& goal_nt, const string& default_nt, const unsigned int ctf_level) { + TRulePtr stop_glue(new TRule("[" + goal_nt + "] ||| [" + default_nt + ",1] ||| [1]")); + AddRule(stop_glue); + RefineRule(stop_glue, ctf_level); + TRulePtr glue(new TRule("[" + goal_nt + "] ||| [" + goal_nt + ",1] ["+ default_nt + ",2] ||| [1] [2] ||| Glue=1")); + AddRule(glue); + RefineRule(glue, ctf_level); +} + +bool GlueGrammar::HasRuleForSpan(int i, int /* j */, int /* distance */) const { + return (i == 0); +} + +PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) { + unordered_set<WordID> ss; + for (int i = 0; i < input.size(); ++i) { + const vector<LatticeArc>& alts = input[i]; + for (int k = 0; k < alts.size(); ++k) { + const int j = alts[k].dist2next + i; + const string& src = TD::Convert(alts[k].label); + if (ss.count(alts[k].label) == 0) { + TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1")); + pt->a_.push_back(AlignmentPoint(0,0)); + AddRule(pt); + RefineRule(pt, ctf_level); + ss.insert(alts[k].label); + } + } + } +} + +bool PassThroughGrammar::HasRuleForSpan(int, int, int distance) const { + return (distance < 2); +} + struct SCFGTranslatorImpl { SCFGTranslatorImpl(const boost::program_options::variables_map& conf) : max_span_limit(conf["scfg_max_span_limit"].as<int>()), |