summaryrefslogtreecommitdiff
path: root/decoder/grammar.cc
diff options
context:
space:
mode:
authoradam.d.lopez <adam.d.lopez@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-13 03:27:59 +0000
committeradam.d.lopez <adam.d.lopez@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-13 03:27:59 +0000
commit47c75319638866609f669346e15663c5ba43af7f (patch)
tree50bc1867bdb1ce27e679fb8981b1b805c4897cd6 /decoder/grammar.cc
parenta2360c873ba8b72744e16752a067276a46d63645 (diff)
cdec now supports coarse-to-fine decoding (for SCFG only).
CTF has several options: -coarse_to_fine_beam_prune=<double> (required to activate CTF) assign an alpha parameter for pruning the coarse foreast -ctf_beam_widen=<double> (optional, defaults to 2.0): ratio to widen coarse pruning beam if fine parse fails. -ctf_num_widenings=<int> (optional, defaults to 2): number of times to widen coarse beam before defaulting to exhaustive source parsing -ctf_no_exhaustive (optional) do not attempt exhaustive parse if CTF fails to find a parse. Additionally, script extools/coarsen_grammar.pl will create a coarse-to-fine grammar (for X?? categories *only*). cdec will read CTF grammars in a format identical to the original, in which refinements of a rule immediately follow the coarse projection, preceded by an additional whitespace character. Not fully tested, but should be backwards compatible. Also not yet integrated into pipelines, but should work on the command line. git-svn-id: https://ws10smt.googlecode.com/svn/trunk@231 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/grammar.cc')
-rw-r--r--decoder/grammar.cc37
1 files changed, 25 insertions, 12 deletions
diff --git a/decoder/grammar.cc b/decoder/grammar.cc
index 499e79fe..26efaf99 100644
--- a/decoder/grammar.cc
+++ b/decoder/grammar.cc
@@ -81,8 +81,14 @@ const GrammarIter* TextGrammar::GetRoot() const {
return &pimpl_->root_;
}
-void TextGrammar::AddRule(const TRulePtr& rule) {
- if (rule->IsUnary()) {
+void TextGrammar::AddRule(const TRulePtr& rule, const unsigned int ctf_level, const TRulePtr& coarse_rule) {
+ if (ctf_level > 0) {
+ // assume that coarse_rule is already in tree (would be safer to check)
+ if (coarse_rule->fine_rules_ == 0)
+ coarse_rule->fine_rules_.reset(new std::vector<TRulePtr>());
+ coarse_rule->fine_rules_->push_back(rule);
+ ctf_levels_ = std::max(ctf_levels_, ctf_level);
+ } else if (rule->IsUnary()) {
rhs2unaries_[rule->f().front()].push_back(rule);
unaries_.push_back(rule);
} else {
@@ -95,8 +101,8 @@ void TextGrammar::AddRule(const TRulePtr& rule) {
}
}
-static void AddRuleHelper(const TRulePtr& new_rule, void* extra) {
- static_cast<TextGrammar*>(extra)->AddRule(new_rule);
+static void AddRuleHelper(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra) {
+ static_cast<TextGrammar*>(extra)->AddRule(new_rule, ctf_level, coarse_rule);
}
void TextGrammar::ReadFromFile(const string& filename) {
@@ -110,22 +116,29 @@ bool TextGrammar::HasRuleForSpan(int /* i */, int /* j */, int distance) const {
GlueGrammar::GlueGrammar(const string& file) : TextGrammar(file) {}
-GlueGrammar::GlueGrammar(const string& goal_nt, const string& default_nt) {
- TRulePtr stop_glue(new TRule("[" + goal_nt + "] ||| [" + default_nt + ",1] ||| [" + default_nt + ",1]"));
- TRulePtr glue(new TRule("[" + goal_nt + "] ||| [" + goal_nt + ",1] ["
- + default_nt + ",2] ||| [" + goal_nt + ",1] [" + default_nt + ",2] ||| Glue=1"));
+void RefineRule(TRulePtr pt, const unsigned int ctf_level){
+ for (unsigned int i=0; i<ctf_level; ++i){
+ TRulePtr r(new TRule(*pt));
+ pt->fine_rules_.reset(new vector<TRulePtr>);
+ pt->fine_rules_->push_back(r);
+ pt = r;
+ }
+}
+GlueGrammar::GlueGrammar(const string& goal_nt, const string& default_nt, const unsigned int ctf_level) {
+ TRulePtr stop_glue(new TRule("[" + goal_nt + "] ||| [" + default_nt + ",1] ||| [" + default_nt + ",1]"));
AddRule(stop_glue);
+ RefineRule(stop_glue, ctf_level);
+ TRulePtr glue(new TRule("[" + goal_nt + "] ||| [" + goal_nt + ",1] ["+ default_nt + ",2] ||| [" + goal_nt + ",1] [" + default_nt + ",2] ||| Glue=1"));
AddRule(glue);
- //cerr << "GLUE: " << stop_glue->AsString() << endl;
- //cerr << "GLUE: " << glue->AsString() << endl;
+ RefineRule(glue, ctf_level);
}
bool GlueGrammar::HasRuleForSpan(int i, int /* j */, int /* distance */) const {
return (i == 0);
}
-PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat) :
+PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) :
has_rule_(input.size() + 1) {
for (int i = 0; i < input.size(); ++i) {
const vector<LatticeArc>& alts = input[i];
@@ -135,7 +148,7 @@ PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat)
const string& src = TD::Convert(alts[k].label);
TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1"));
AddRule(pt);
-// cerr << "PT: " << pt->AsString() << endl;
+ RefineRule(pt, ctf_level);
}
}
}