summaryrefslogtreecommitdiff
path: root/decoder/rule_lexer.l
diff options
context:
space:
mode:
authoradam.d.lopez <adam.d.lopez@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-13 03:27:59 +0000
committeradam.d.lopez <adam.d.lopez@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-13 03:27:59 +0000
commitbded9a46cb3a27b8049f74e9948be783ae6ec42a (patch)
tree6d161d306ae43db7226dbefc84d5cf58dbe50c43 /decoder/rule_lexer.l
parent2530464e1c2cc7eec9445b2f9b0ae90c59265f57 (diff)
cdec now supports coarse-to-fine decoding (for SCFG only).
CTF has several options: -coarse_to_fine_beam_prune=<double> (required to activate CTF): assign an alpha parameter for pruning the coarse forest; -ctf_beam_widen=<double> (optional, defaults to 2.0): ratio to widen coarse pruning beam if fine parse fails; -ctf_num_widenings=<int> (optional, defaults to 2): number of times to widen coarse beam before defaulting to exhaustive source parsing; -ctf_no_exhaustive (optional): do not attempt exhaustive parse if CTF fails to find a parse. Additionally, script extools/coarsen_grammar.pl will create a coarse-to-fine grammar (for X?? categories *only*). cdec will read CTF grammars in a format identical to the original, in which refinements of a rule immediately follow the coarse projection, preceded by an additional whitespace character. Not fully tested, but should be backwards compatible. Also not yet integrated into pipelines, but should work on the command line. git-svn-id: https://ws10smt.googlecode.com/svn/trunk@231 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/rule_lexer.l')
-rw-r--r--decoder/rule_lexer.l48
1 files changed, 42 insertions, 6 deletions
diff --git a/decoder/rule_lexer.l b/decoder/rule_lexer.l
index e2acd752..0216b119 100644
--- a/decoder/rule_lexer.l
+++ b/decoder/rule_lexer.l
@@ -6,6 +6,7 @@
#include <sstream>
#include <cstring>
#include <cassert>
+#include <stack>
#include "tdict.h"
#include "fdict.h"
#include "trule.h"
@@ -45,7 +46,8 @@ int scfglex_nt_sanity[MAX_ARITY];
int scfglex_src_nts[MAX_ARITY];
float scfglex_nt_size_means[MAX_ARITY];
float scfglex_nt_size_vars[MAX_ARITY];
-
+std::stack<TRulePtr> ctf_rule_stack;  // rules seen so far on the current coarse-to-fine chain; check_and_update_ctf_stack() pops it down to ctf_level entries and the rule action pushes each new rule
+unsigned int ctf_level = 0;  // bumped once per space/tab matched in <INITIAL> before a rule; set back to 0 after each rule has been handed to the callback
void sanity_check_trg_symbol(WordID nt, int index) {
if (scfglex_src_nts[index-1] != nt) {
@@ -77,6 +79,34 @@ void scfglex_reset() {
scfglex_trg_rhs_size = 0;
}
+// Validates a freshly parsed rule against the coarse-to-fine (CTF) rule stack.
+//
+// rp: the rule that was just constructed. Its nesting depth is taken from the
+//     global ctf_level, not from the rule itself.
+//
+// Behaviour:
+//   1. Aborts if ctf_level is larger than the current stack depth, i.e. the
+//      rule is nested more than one level below the previous rule.
+//   2. Pops the stack until exactly ctf_level entries remain, so that top()
+//      (if any) is the coarse parent of rp.
+//   3. If a parent exists, aborts unless rp has the same source length, an
+//      identical target side, and matching source symbols (see below).
+//
+// This function never pushes rp; pushing is left to the caller.
+void check_and_update_ctf_stack(const TRulePtr& rp) {
+  if (ctf_level > ctf_rule_stack.size()) {
+    std::cerr << "Found rule at projection level " << ctf_level << " but ";
+    // With an empty stack there is no previous level to report, and
+    // size()-1 would wrap around because size() is unsigned.
+    if (ctf_rule_stack.empty())
+      std::cerr << "no coarser rule precedes it";
+    else
+      std::cerr << "previous rule was at level " << ctf_rule_stack.size()-1;
+    std::cerr << " (cannot exceed previous level by more than one; line " << lex_line << ")" << std::endl;
+    abort();
+  }
+  // Discard rules at the same or a deeper level than the new one.
+  while (ctf_rule_stack.size() > ctf_level)
+    ctf_rule_stack.pop();
+  if (ctf_rule_stack.empty())
+    return;  // level-0 rule: there is no parent to compare against
+
+  // ensure that rule has the same signature as parent (coarse) rule. Rules may *only*
+  // differ by the rhs nonterminals, not terminals or permutation of nonterminals.
+  const TRulePtr& coarse_rp = ctf_rule_stack.top();
+  bool mismatch = (rp->f_.size() != coarse_rp->f_.size()) || (rp->e_ != coarse_rp->e_);
+  // Only walk the source side once the lengths are known to agree, so that
+  // indexing coarse_rp->f_ stays in range.
+  for (unsigned int i = 0; !mismatch && i < rp->f_.size(); ++i) {
+    // Entries must agree on being negative or not, and positive entries must
+    // be equal. NOTE(review): negative ids appear to denote nonterminals and
+    // positive ids terminals -- confirm against TRule.
+    const bool sign_differs = (rp->f_[i] < 0) != (coarse_rp->f_[i] < 0);
+    const bool terminal_differs = (rp->f_[i] > 0) && (rp->f_[i] != coarse_rp->f_[i]);
+    mismatch = sign_differs || terminal_differs;
+  }
+  if (mismatch) {
+    std::cerr << "Rule " << (rp->AsString()) << " is not a projection of " <<
+      (coarse_rp->AsString()) << std::endl;
+    abort();
+  }
+}
+
%}
REAL [\-+]?[0-9]+(\.[0-9]*([eE][-+]*[0-9]+)?)?|inf|[\-+]inf
@@ -85,7 +115,9 @@ NT [\-#$A-Z_:=.",\\][\-#$".A-Z+/=_0-9!:@\\]*
%x LHS_END SRC TRG FEATS FEATVAL ALIGNS
%%
-<INITIAL>[ \t] ;
+<INITIAL>[ \t] {
+ ctf_level++;  // every space/tab consumed in <INITIAL> deepens the level by one; NOTE(review): per the commit message, a leading whitespace character marks a refinement of the preceding coarser rule
+ };
<INITIAL>\[{NT}\] {
scfglex_tmp_token.assign(yytext + 1, yyleng - 2);
@@ -182,12 +214,16 @@ NT [\-#$A-Z_:=.",\\][\-#$".A-Z+/=_0-9!:@\\]*
abort();
}
TRulePtr rp(new TRule(scfglex_lhs, scfglex_src_rhs, scfglex_src_rhs_size, scfglex_trg_rhs, scfglex_trg_rhs_size, scfglex_feat_ids, scfglex_feat_vals, scfglex_num_feats, scfglex_src_arity));
- rule_callback(rp, rule_callback_extra);
+ check_and_update_ctf_stack(rp);
+ TRulePtr coarse_rp = ((ctf_level == 0) ? TRulePtr() : ctf_rule_stack.top());
+ rule_callback(rp, ctf_level, coarse_rp, rule_callback_extra);
+ ctf_rule_stack.push(rp);
// std::cerr << rp->AsString() << std::endl;
num_rules++;
- lex_line++;
- if (num_rules % 50000 == 0) { std::cerr << '.' << std::flush; fl = true; }
- if (num_rules % 2000000 == 0) { std::cerr << " [" << num_rules << "]\n"; fl = false; }
+ lex_line++;
+ if (num_rules % 50000 == 0) { std::cerr << '.' << std::flush; fl = true; }
+ if (num_rules % 2000000 == 0) { std::cerr << " [" << num_rules << "]\n"; fl = false; }
+ ctf_level = 0;
BEGIN(INITIAL);
}