From 9d5fce4f9a59ea6f45a2d094361bacc07f4148e7 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 16 Dec 2013 23:49:58 -0500 Subject: better parsing errors, thx to nschneid, also increase limits on number of features --- decoder/grammar.cc | 4 ++-- decoder/rule_lexer.h | 3 ++- decoder/rule_lexer.ll | 20 +++++++++++--------- decoder/trule.cc | 2 +- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/decoder/grammar.cc b/decoder/grammar.cc index 160d00e6..439e448d 100644 --- a/decoder/grammar.cc +++ b/decoder/grammar.cc @@ -121,11 +121,11 @@ static void AddRuleHelper(const TRulePtr& new_rule, const unsigned int ctf_level void TextGrammar::ReadFromFile(const string& filename) { ReadFile in(filename); - ReadFromStream(in.stream()); + RuleLexer::ReadRules(in.stream(), &AddRuleHelper, filename, this); } void TextGrammar::ReadFromStream(istream* in) { - RuleLexer::ReadRules(in, &AddRuleHelper, this); + RuleLexer::ReadRules(in, &AddRuleHelper, "UNKNOWN", this); } bool TextGrammar::HasRuleForSpan(int /* i */, int /* j */, int distance) const { diff --git a/decoder/rule_lexer.h b/decoder/rule_lexer.h index 976ea02b..f844e5b2 100644 --- a/decoder/rule_lexer.h +++ b/decoder/rule_lexer.h @@ -2,12 +2,13 @@ #define _RULE_LEXER_H_ #include +#include #include "trule.h" struct RuleLexer { typedef void (*RuleCallback)(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra); - static void ReadRules(std::istream* in, RuleCallback func, void* extra); + static void ReadRules(std::istream* in, RuleCallback func, const std::string& fname, void* extra); }; #endif diff --git a/decoder/rule_lexer.ll b/decoder/rule_lexer.ll index 083a5bb1..c6a85919 100644 --- a/decoder/rule_lexer.ll +++ b/decoder/rule_lexer.ll @@ -18,6 +18,7 @@ std::istream* scfglex_stream = NULL; RuleLexer::RuleCallback rule_callback = NULL; void* rule_callback_extra = NULL; std::vector scfglex_phrase_fnames; +std::string scfglex_fname; #undef YY_INPUT #define YY_INPUT(buf, result, max_size) (result = scfglex_stream->read(buf, max_size).gcount()) @@ -38,12 +39,12 @@ WordID scfglex_lhs; int scfglex_src_arity; int scfglex_trg_arity; -#define MAX_FEATS 100 +#define MAX_FEATS 10000 int scfglex_feat_ids[MAX_FEATS]; double scfglex_feat_vals[MAX_FEATS]; int scfglex_num_feats; -#define MAX_ARITY 200 +#define MAX_ARITY 1000 int scfglex_nt_sanity[MAX_ARITY]; int scfglex_src_nts[MAX_ARITY]; // float scfglex_nt_size_means[MAX_ARITY]; @@ -51,7 +52,7 @@ int scfglex_src_nts[MAX_ARITY]; std::stack ctf_rule_stack; unsigned int ctf_level = 0; -#define MAX_ALS 200 +#define MAX_ALS 2000 AlignmentPoint scfglex_als[MAX_ALS]; int scfglex_num_als; @@ -190,7 +191,7 @@ NT [^\t \[\],]+ BEGIN(SRC); } . { - std::cerr << "Line " << lex_line << ": unexpected input in LHS: " << yytext << std::endl; + std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << ": unexpected input in LHS: " << yytext << std::endl; abort(); } @@ -217,7 +218,7 @@ NT [^\t \[\],]+ \n { if (scfglex_src_arity != scfglex_trg_arity) { - std::cerr << "Line " << lex_line << ": LHS and RHS arity mismatch!\n"; + std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << ": LHS and RHS arity mismatch!\n"; abort(); } // const bool ignore_grammar_features = false; @@ -258,7 +259,7 @@ NT [^\t \[\],]+ BEGIN(FEATS); } . { - std::cerr << "Line " << lex_line << ": unexpected input in feature value: " << yytext << std::endl; + std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << ": unexpected input in feature value: " << yytext << std::endl; abort(); } {REAL} { @@ -267,7 +268,7 @@ NT [^\t \[\],]+ ++scfglex_num_feats; } . { - std::cerr << "Line " << lex_line << " unexpected input in features: " << yytext << std::endl; + std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << " unexpected input in features: " << yytext << std::endl; abort(); } [0-9]+-[0-9]+ { @@ -291,14 +292,14 @@ NT [^\t \[\],]+ } [ \t] ; . { - std::cerr << "Line " << lex_line << ": unexpected input in alignment: " << yytext << std::endl; + std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << ": unexpected input in alignment: " << yytext << std::endl; abort(); } %% #include "filelib.h" -void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, void* extra) { +void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, const std::string& fname, void* extra) { if (scfglex_phrase_fnames.empty()) { scfglex_phrase_fnames.resize(100); for (int i = 0; i < scfglex_phrase_fnames.size(); ++i) { @@ -308,6 +309,7 @@ void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, void* } } lex_line = 1; + scfglex_fname = fname; scfglex_stream = in; rule_callback_extra = extra, rule_callback = func; diff --git a/decoder/trule.cc b/decoder/trule.cc index 896f9f3d..c22baae3 100644 --- a/decoder/trule.cc +++ b/decoder/trule.cc @@ -117,7 +117,7 @@ bool TRule::ReadFromString(const string& line, bool strict, bool mono) { // use lexer istringstream il(line); n_assigned=0; - RuleLexer::ReadRules(&il,assign_trule,this); + RuleLexer::ReadRules(&il,assign_trule,"STRING",this); if (n_assigned>1) cerr<<"\nWARNING: more than one rule parsed from multi-line string; kept last: "<