summary refs log tree commit diff
diff options
context:
space:
mode:
author Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2013-12-16 23:49:58 -0500
committer Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2013-12-16 23:49:58 -0500
commit 9d5fce4f9a59ea6f45a2d094361bacc07f4148e7 (patch)
tree 0b0694b2af6e3b6809ad30ac952fe1210ce958fb
parent 09b71909b0ecc2d69dfdfb81f535db2519cd5642 (diff)
better parsing errors, thx to nschneid, also increase limits on number of features
-rw-r--r-- decoder/grammar.cc 4
-rw-r--r-- decoder/rule_lexer.h 3
-rw-r--r-- decoder/rule_lexer.ll 20
-rw-r--r-- decoder/trule.cc 2
4 files changed, 16 insertions, 13 deletions
diff --git a/decoder/grammar.cc b/decoder/grammar.cc
index 160d00e6..439e448d 100644
--- a/decoder/grammar.cc
+++ b/decoder/grammar.cc
@@ -121,11 +121,11 @@ static void AddRuleHelper(const TRulePtr& new_rule, const unsigned int ctf_level
void TextGrammar::ReadFromFile(const string& filename) {
ReadFile in(filename);
- ReadFromStream(in.stream());
+ RuleLexer::ReadRules(in.stream(), &AddRuleHelper, filename, this);
}
void TextGrammar::ReadFromStream(istream* in) {
- RuleLexer::ReadRules(in, &AddRuleHelper, this);
+ RuleLexer::ReadRules(in, &AddRuleHelper, "UNKNOWN", this);
}
bool TextGrammar::HasRuleForSpan(int /* i */, int /* j */, int distance) const {
diff --git a/decoder/rule_lexer.h b/decoder/rule_lexer.h
index 976ea02b..f844e5b2 100644
--- a/decoder/rule_lexer.h
+++ b/decoder/rule_lexer.h
@@ -2,12 +2,13 @@
#define _RULE_LEXER_H_
#include <iostream>
+#include <string>
#include "trule.h"
struct RuleLexer {
typedef void (*RuleCallback)(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra);
- static void ReadRules(std::istream* in, RuleCallback func, void* extra);
+ static void ReadRules(std::istream* in, RuleCallback func, const std::string& fname, void* extra);
};
#endif
diff --git a/decoder/rule_lexer.ll b/decoder/rule_lexer.ll
index 083a5bb1..c6a85919 100644
--- a/decoder/rule_lexer.ll
+++ b/decoder/rule_lexer.ll
@@ -18,6 +18,7 @@ std::istream* scfglex_stream = NULL;
RuleLexer::RuleCallback rule_callback = NULL;
void* rule_callback_extra = NULL;
std::vector<int> scfglex_phrase_fnames;
+std::string scfglex_fname;
#undef YY_INPUT
#define YY_INPUT(buf, result, max_size) (result = scfglex_stream->read(buf, max_size).gcount())
@@ -38,12 +39,12 @@ WordID scfglex_lhs;
int scfglex_src_arity;
int scfglex_trg_arity;
-#define MAX_FEATS 100
+#define MAX_FEATS 10000
int scfglex_feat_ids[MAX_FEATS];
double scfglex_feat_vals[MAX_FEATS];
int scfglex_num_feats;
-#define MAX_ARITY 200
+#define MAX_ARITY 1000
int scfglex_nt_sanity[MAX_ARITY];
int scfglex_src_nts[MAX_ARITY];
// float scfglex_nt_size_means[MAX_ARITY];
@@ -51,7 +52,7 @@ int scfglex_src_nts[MAX_ARITY];
std::stack<TRulePtr> ctf_rule_stack;
unsigned int ctf_level = 0;
-#define MAX_ALS 200
+#define MAX_ALS 2000
AlignmentPoint scfglex_als[MAX_ALS];
int scfglex_num_als;
@@ -190,7 +191,7 @@ NT [^\t \[\],]+
BEGIN(SRC);
}
<INITIAL,LHS_END>. {
- std::cerr << "Line " << lex_line << ": unexpected input in LHS: " << yytext << std::endl;
+ std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << ": unexpected input in LHS: " << yytext << std::endl;
abort();
}
@@ -217,7 +218,7 @@ NT [^\t \[\],]+
<TRG,FEATS,ALIGNS>\n {
if (scfglex_src_arity != scfglex_trg_arity) {
- std::cerr << "Line " << lex_line << ": LHS and RHS arity mismatch!\n";
+ std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << ": LHS and RHS arity mismatch!\n";
abort();
}
// const bool ignore_grammar_features = false;
@@ -258,7 +259,7 @@ NT [^\t \[\],]+
BEGIN(FEATS);
}
<FEATVAL>. {
- std::cerr << "Line " << lex_line << ": unexpected input in feature value: " << yytext << std::endl;
+ std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << ": unexpected input in feature value: " << yytext << std::endl;
abort();
}
<FEATS>{REAL} {
@@ -267,7 +268,7 @@ NT [^\t \[\],]+
++scfglex_num_feats;
}
<FEATS>. {
- std::cerr << "Line " << lex_line << " unexpected input in features: " << yytext << std::endl;
+ std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << " unexpected input in features: " << yytext << std::endl;
abort();
}
<ALIGNS>[0-9]+-[0-9]+ {
@@ -291,14 +292,14 @@ NT [^\t \[\],]+
}
<ALIGNS>[ \t] ;
<ALIGNS>. {
- std::cerr << "Line " << lex_line << ": unexpected input in alignment: " << yytext << std::endl;
+ std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << ": unexpected input in alignment: " << yytext << std::endl;
abort();
}
%%
#include "filelib.h"
-void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, void* extra) {
+void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, const std::string& fname, void* extra) {
if (scfglex_phrase_fnames.empty()) {
scfglex_phrase_fnames.resize(100);
for (int i = 0; i < scfglex_phrase_fnames.size(); ++i) {
@@ -308,6 +309,7 @@ void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, void*
}
}
lex_line = 1;
+ scfglex_fname = fname;
scfglex_stream = in;
rule_callback_extra = extra,
rule_callback = func;
diff --git a/decoder/trule.cc b/decoder/trule.cc
index 896f9f3d..c22baae3 100644
--- a/decoder/trule.cc
+++ b/decoder/trule.cc
@@ -117,7 +117,7 @@ bool TRule::ReadFromString(const string& line, bool strict, bool mono) {
// use lexer
istringstream il(line);
n_assigned=0;
- RuleLexer::ReadRules(&il,assign_trule,this);
+ RuleLexer::ReadRules(&il,assign_trule,"STRING",this);
if (n_assigned>1)
cerr<<"\nWARNING: more than one rule parsed from multi-line string; kept last: "<<line<<".\n";
return n_assigned;