Merge branch 'master' of https://github.com/redpony/cdec

author: Chris Dyer <cdyer@Chriss-MacBook-Air.local> 2013-12-19 00:13:17 -0500
committer: Chris Dyer <cdyer@Chriss-MacBook-Air.local> 2013-12-19 00:13:17 -0500
commit: 53f704b61e2ad0c048f1fe460e116207fa9e6062 (patch)
tree: 74b655e439b025f43ca41b17ee17126749bc5242
parent: fb59fa66f4555dc957003f2ed8bad4882db09cc0 (diff)
parent: ac224d3f0c63760ad33fa04719d26d3a3e42495c (diff)
4 files changed, 16 insertions, 13 deletions
diff --git a/decoder/grammar.cc b/decoder/grammar.cc
index 160d00e6..439e448d 100644
--- a/decoder/grammar.cc
+++ b/decoder/grammar.cc
@@ -121,11 +121,11 @@ static void AddRuleHelper(const TRulePtr& new_rule, const unsigned int ctf_level
 
 void TextGrammar::ReadFromFile(const string& filename) {
   ReadFile in(filename);
-  ReadFromStream(in.stream());
+  RuleLexer::ReadRules(in.stream(), &AddRuleHelper, filename, this);
 }
 
 void TextGrammar::ReadFromStream(istream* in) {
-  RuleLexer::ReadRules(in, &AddRuleHelper, this);
+  RuleLexer::ReadRules(in, &AddRuleHelper, "UNKNOWN", this);
 }
 
 bool TextGrammar::HasRuleForSpan(int /* i */, int /* j */, int distance) const {
diff --git a/decoder/rule_lexer.h b/decoder/rule_lexer.h
index 976ea02b..f844e5b2 100644
--- a/decoder/rule_lexer.h
+++ b/decoder/rule_lexer.h
@@ -2,12 +2,13 @@
 #define _RULE_LEXER_H_
 
 #include <iostream>
+#include <string>
 
 #include "trule.h"
 
 struct RuleLexer {
   typedef void (*RuleCallback)(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra);
-  static void ReadRules(std::istream* in, RuleCallback func, void* extra);
+  static void ReadRules(std::istream* in, RuleCallback func, const std::string& fname, void* extra);
 };
 
 #endif
diff --git a/decoder/rule_lexer.ll b/decoder/rule_lexer.ll
index 083a5bb1..c6a85919 100644
--- a/decoder/rule_lexer.ll
+++ b/decoder/rule_lexer.ll
@@ -18,6 +18,7 @@ std::istream* scfglex_stream = NULL;
 RuleLexer::RuleCallback rule_callback = NULL;
 void* rule_callback_extra = NULL;
 std::vector<int> scfglex_phrase_fnames;
+std::string scfglex_fname;
 
 #undef YY_INPUT
 #define YY_INPUT(buf, result, max_size) (result = scfglex_stream->read(buf, max_size).gcount())
@@ -38,12 +39,12 @@ WordID scfglex_lhs;
 int scfglex_src_arity;
 int scfglex_trg_arity;
 
-#define MAX_FEATS 100
+#define MAX_FEATS 10000
 int scfglex_feat_ids[MAX_FEATS];
 double scfglex_feat_vals[MAX_FEATS];
 int scfglex_num_feats;
 
-#define MAX_ARITY 200
+#define MAX_ARITY 1000
 int scfglex_nt_sanity[MAX_ARITY];
 int scfglex_src_nts[MAX_ARITY];
 // float scfglex_nt_size_means[MAX_ARITY];
@@ -51,7 +52,7 @@ int scfglex_src_nts[MAX_ARITY];
 std::stack<TRulePtr> ctf_rule_stack;
 unsigned int ctf_level = 0;
 
-#define MAX_ALS 200
+#define MAX_ALS 2000
 AlignmentPoint scfglex_als[MAX_ALS];
 int scfglex_num_als;
 
@@ -190,7 +191,7 @@ NT [^\t \[\],]+
 		BEGIN(SRC);
 		}
 <INITIAL,LHS_END>.	{
-		std::cerr << "Line " << lex_line << ": unexpected input in LHS: " << yytext << std::endl;
+		std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << ": unexpected input in LHS: " << yytext << std::endl;
 		abort();
 		}
 
@@ -217,7 +218,7 @@ NT [^\t \[\],]+
 
 <TRG,FEATS,ALIGNS>\n	{
                 if (scfglex_src_arity != scfglex_trg_arity) {
-                  std::cerr << "Line " << lex_line << ": LHS and RHS arity mismatch!\n";
+                  std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << ": LHS and RHS arity mismatch!\n";
                   abort();
                 }
 		// const bool ignore_grammar_features = false;
@@ -258,7 +259,7 @@ NT [^\t \[\],]+
 		BEGIN(FEATS);
 		}
 <FEATVAL>.	{
-		std::cerr << "Line " << lex_line << ": unexpected input in feature value: " << yytext << std::endl;
+		std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << ": unexpected input in feature value: " << yytext << std::endl;
 		abort();
 		}
 <FEATS>{REAL} 	{
@@ -267,7 +268,7 @@ NT [^\t \[\],]+
 		++scfglex_num_feats;
 		}
 <FEATS>.	{
-		std::cerr << "Line " << lex_line << " unexpected input in features: " << yytext << std::endl;
+		std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << " unexpected input in features: " << yytext << std::endl;
 		abort();
 		}
 <ALIGNS>[0-9]+-[0-9]+	{
@@ -291,14 +292,14 @@ NT [^\t \[\],]+
 		}
 <ALIGNS>[ \t]	;
 <ALIGNS>.	{
-		std::cerr << "Line " << lex_line << ": unexpected input in alignment: " << yytext << std::endl;
+		std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << ": unexpected input in alignment: " << yytext << std::endl;
 		abort();
 		}
 %%
 
 #include "filelib.h"
 
-void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, void* extra) {
+void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, const std::string& fname, void* extra) {
   if (scfglex_phrase_fnames.empty()) {
     scfglex_phrase_fnames.resize(100);
     for (int i = 0; i < scfglex_phrase_fnames.size(); ++i) {
@@ -308,6 +309,7 @@ void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, void*
     }
   }
   lex_line = 1;
+  scfglex_fname = fname;
   scfglex_stream = in;
   rule_callback_extra = extra,
   rule_callback = func;
diff --git a/decoder/trule.cc b/decoder/trule.cc
index 896f9f3d..c22baae3 100644
--- a/decoder/trule.cc
+++ b/decoder/trule.cc
@@ -117,7 +117,7 @@ bool TRule::ReadFromString(const string& line, bool strict, bool mono) {
     // use lexer
     istringstream il(line);
     n_assigned=0;
-    RuleLexer::ReadRules(&il,assign_trule,this);
+    RuleLexer::ReadRules(&il,assign_trule,"STRING",this);
     if (n_assigned>1)
       cerr<<"\nWARNING: more than one rule parsed from multi-line string; kept last: "<<line<<".\n";
     return n_assigned;
author	Chris Dyer <cdyer@Chriss-MacBook-Air.local>	2013-12-19 00:13:17 -0500
committer	Chris Dyer <cdyer@Chriss-MacBook-Air.local>	2013-12-19 00:13:17 -0500
commit	53f704b61e2ad0c048f1fe460e116207fa9e6062 (patch)
tree	74b655e439b025f43ca41b17ee17126749bc5242
parent	fb59fa66f4555dc957003f2ed8bad4882db09cc0 (diff)
parent	ac224d3f0c63760ad33fa04719d26d3a3e42495c (diff)