From da176941c1f481f14e93bd7d055cc29cac0ea8c8 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 12 Aug 2012 23:33:21 -0400 Subject: use new union api --- extools/sg_lexer.l | 294 ----------------------------------------------------- 1 file changed, 294 deletions(-) delete mode 100644 extools/sg_lexer.l (limited to 'extools/sg_lexer.l') diff --git a/extools/sg_lexer.l b/extools/sg_lexer.l deleted file mode 100644 index c85cdea7..00000000 --- a/extools/sg_lexer.l +++ /dev/null @@ -1,294 +0,0 @@ -%{ -#include -#include -#include -#include -#include -#include "tdict.h" -#include "fdict.h" -#include "striped_grammar.h" - -int lex_line = 0; -int read_contexts = 0; -std::istream* sglex_stream = NULL; -StripedGrammarLexer::GrammarCallback grammar_callback = NULL; -StripedGrammarLexer::ContextCallback context_callback = NULL; -void* grammar_callback_extra = NULL; -void* context_callback_extra = NULL; - -#undef YY_INPUT -#define YY_INPUT(buf, result, max_size) (result = sglex_stream->read(buf, max_size).gcount()) - -#define YY_SKIP_YYWRAP 1 -int num_rules = 0; -int yywrap() { return 1; } -bool fl = true; -#define MAX_TOKEN_SIZE 255 -std::string sglex_tmp_token(MAX_TOKEN_SIZE, '\0'); - -#define MAX_RULE_SIZE 48 -WordID sglex_src_rhs[MAX_RULE_SIZE]; -WordID sglex_trg_rhs[MAX_RULE_SIZE]; -int sglex_src_rhs_size; -int sglex_trg_rhs_size; -WordID sglex_lhs; -int sglex_src_arity; -int sglex_trg_arity; - -#define MAX_FEATS 100 -int sglex_feat_ids[MAX_FEATS]; -double sglex_feat_vals[MAX_FEATS]; -int sglex_num_feats; - -#define MAX_ARITY 20 -int sglex_nt_sanity[MAX_ARITY]; -int sglex_src_nts[MAX_ARITY]; -float sglex_nt_size_means[MAX_ARITY]; -float sglex_nt_size_vars[MAX_ARITY]; - -std::vector cur_src_rhs; -std::vector cur_trg_rhs; -ID2RuleStatistics cur_options; -RuleStatistics* cur_stats = NULL; -int sglex_cur_fid = 0; - -static void sanity_check_trg_index(int index) { - if (index > sglex_src_arity) { - std::cerr << "Target index " << index << " exceeds source arity " << sglex_src_arity << std::endl; - abort(); - } - int& flag = sglex_nt_sanity[index - 1]; - if (flag) { - std::cerr << "Target index " << index << " used multiple times!" << std::endl; - abort(); - } - flag = 1; -} - -static void sglex_reset() { - sglex_src_arity = 0; - sglex_trg_arity = 0; - sglex_num_feats = 0; - sglex_src_rhs_size = 0; - sglex_trg_rhs_size = 0; -} - -%} - -REAL [\-+]?[0-9]+(\.[0-9]*([eE][-+]*[0-9]+)?)?|inf|[\-+]inf -NT [^\t \[\],]+ -ALIGN [0-9]+-[0-9]+ - -%x LHS_END SRC TRG FEATS FEATVAL ALIGNS -%% - -[ ] ; -[\t] { - if (read_contexts) { - cur_options.clear(); - BEGIN(TRG); - } else { - std::cerr << "Unexpected tab while reading striped grammar\n"; - exit(1); - } - } - -\[{NT}\] { - if (read_contexts) { - sglex_tmp_token.assign(yytext, yyleng); - sglex_src_rhs[sglex_src_rhs_size] = TD::Convert(sglex_tmp_token); - ++sglex_src_rhs_size; - } else { - sglex_tmp_token.assign(yytext + 1, yyleng - 2); - sglex_lhs = -TD::Convert(sglex_tmp_token); - // std::cerr << sglex_tmp_token << "\n"; - BEGIN(LHS_END); - } - } - -[^ \t]+ { - if (read_contexts) { - // std::cerr << "Context: " << yytext << std::endl; - sglex_tmp_token.assign(yytext, yyleng); - sglex_src_rhs[sglex_src_rhs_size] = TD::Convert(sglex_tmp_token); - ++sglex_src_rhs_size; - } else { - std::cerr << "Unexpected input: " << yytext << " when NT expected\n"; - exit(1); - } - } - -\[{NT}\] { - sglex_tmp_token.assign(yytext + 1, yyleng - 2); - sglex_src_nts[sglex_src_arity] = sglex_src_rhs[sglex_src_rhs_size] = -TD::Convert(sglex_tmp_token); - ++sglex_src_arity; - ++sglex_src_rhs_size; - } - -[ ] { ; } -\|\|\| { - sglex_reset(); - BEGIN(SRC); - } - -. { - std::cerr << "Line " << lex_line << ": unexpected input in LHS: " << yytext << std::endl; - exit(1); - } - - -\[{NT},[1-9][0-9]?\] { - int index = yytext[yyleng - 2] - '0'; - if (yytext[yyleng - 3] == ',') { - sglex_tmp_token.assign(yytext + 1, yyleng - 4); - } else { - sglex_tmp_token.assign(yytext + 1, yyleng - 5); - index += 10 * (yytext[yyleng - 3] - '0'); - } - if ((sglex_src_arity+1) != index) { - std::cerr << "Src indices must go in order: expected " << sglex_src_arity << " but got " << index << std::endl; - abort(); - } - sglex_src_nts[sglex_src_arity] = sglex_src_rhs[sglex_src_rhs_size] = -TD::Convert(sglex_tmp_token); - ++sglex_src_rhs_size; - ++sglex_src_arity; - } - -[^ \t]+ { - sglex_tmp_token.assign(yytext, yyleng); - sglex_src_rhs[sglex_src_rhs_size] = TD::Convert(sglex_tmp_token); - ++sglex_src_rhs_size; - } -[ ] { ; } -\t { - //std::cerr << "LHS=" << TD::Convert(-sglex_lhs) << " "; - //std::cerr << " src_size: " << sglex_src_rhs_size << std::endl; - //std::cerr << " src_arity: " << sglex_src_arity << std::endl; - cur_options.clear(); - memset(sglex_nt_sanity, 0, sglex_src_arity * sizeof(int)); - sglex_trg_rhs_size = 0; - BEGIN(TRG); - } - -\[[1-9][0-9]?\] { - if (read_contexts) { - sglex_tmp_token.assign(yytext, yyleng); - sglex_trg_rhs[sglex_trg_rhs_size] = TD::Convert(sglex_tmp_token); - ++sglex_trg_rhs_size; - } else { - int index = yytext[yyleng - 2] - '0'; - if (yyleng == 4) { - index += 10 * (yytext[yyleng - 3] - '0'); - } - ++sglex_trg_arity; - sanity_check_trg_index(index); - sglex_trg_rhs[sglex_trg_rhs_size] = 1 - index; - ++sglex_trg_rhs_size; - } -} - -\|\|\| { - //std::cerr << " trg_size: " << sglex_trg_rhs_size << std::endl; - //std::cerr << " trg_arity: " << sglex_trg_arity << std::endl; - assert(sglex_trg_rhs_size > 0); - cur_trg_rhs.resize(sglex_trg_rhs_size); - for (int i = 0; i < sglex_trg_rhs_size; ++i) - cur_trg_rhs[i] = sglex_trg_rhs[i]; - cur_stats = &cur_options[cur_trg_rhs]; - BEGIN(FEATS); - } - -[^ ]+ { - sglex_tmp_token.assign(yytext, yyleng); - sglex_trg_rhs[sglex_trg_rhs_size] = TD::Convert(sglex_tmp_token); - - ++sglex_trg_rhs_size; - } -[ ]+ { ; } - -\n { - assert(sglex_src_rhs_size > 0); - cur_src_rhs.resize(sglex_src_rhs_size); - for (int i = 0; i < sglex_src_rhs_size; ++i) - cur_src_rhs[i] = sglex_src_rhs[i]; - if (read_contexts) { - context_callback(cur_src_rhs, cur_options, context_callback_extra); - } else { - assert(sglex_lhs < 0); - grammar_callback(sglex_lhs, cur_src_rhs, cur_options, grammar_callback_extra); - } - cur_options.clear(); - sglex_reset(); - BEGIN(INITIAL); - } -[ ]+ { ; } -\|\|\| { - memset(sglex_nt_sanity, 0, sglex_src_arity * sizeof(int)); - sglex_trg_rhs_size = 0; - BEGIN(TRG); - } -[A-Z][A-Z_0-9]*= { - // std::cerr << "FV: " << yytext << std::endl; - sglex_tmp_token.assign(yytext, yyleng - 1); - sglex_cur_fid = FD::Convert(sglex_tmp_token); - static const int Afid = FD::Convert("A"); - if (sglex_cur_fid == Afid) { - BEGIN(ALIGNS); - } else { - BEGIN(FEATVAL); - } - } -{REAL} { - // std::cerr << "Feature val input: " << yytext << std::endl; - cur_stats->counts.add_value(sglex_cur_fid, strtod(yytext, NULL)); - BEGIN(FEATS); - } -. { - std::cerr << "Feature val unexpected input: " << yytext << std::endl; - exit(1); - } -. { - std::cerr << "Features unexpected input: " << yytext << std::endl; - exit(1); - } -{ALIGN}(,{ALIGN})* { - assert(cur_stats->aligns.empty()); - int i = 0; - while(i < yyleng) { - short a = 0; - short b = 0; - while (yytext[i] != '-') { a *= 10; a += yytext[i] - '0'; ++i; } - ++i; - while (yytext[i] != ',' && i < yyleng) { b *= 10; b += yytext[i] - '0'; ++i; } - ++i; - cur_stats->aligns.push_back(std::make_pair(a,b)); - } - BEGIN(FEATS); - } -. { - std::cerr << "Aligns unexpected input: " << yytext << std::endl; - exit(1); - } -%% - -#include "filelib.h" - -void StripedGrammarLexer::ReadStripedGrammar(std::istream* in, GrammarCallback func, void* extra) { - read_contexts = 0; - lex_line = 1; - sglex_stream = in; - grammar_callback_extra = extra; - grammar_callback = func; - yylex(); -} - -void StripedGrammarLexer::ReadContexts(std::istream* in, ContextCallback func, void* extra) { - read_contexts = 1; - lex_line = 1; - sglex_stream = in; - context_callback_extra = extra; - context_callback = func; - yylex(); -} - - -- cgit v1.2.3