diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-08-12 23:33:21 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-08-12 23:33:21 -0400 |
commit | b6e70b420ed993ee73f71058d04b382147896068 (patch) | |
tree | f46854c2cf037df209ca5dc9064a67c35db25acd /extools/sg_lexer.l | |
parent | 8294cf0992815ffd791a5ef9e74f32fea6efeb9a (diff) |
use new union api
Diffstat (limited to 'extools/sg_lexer.l')
-rw-r--r-- | extools/sg_lexer.l | 294 |
1 files changed, 0 insertions, 294 deletions
diff --git a/extools/sg_lexer.l b/extools/sg_lexer.l deleted file mode 100644 index c85cdea7..00000000 --- a/extools/sg_lexer.l +++ /dev/null @@ -1,294 +0,0 @@ -%{ -#include <string> -#include <iostream> -#include <sstream> -#include <cstring> -#include <cassert> -#include "tdict.h" -#include "fdict.h" -#include "striped_grammar.h" - -int lex_line = 0; -int read_contexts = 0; -std::istream* sglex_stream = NULL; -StripedGrammarLexer::GrammarCallback grammar_callback = NULL; -StripedGrammarLexer::ContextCallback context_callback = NULL; -void* grammar_callback_extra = NULL; -void* context_callback_extra = NULL; - -#undef YY_INPUT -#define YY_INPUT(buf, result, max_size) (result = sglex_stream->read(buf, max_size).gcount()) - -#define YY_SKIP_YYWRAP 1 -int num_rules = 0; -int yywrap() { return 1; } -bool fl = true; -#define MAX_TOKEN_SIZE 255 -std::string sglex_tmp_token(MAX_TOKEN_SIZE, '\0'); - -#define MAX_RULE_SIZE 48 -WordID sglex_src_rhs[MAX_RULE_SIZE]; -WordID sglex_trg_rhs[MAX_RULE_SIZE]; -int sglex_src_rhs_size; -int sglex_trg_rhs_size; -WordID sglex_lhs; -int sglex_src_arity; -int sglex_trg_arity; - -#define MAX_FEATS 100 -int sglex_feat_ids[MAX_FEATS]; -double sglex_feat_vals[MAX_FEATS]; -int sglex_num_feats; - -#define MAX_ARITY 20 -int sglex_nt_sanity[MAX_ARITY]; -int sglex_src_nts[MAX_ARITY]; -float sglex_nt_size_means[MAX_ARITY]; -float sglex_nt_size_vars[MAX_ARITY]; - -std::vector<WordID> cur_src_rhs; -std::vector<WordID> cur_trg_rhs; -ID2RuleStatistics cur_options; -RuleStatistics* cur_stats = NULL; -int sglex_cur_fid = 0; - -static void sanity_check_trg_index(int index) { - if (index > sglex_src_arity) { - std::cerr << "Target index " << index << " exceeds source arity " << sglex_src_arity << std::endl; - abort(); - } - int& flag = sglex_nt_sanity[index - 1]; - if (flag) { - std::cerr << "Target index " << index << " used multiple times!" << std::endl; - abort(); - } - flag = 1; -} - -static void sglex_reset() { - sglex_src_arity = 0; - sglex_trg_arity = 0; - sglex_num_feats = 0; - sglex_src_rhs_size = 0; - sglex_trg_rhs_size = 0; -} - -%} - -REAL [\-+]?[0-9]+(\.[0-9]*([eE][-+]*[0-9]+)?)?|inf|[\-+]inf -NT [^\t \[\],]+ -ALIGN [0-9]+-[0-9]+ - -%x LHS_END SRC TRG FEATS FEATVAL ALIGNS -%% - -<INITIAL>[ ] ; -<INITIAL>[\t] { - if (read_contexts) { - cur_options.clear(); - BEGIN(TRG); - } else { - std::cerr << "Unexpected tab while reading striped grammar\n"; - exit(1); - } - } - -<INITIAL>\[{NT}\] { - if (read_contexts) { - sglex_tmp_token.assign(yytext, yyleng); - sglex_src_rhs[sglex_src_rhs_size] = TD::Convert(sglex_tmp_token); - ++sglex_src_rhs_size; - } else { - sglex_tmp_token.assign(yytext + 1, yyleng - 2); - sglex_lhs = -TD::Convert(sglex_tmp_token); - // std::cerr << sglex_tmp_token << "\n"; - BEGIN(LHS_END); - } - } - -<INITIAL>[^ \t]+ { - if (read_contexts) { - // std::cerr << "Context: " << yytext << std::endl; - sglex_tmp_token.assign(yytext, yyleng); - sglex_src_rhs[sglex_src_rhs_size] = TD::Convert(sglex_tmp_token); - ++sglex_src_rhs_size; - } else { - std::cerr << "Unexpected input: " << yytext << " when NT expected\n"; - exit(1); - } - } - -<SRC>\[{NT}\] { - sglex_tmp_token.assign(yytext + 1, yyleng - 2); - sglex_src_nts[sglex_src_arity] = sglex_src_rhs[sglex_src_rhs_size] = -TD::Convert(sglex_tmp_token); - ++sglex_src_arity; - ++sglex_src_rhs_size; - } - -<LHS_END>[ ] { ; } -<LHS_END>\|\|\| { - sglex_reset(); - BEGIN(SRC); - } - -<LHS_END>. { - std::cerr << "Line " << lex_line << ": unexpected input in LHS: " << yytext << std::endl; - exit(1); - } - - -<SRC>\[{NT},[1-9][0-9]?\] { - int index = yytext[yyleng - 2] - '0'; - if (yytext[yyleng - 3] == ',') { - sglex_tmp_token.assign(yytext + 1, yyleng - 4); - } else { - sglex_tmp_token.assign(yytext + 1, yyleng - 5); - index += 10 * (yytext[yyleng - 3] - '0'); - } - if ((sglex_src_arity+1) != index) { - std::cerr << "Src indices must go in order: expected " << sglex_src_arity << " but got " << index << std::endl; - abort(); - } - sglex_src_nts[sglex_src_arity] = sglex_src_rhs[sglex_src_rhs_size] = -TD::Convert(sglex_tmp_token); - ++sglex_src_rhs_size; - ++sglex_src_arity; - } - -<SRC>[^ \t]+ { - sglex_tmp_token.assign(yytext, yyleng); - sglex_src_rhs[sglex_src_rhs_size] = TD::Convert(sglex_tmp_token); - ++sglex_src_rhs_size; - } -<SRC>[ ] { ; } -<SRC>\t { - //std::cerr << "LHS=" << TD::Convert(-sglex_lhs) << " "; - //std::cerr << " src_size: " << sglex_src_rhs_size << std::endl; - //std::cerr << " src_arity: " << sglex_src_arity << std::endl; - cur_options.clear(); - memset(sglex_nt_sanity, 0, sglex_src_arity * sizeof(int)); - sglex_trg_rhs_size = 0; - BEGIN(TRG); - } - -<TRG>\[[1-9][0-9]?\] { - if (read_contexts) { - sglex_tmp_token.assign(yytext, yyleng); - sglex_trg_rhs[sglex_trg_rhs_size] = TD::Convert(sglex_tmp_token); - ++sglex_trg_rhs_size; - } else { - int index = yytext[yyleng - 2] - '0'; - if (yyleng == 4) { - index += 10 * (yytext[yyleng - 3] - '0'); - } - ++sglex_trg_arity; - sanity_check_trg_index(index); - sglex_trg_rhs[sglex_trg_rhs_size] = 1 - index; - ++sglex_trg_rhs_size; - } -} - -<TRG>\|\|\| { - //std::cerr << " trg_size: " << sglex_trg_rhs_size << std::endl; - //std::cerr << " trg_arity: " << sglex_trg_arity << std::endl; - assert(sglex_trg_rhs_size > 0); - cur_trg_rhs.resize(sglex_trg_rhs_size); - for (int i = 0; i < sglex_trg_rhs_size; ++i) - cur_trg_rhs[i] = sglex_trg_rhs[i]; - cur_stats = &cur_options[cur_trg_rhs]; - BEGIN(FEATS); - } - -<TRG>[^ ]+ { - sglex_tmp_token.assign(yytext, yyleng); - sglex_trg_rhs[sglex_trg_rhs_size] = TD::Convert(sglex_tmp_token); - - ++sglex_trg_rhs_size; - } -<TRG>[ ]+ { ; } - -<FEATS>\n { - assert(sglex_src_rhs_size > 0); - cur_src_rhs.resize(sglex_src_rhs_size); - for (int i = 0; i < sglex_src_rhs_size; ++i) - cur_src_rhs[i] = sglex_src_rhs[i]; - if (read_contexts) { - context_callback(cur_src_rhs, cur_options, context_callback_extra); - } else { - assert(sglex_lhs < 0); - grammar_callback(sglex_lhs, cur_src_rhs, cur_options, grammar_callback_extra); - } - cur_options.clear(); - sglex_reset(); - BEGIN(INITIAL); - } -<FEATS>[ ]+ { ; } -<FEATS>\|\|\| { - memset(sglex_nt_sanity, 0, sglex_src_arity * sizeof(int)); - sglex_trg_rhs_size = 0; - BEGIN(TRG); - } -<FEATS>[A-Z][A-Z_0-9]*= { - // std::cerr << "FV: " << yytext << std::endl; - sglex_tmp_token.assign(yytext, yyleng - 1); - sglex_cur_fid = FD::Convert(sglex_tmp_token); - static const int Afid = FD::Convert("A"); - if (sglex_cur_fid == Afid) { - BEGIN(ALIGNS); - } else { - BEGIN(FEATVAL); - } - } -<FEATVAL>{REAL} { - // std::cerr << "Feature val input: " << yytext << std::endl; - cur_stats->counts.add_value(sglex_cur_fid, strtod(yytext, NULL)); - BEGIN(FEATS); - } -<FEATVAL>. { - std::cerr << "Feature val unexpected input: " << yytext << std::endl; - exit(1); - } -<FEATS>. { - std::cerr << "Features unexpected input: " << yytext << std::endl; - exit(1); - } -<ALIGNS>{ALIGN}(,{ALIGN})* { - assert(cur_stats->aligns.empty()); - int i = 0; - while(i < yyleng) { - short a = 0; - short b = 0; - while (yytext[i] != '-') { a *= 10; a += yytext[i] - '0'; ++i; } - ++i; - while (yytext[i] != ',' && i < yyleng) { b *= 10; b += yytext[i] - '0'; ++i; } - ++i; - cur_stats->aligns.push_back(std::make_pair(a,b)); - } - BEGIN(FEATS); - } -<ALIGNS>. { - std::cerr << "Aligns unexpected input: " << yytext << std::endl; - exit(1); - } -%% - -#include "filelib.h" - -void StripedGrammarLexer::ReadStripedGrammar(std::istream* in, GrammarCallback func, void* extra) { - read_contexts = 0; - lex_line = 1; - sglex_stream = in; - grammar_callback_extra = extra; - grammar_callback = func; - yylex(); -} - -void StripedGrammarLexer::ReadContexts(std::istream* in, ContextCallback func, void* extra) { - read_contexts = 1; - lex_line = 1; - sglex_stream = in; - context_callback_extra = extra; - context_callback = func; - yylex(); -} - - |