summaryrefslogtreecommitdiff
path: root/gi/pyp-topics/src/contexts_lexer.l
blob: 7a5d9460b080c7970c0159b167b55f648efdebad (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
%{
#include "contexts_lexer.h"

#include <string>
#include <iostream>
#include <sstream>
#include <cstring>
#include <cassert>
#include <algorithm>

// Input line counter. ReadContexts sets it to 1 before scanning; nothing in
// the visible part of this file advances or reads it afterwards.
int lex_line = 0;
// Stream that the YY_INPUT override reads from; assigned by ReadContexts.
std::istream* contextslex_stream = NULL;
// Function called with each fully parsed line; assigned by ReadContexts.
ContextsLexer::ContextsCallback contexts_callback = NULL;
// Opaque pointer handed back to contexts_callback as its second argument.
void* contexts_callback_extra = NULL;

// Replace flex's default input routine: read up to max_size bytes from
// contextslex_stream into buf and store the number actually read in result.
// A count of zero is how the scanner learns that the input is exhausted.
#undef YY_INPUT
#define YY_INPUT(buf, result, max_size) (result = contextslex_stream->read(buf, max_size).gcount())

// NOTE(review): presumably stops the flex skeleton from declaring its own
// yywrap prototype, since yywrap is defined right below -- confirm against
// the flex version in use.
#define YY_SKIP_YYWRAP 1
// Not referenced anywhere else in the visible part of this file.
int num_phrases = 0;
// Returning 1 tells flex there is no further input once the stream ends.
int yywrap() { return 1; }

#define MAX_TOKEN_SIZE 255
// Scratch string that receives the text of each matched context token. It
// starts out holding MAX_TOKEN_SIZE NUL characters; the rules replace its
// contents with assign(), so the initial size does not cap token length.
std::string contextslex_tmp_token(MAX_TOKEN_SIZE, '\0');
// Record for the line currently being parsed: its phrase, the contexts read
// so far and the count attached to each of them.
ContextsLexer::PhraseContextsType current_contexts;

// Only referenced by the commented-out array declaration below.
#define MAX_CONTEXT_SIZE 255
//std::string tmp_context[MAX_CONTEXT_SIZE];
// Tokens of the context currently being read; appended to
// current_contexts.contexts and emptied at each ||| separator.
ContextsLexer::Context tmp_context;


// Drops everything accumulated for the line in progress: the partially read
// context as well as the phrase, contexts and counts of the current record.
void contextslex_reset() {
  tmp_context.clear();
  current_contexts.counts.clear();
  current_contexts.contexts.clear();
  current_contexts.phrase.clear();
}

%}

/* INT: a decimal integer with an optional sign, or the word inf with an
   optional sign. Used by the count rule as C={INT}. */
INT [\-+]?[0-9]+|inf|[\-+]inf

/* Exclusive start conditions, one per field that follows the phrase:
   CONTEXT reads context tokens, COUNT reads the C=<n> field, and COUNT_END
   expects either another ||| separator or the end of the line. */
%x CONTEXT COUNT COUNT_END
%%

<INITIAL>[^\t\n]+	{
    // Phrase field: everything up to the first tab on the line. Newlines are
    // excluded from the pattern so that a line break met while a phrase is
    // expected reaches the newline error rule instead of being absorbed into
    // the phrase text.
    contextslex_reset();
    current_contexts.phrase.assign(yytext, yyleng);
    BEGIN(CONTEXT);
  }
<INITIAL>\t	{
    // A tab seen before any phrase text is skipped.
    ;
  }

<INITIAL,CONTEXT,COUNT>\n	{
    // A line may only end after a count (the COUNT_END state). A newline
    // matched by this rule means the record is incomplete, which is treated
    // as fatal: report on stderr and abort the process.
    std::cerr << "ERROR: contexts_lexer.l: unexpected newline while trying to read phrase|context|count." << std::endl;
    abort();
  }

<CONTEXT>\|\|\|	{
    // Separator reached: the tokens gathered so far form one complete
    // context. Store it and get ready to read the count that follows.
    current_contexts.contexts.push_back(tmp_context);
    tmp_context.clear();
    BEGIN(COUNT);
  }
<CONTEXT>[^ \t\n]+	{
    // One context token. Newlines are excluded from the pattern: otherwise a
    // token directly followed by a line break would swallow the break and
    // the start of the next line, and the newline error rule would never see
    // it. A bare ||| still goes to the separator rule above because that
    // rule is listed first and matches the same length.
    contextslex_tmp_token.assign(yytext, yyleng);
    tmp_context.push_back(contextslex_tmp_token);
  }
<CONTEXT>[ \t]+	{ /* blanks and tabs between tokens are skipped */ ; }

<COUNT>[ \t]+	{ /* padding around the count field is skipped */ }
<COUNT>C={INT}	{
    // yytext holds C= followed by the number, so the value starts at offset 2.
    // Each count is stored as a pair whose first member is the number of
    // counts recorded so far, i.e. the index of the context just completed.
    // NOTE(review): atoi yields 0 for the inf spellings that INT admits --
    // confirm that this is the intended value.
    const char* const number_text = &yytext[2];
    current_contexts.counts.push_back(
        std::make_pair(current_contexts.counts.size(), atoi(number_text)));
    BEGIN(COUNT_END);
  }
<COUNT>.	{
    // Anything other than blanks or a C=<n> field is fatal here.
    std::cerr << "ERROR: contexts_lexer.l: unexpected content while reading count." << std::endl;
    abort();
  }

<COUNT_END>[ \t]+	{ /* blanks and tabs after a count are skipped */ ; }
<COUNT_END>\|\|\|	{
    // Another context follows on the same line.
    BEGIN(CONTEXT);
  }
<COUNT_END>\n	{
    // End of a complete line. Sorting over reverse iterators leaves the
    // (context index, count) pairs in descending order; as the indices are
    // unique this amounts to reversing the order in which they were read.
    std::sort(current_contexts.counts.rbegin(), current_contexts.counts.rend());

    // Hand the finished record to the client, then clear the shared state
    // with the same helper that the phrase rule uses.
    contexts_callback(current_contexts, contexts_callback_extra);
    contextslex_reset();
    BEGIN(INITIAL);
  }
<COUNT_END><<EOF>>	{
    // The input ended right after a count, without a final newline. The
    // record is complete, so deliver it exactly as the newline rule does
    // rather than dropping the last line of the file, then stop scanning.
    std::sort(current_contexts.counts.rbegin(), current_contexts.counts.rend());

    contexts_callback(current_contexts, contexts_callback_extra);
    contextslex_reset();
    BEGIN(INITIAL);
    yyterminate();
  }
<COUNT_END>.	{
    // Fatal: only blanks, a ||| separator or the end of the line may follow
    // a count. The offending text is kept in contextslex_tmp_token before
    // aborting.
    contextslex_tmp_token.assign(yytext, yyleng);
    std::cerr << "ERROR: contexts_lexer.l: unexpected content while looking for ||| closing count." << std::endl;
    abort();
  }

%%

#include "../../../decoder/filelib.h" 

void ContextsLexer::ReadContexts(std::istream* in, ContextsLexer::ContextsCallback func, void* extra) {
  lex_line = 1;
  contextslex_stream = in;
  contexts_callback_extra = extra,
  contexts_callback = func;
  yylex();
}