%option nounput
%{

#include "json_feature_map_lexer.h"
#include "fdict.h"
#include "fast_sparse_vector.h"

#define YY_DECL int json_fmap_yylex (void)
#undef YY_INPUT
#define YY_INPUT(buf, result, max_size) (result = jfmap_stream->read(buf, max_size).gcount())
#define YY_SKIP_YYWRAP 1
int yywrap() { return 1; }

JSONFeatureMapLexer::FeatureMapCallback json_fmap_callback = NULL;
void* json_fmap_callback_extra = NULL;
std::istream* jfmap_stream = NULL;
bool fl = true;
unsigned spos = 0;
char featname[16000];
#define MAX_FEATS 20000
std::pair<int, float> featmap[MAX_FEATS];
unsigned curfeat = 0;
std::string instid;

inline unsigned unicode_escape_to_utf8(uint16_t w1, uint16_t w2, char* putf8) {
  uint32_t cp;
  if((w1 & 0xfc00) == 0xd800) {
    if((w2 & 0xfc00) == 0xdc00) {
      cp = 0x10000 + (((static_cast<uint32_t>(w1) & 0x3ff) << 10) | (w2 & 0x3ff));
    } else {
      abort();
    }
  } else {
    cp = w1;
  }
  
  
  if(cp < 0x80) {
    putf8[0] = static_cast<char>(cp);
    return 1;
  } else if(cp < 0x0800) {
    putf8[0] = 0xc0 | ((cp >> 6) & 0x1f);
    putf8[1] = 0x80 | (cp & 0x3f);
    return 2;
  } else if(cp < 0x10000) {
    putf8[0] = 0xe0 | ((cp >> 6) & 0x0f);
    putf8[1] = 0x80 | ((cp >> 6) & 0x3f);
    putf8[2] = 0x80 | (cp & 0x3f);
    return 3;
  } else if(cp < 0x1fffff) {
    putf8[0] = 0xf0 | ((cp >> 18) & 0x07);
    putf8[1] = 0x80 | ((cp >> 12) & 0x3f);
    putf8[2] = 0x80 | ((cp >> 6) & 0x3f);
    putf8[3] = 0x80 | (cp & 0x3f);
    return 4;
  } else {
    abort();
  } 
  return 0;
}

%}

ID [^ \t\n\r]+
HEX_D [a-fA-F0-9]
INT [-]?[0-9]+
DOUBLE {INT}((\.[0-9]+)?([eE][-+]?[0-9]+)?)
WS [ \t\r\n]
LCB [{]
RCB [}]
UNESCAPED_CH [^\"\\\b\n\r\f\t]

%x JSON PREVAL STRING JSONVAL POSTVAL DOUBLE
%%

<INITIAL>{ID}                            { instid = yytext; BEGIN(JSON); }

<JSON>{WS}*{LCB}{WS}*                    { BEGIN(PREVAL); }

<JSON>{WS}*{LCB}{WS}*{RCB}\n*            {const SparseVector<float> x;
                                         json_fmap_callback(instid, x, json_fmap_callback_extra);
                                         curfeat = 0;
                                         BEGIN(INITIAL);}

<PREVAL>\"                               { BEGIN(STRING); spos=0; }

<STRING>\"                               { featname[spos] = 0;
                                           featmap[curfeat].first = FD::Convert(featname);
                                           BEGIN(JSONVAL);
                                         }
<STRING>{UNESCAPED_CH}                   { featname[spos++] = yytext[0]; }
<STRING>\\\"                             { featname[spos++] = '"'; }
<STRING>\\\\                             { featname[spos++] = '\\'; }
<STRING>\\\/                             { featname[spos++] = '/'; }
<STRING>\\b                              { }
<STRING>\\f                              { }
<STRING>\\n                              { }
<STRING>\\r                              { }
<STRING>\\t                              { }
<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D}  { uint16_t hex = strtol(&yytext[2], NULL, 16);
                                           spos += unicode_escape_to_utf8(hex, 0, &featname[spos++])-1;
                                         }

<JSONVAL>{WS}*:{WS}*                     { BEGIN(DOUBLE); }
<DOUBLE>{DOUBLE}                         { featmap[curfeat++].second = strtod(yytext, 0);
                                           BEGIN(POSTVAL); }

<POSTVAL>{WS}*,{WS}*                     { BEGIN(PREVAL); }
<POSTVAL>{WS}*{RCB}\n*                   {
                                           const SparseVector<float> x(&featmap[0], &featmap[curfeat]);
                                           json_fmap_callback(instid, x, json_fmap_callback_extra);
                                           curfeat = 0;
                                           BEGIN(INITIAL);
                                         }

<PREVAL,POSTVAL,DOUBLE,JSONVAL,INITIAL>. { std::cerr << "bad input: " << yytext << std::endl; abort(); }

%%

void JSONFeatureMapLexer::ReadRules(std::istream* in, FeatureMapCallback func, void* extra) {
  json_fmap_callback = func;
  json_fmap_callback_extra = extra;
  jfmap_stream = in;
  json_fmap_yylex();
}

#if 0
void cb(const std::string& id, const SparseVector<float>& fmap, void* extra) {
  (void) extra;
  static int cc = 0;
  cc++;
}

int main() {
  JSONFeatureMapLexer::ReadRules(&std::cin, cb, NULL);
}
#endif