From bebc7faef73b5943279e692706950c1d6cda7f42 Mon Sep 17 00:00:00 2001
From: Victor Chahuneau
Date: Thu, 17 May 2012 10:45:55 -0400
Subject: Fix JSON parser for unicode, empty feature vectors

---
Review notes (text between "---" and the diffstat is not part of the commit
message):
  * The \u rule writes at featname[spos] and advances spos with a single
    compound assignment. The earlier form,
        spos += unicode_escape_to_utf8(hex, 0, &featname[spos++])-1;
    modified spos twice in one expression with no sequencing between the two
    updates, which is undefined behaviour before C++17. The net advance under
    C++17 evaluation order is unchanged.
  * NOTE(review): this rendering appears to have lost everything written in
    angle brackets (flex start-condition prefixes on the rules, template
    arguments, the author's address), and its original whitespace. Confirm
    against the repository before applying.

 creg/json_feature_map_lexer.ll | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'creg')

diff --git a/creg/json_feature_map_lexer.ll b/creg/json_feature_map_lexer.ll
index cbb6d9a9..f9ce7977 100644
--- a/creg/json_feature_map_lexer.ll
+++ b/creg/json_feature_map_lexer.ll
@@ -77,6 +77,11 @@ UNESCAPED_CH [^\"\\\b\n\r\f\t]
 
 {WS}*{LCB}{WS}* { BEGIN(PREVAL); }
 
+{WS}*{LCB}{WS}*{RCB}\n* {const SparseVector x;
+  json_fmap_callback(instid, x, json_fmap_callback_extra);
+  curfeat = 0;
+  BEGIN(INITIAL);}
+
 \" { BEGIN(STRING); spos=0; }
 
 \" { featname[spos] = 0;
@@ -92,7 +97,8 @@ UNESCAPED_CH [^\"\\\b\n\r\f\t]
 \\n { }
 \\r { }
 \\t { }
-\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { abort();
+\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { uint16_t hex = strtol(&yytext[2], NULL, 16);
+  spos += unicode_escape_to_utf8(hex, 0, &featname[spos]); /* write at spos, then advance by the call's return value */
   }
 
 {WS}*:{WS}* { BEGIN(DOUBLE); }
@@ -129,4 +135,3 @@ int main() {
   JSONFeatureMapLexer::ReadRules(&std::cin, cb, NULL);
 }
 #endif
-
-- 
cgit v1.2.3