diff options
author | Victor Chahuneau <vchahune@cs.cmu.edu> | 2012-05-17 10:45:55 -0400 |
---|---|---|
committer | Victor Chahuneau <vchahune@cs.cmu.edu> | 2012-05-17 10:45:55 -0400 |
commit | bebc7faef73b5943279e692706950c1d6cda7f42 (patch) | |
tree | 067bc348da29ac80c92149586fb864d13c96ccc8 /creg | |
parent | 824c96f038b0447ce83ae92cad112a5be49e3330 (diff) |
Fix JSON parser for unicode, empty feature vectors
Diffstat (limited to 'creg')
-rw-r--r-- | creg/json_feature_map_lexer.ll | 9 |
1 file changed, 7 insertions, 2 deletions
```diff
diff --git a/creg/json_feature_map_lexer.ll b/creg/json_feature_map_lexer.ll
index cbb6d9a9..f9ce7977 100644
--- a/creg/json_feature_map_lexer.ll
+++ b/creg/json_feature_map_lexer.ll
@@ -77,6 +77,11 @@ UNESCAPED_CH [^\"\\\b\n\r\f\t]
 
 <JSON>{WS}*{LCB}{WS}* { BEGIN(PREVAL); }
 
+<JSON>{WS}*{LCB}{WS}*{RCB}\n* {const SparseVector<float> x;
+ json_fmap_callback(instid, x, json_fmap_callback_extra);
+ curfeat = 0;
+ BEGIN(INITIAL);}
+
 <PREVAL>\" { BEGIN(STRING); spos=0; }
 
 <STRING>\" { featname[spos] = 0;
@@ -92,7 +97,8 @@ UNESCAPED_CH [^\"\\\b\n\r\f\t]
 <STRING>\\n { }
 <STRING>\\r { }
 <STRING>\\t { }
-<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { abort();
+<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { uint16_t hex = strtol(&yytext[2], NULL, 16);
+ spos += unicode_escape_to_utf8(hex, 0, &featname[spos++])-1;
 }
 
 <JSONVAL>{WS}*:{WS}* { BEGIN(DOUBLE); }
@@ -129,4 +135,3 @@ int main() {
  JSONFeatureMapLexer::ReadRules(&std::cin, cb, NULL);
 }
 #endif
-
```