diff options
author | Victor Chahuneau <vchahune@cs.cmu.edu> | 2012-05-17 10:45:55 -0400 |
---|---|---|
committer | Victor Chahuneau <vchahune@cs.cmu.edu> | 2012-05-17 10:45:55 -0400 |
commit | 141f566baf82129fd339fa28e1e98a17c6e37dcc (patch) | |
tree | 87f45170c70446259759ba571466e7950076068a /creg | |
parent | 0f1e696690b18259acd79c3469d00a72aca00161 (diff) |
Fix JSON parser for unicode, empty feature vectors
Diffstat (limited to 'creg')
-rw-r--r-- | creg/json_feature_map_lexer.ll | 9 |
1 file changed, 7 insertions, 2 deletions
diff --git a/creg/json_feature_map_lexer.ll b/creg/json_feature_map_lexer.ll index cbb6d9a9..f9ce7977 100644 --- a/creg/json_feature_map_lexer.ll +++ b/creg/json_feature_map_lexer.ll @@ -77,6 +77,11 @@ UNESCAPED_CH [^\"\\\b\n\r\f\t] <JSON>{WS}*{LCB}{WS}* { BEGIN(PREVAL); } +<JSON>{WS}*{LCB}{WS}*{RCB}\n* {const SparseVector<float> x; + json_fmap_callback(instid, x, json_fmap_callback_extra); + curfeat = 0; + BEGIN(INITIAL);} + <PREVAL>\" { BEGIN(STRING); spos=0; } <STRING>\" { featname[spos] = 0; @@ -92,7 +97,8 @@ UNESCAPED_CH [^\"\\\b\n\r\f\t] <STRING>\\n { } <STRING>\\r { } <STRING>\\t { } -<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { abort(); +<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { uint16_t hex = strtol(&yytext[2], NULL, 16); + spos += unicode_escape_to_utf8(hex, 0, &featname[spos++])-1; } <JSONVAL>{WS}*:{WS}* { BEGIN(DOUBLE); } @@ -129,4 +135,3 @@ int main() { JSONFeatureMapLexer::ReadRules(&std::cin, cb, NULL); } #endif - |