summary | refs | log | tree | commit | diff
path: root/creg
diff options
context:
space:
mode:
author Victor Chahuneau <vchahune@cs.cmu.edu> 2012-05-17 10:45:55 -0400
committer Victor Chahuneau <vchahune@cs.cmu.edu> 2012-05-17 10:45:55 -0400
commit 141f566baf82129fd339fa28e1e98a17c6e37dcc (patch)
tree 87f45170c70446259759ba571466e7950076068a /creg
parent 0f1e696690b18259acd79c3469d00a72aca00161 (diff)
Fix JSON parser for unicode, empty feature vectors
Diffstat (limited to 'creg')
-rw-r--r-- creg/json_feature_map_lexer.ll 9
1 files changed, 7 insertions, 2 deletions
diff --git a/creg/json_feature_map_lexer.ll b/creg/json_feature_map_lexer.ll
index cbb6d9a9..f9ce7977 100644
--- a/creg/json_feature_map_lexer.ll
+++ b/creg/json_feature_map_lexer.ll
@@ -77,6 +77,11 @@ UNESCAPED_CH [^\"\\\b\n\r\f\t]
<JSON>{WS}*{LCB}{WS}* { BEGIN(PREVAL); }
+<JSON>{WS}*{LCB}{WS}*{RCB}\n* {const SparseVector<float> x;
+ json_fmap_callback(instid, x, json_fmap_callback_extra);
+ curfeat = 0;
+ BEGIN(INITIAL);}
+
<PREVAL>\" { BEGIN(STRING); spos=0; }
<STRING>\" { featname[spos] = 0;
@@ -92,7 +97,8 @@ UNESCAPED_CH [^\"\\\b\n\r\f\t]
<STRING>\\n { }
<STRING>\\r { }
<STRING>\\t { }
-<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { abort();
+<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { uint16_t hex = strtol(&yytext[2], NULL, 16);
+ spos += unicode_escape_to_utf8(hex, 0, &featname[spos++])-1;
}
<JSONVAL>{WS}*:{WS}* { BEGIN(DOUBLE); }
@@ -129,4 +135,3 @@ int main() {
JSONFeatureMapLexer::ReadRules(&std::cin, cb, NULL);
}
#endif
-