From bebc7faef73b5943279e692706950c1d6cda7f42 Mon Sep 17 00:00:00 2001 From: Victor Chahuneau Date: Thu, 17 May 2012 10:45:55 -0400 Subject: Fix JSON parser for unicode, empty feature vectors --- creg/json_feature_map_lexer.ll | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'creg') diff --git a/creg/json_feature_map_lexer.ll b/creg/json_feature_map_lexer.ll index cbb6d9a9..f9ce7977 100644 --- a/creg/json_feature_map_lexer.ll +++ b/creg/json_feature_map_lexer.ll @@ -77,6 +77,11 @@ UNESCAPED_CH [^\"\\\b\n\r\f\t] {WS}*{LCB}{WS}* { BEGIN(PREVAL); } +{WS}*{LCB}{WS}*{RCB}\n* {const SparseVector x; + json_fmap_callback(instid, x, json_fmap_callback_extra); + curfeat = 0; + BEGIN(INITIAL);} + \" { BEGIN(STRING); spos=0; } \" { featname[spos] = 0; @@ -92,7 +97,8 @@ UNESCAPED_CH [^\"\\\b\n\r\f\t] \\n { } \\r { } \\t { } -\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { abort(); +\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { uint16_t hex = strtol(&yytext[2], NULL, 16); + spos += unicode_escape_to_utf8(hex, 0, &featname[spos++])-1; } {WS}*:{WS}* { BEGIN(DOUBLE); } @@ -129,4 +135,3 @@ int main() { JSONFeatureMapLexer::ReadRules(&std::cin, cb, NULL); } #endif - -- cgit v1.2.3 From 0c318c2458c58dddde5b1fabf024f58a82dc7eaf Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 17 May 2012 23:48:23 -0400 Subject: jamfiles for creg --- Jamroot | 4 ++-- creg/Jamfile | 6 ++++++ creg/creg.cc | 2 +- training/liblbfgs/Jamfile | 5 +++++ 4 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 creg/Jamfile create mode 100644 training/liblbfgs/Jamfile (limited to 'creg') diff --git a/Jamroot b/Jamroot index 6daf7a9e..f42a9bc3 100644 --- a/Jamroot +++ b/Jamroot @@ -29,9 +29,9 @@ if [ test_header boost/serialization/map.hpp ] && [ test_library boost_serializa project : requirements $(requirements) ; project : default-build single on release ; -install-bin-libs utils//programs mteval//programs klm/lm//programs decoder//cdec phrasinator//programs ; +install-bin-libs utils//programs mteval//programs klm/lm//programs training//liblbfgs decoder//cdec creg//creg phrasinator//programs ; -build-projects mteval decoder klm/lm ; +build-projects mteval decoder klm/lm training/liblbfgs creg ; #Compile everything ending with _test.cc into a test and run it. rule all_tests ( targets * : dependencies : properties * ) { diff --git a/creg/Jamfile b/creg/Jamfile new file mode 100644 index 00000000..cfed2388 --- /dev/null +++ b/creg/Jamfile @@ -0,0 +1,6 @@ +import lex ; + +exe creg : creg.cc json_feature_map_lexer.ll ..//utils ../training//liblbfgs ..//boost_program_options : ../training . : ..//z ; + +alias programs : creg ; + diff --git a/creg/creg.cc b/creg/creg.cc index 005ec9ac..b145ac49 100644 --- a/creg/creg.cc +++ b/creg/creg.cc @@ -65,7 +65,7 @@ void ReaderCB(const string& id, const SparseVector& fmap, void* extra) { if (rh.lc % 40000 == 0) { cerr << " [" << rh.lc << "]\n"; rh.flag = false; } const unordered_map::iterator it = rh.id2ind.find(id); if (it == rh.id2ind.end()) { - cerr << "Unlabeled example in line " << rh.lc << endl; + cerr << "Unlabeled example in line " << rh.lc << " (key=" << id << ')' << endl; abort(); } (*rh.xy_pairs)[it->second - 1].x = fmap; diff --git a/training/liblbfgs/Jamfile b/training/liblbfgs/Jamfile new file mode 100644 index 00000000..49c82748 --- /dev/null +++ b/training/liblbfgs/Jamfile @@ -0,0 +1,5 @@ +import testing ; + +lib liblbfgs : lbfgs.c : .. ; + +unit-test ll_test : ll_test.cc liblbfgs : .. ; -- cgit v1.2.3