diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-05-18 13:16:03 +0200 |
---|---|---|
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-05-18 13:16:03 +0200 |
commit | 47e9deff1c2cbd104d6e2822703cc83df625d8aa (patch) | |
tree | 72d2db8fc921a48e1b6306255ebbe5e7408e034b /creg | |
parent | 37050e861de2b216d3a28f79e111b674c5d142ac (diff) | |
parent | 05b86961fa7e04d47cf729ea27e27c527f895da0 (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'creg')
-rw-r--r-- | creg/Jamfile | 6 | ||||
-rw-r--r-- | creg/creg.cc | 2 | ||||
-rw-r--r-- | creg/json_feature_map_lexer.ll | 9 |
3 files changed, 14 insertions, 3 deletions
diff --git a/creg/Jamfile b/creg/Jamfile
new file mode 100644
index 00000000..cfed2388
--- /dev/null
+++ b/creg/Jamfile
@@ -0,0 +1,6 @@
+import lex ;
+
+exe creg : creg.cc json_feature_map_lexer.ll ..//utils ../training//liblbfgs ..//boost_program_options : <include>../training <include>. : <library>..//z ;
+
+alias programs : creg ;
+
diff --git a/creg/creg.cc b/creg/creg.cc
index 005ec9ac..b145ac49 100644
--- a/creg/creg.cc
+++ b/creg/creg.cc
@@ -65,7 +65,7 @@ void ReaderCB(const string& id, const SparseVector<float>& fmap, void* extra) {
   if (rh.lc % 40000 == 0) { cerr << " [" << rh.lc << "]\n"; rh.flag = false; }
   const unordered_map<string, unsigned>::iterator it = rh.id2ind.find(id);
   if (it == rh.id2ind.end()) {
-    cerr << "Unlabeled example in line " << rh.lc << endl;
+    cerr << "Unlabeled example in line " << rh.lc << " (key=" << id << ')' << endl;
     abort();
   }
   (*rh.xy_pairs)[it->second - 1].x = fmap;
diff --git a/creg/json_feature_map_lexer.ll b/creg/json_feature_map_lexer.ll
index cbb6d9a9..f9ce7977 100644
--- a/creg/json_feature_map_lexer.ll
+++ b/creg/json_feature_map_lexer.ll
@@ -77,6 +77,11 @@ UNESCAPED_CH [^\"\\\b\n\r\f\t]
 
 <JSON>{WS}*{LCB}{WS}* { BEGIN(PREVAL); }
 
+<JSON>{WS}*{LCB}{WS}*{RCB}\n* {const SparseVector<float> x;
+    json_fmap_callback(instid, x, json_fmap_callback_extra);
+    curfeat = 0;
+    BEGIN(INITIAL);}
+
 <PREVAL>\" { BEGIN(STRING); spos=0; }
 
 <STRING>\" { featname[spos] = 0;
@@ -92,7 +97,8 @@ UNESCAPED_CH [^\"\\\b\n\r\f\t]
 <STRING>\\n { }
 <STRING>\\r { }
 <STRING>\\t { }
-<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { abort();
+<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { uint16_t hex = strtol(&yytext[2], NULL, 16);
+    spos += unicode_escape_to_utf8(hex, 0, &featname[spos++])-1;
 }
 
 <JSONVAL>{WS}*:{WS}* { BEGIN(DOUBLE); }
@@ -129,4 +135,3 @@ int main() {
   JSONFeatureMapLexer::ReadRules(&std::cin, cb, NULL);
 }
 #endif
-