summary refs log tree commit diff
path: root/creg
diff options
context:
space:
mode:
Diffstat (limited to 'creg')
-rw-r--r-- creg/Jamfile 6
-rw-r--r-- creg/creg.cc 2
-rw-r--r-- creg/json_feature_map_lexer.ll 9
3 files changed, 14 insertions, 3 deletions
diff --git a/creg/Jamfile b/creg/Jamfile
new file mode 100644
index 00000000..cfed2388
--- /dev/null
+++ b/creg/Jamfile
@@ -0,0 +1,6 @@
+import lex ;
+
+exe creg : creg.cc json_feature_map_lexer.ll ..//utils ../training//liblbfgs ..//boost_program_options : <include>../training <include>. : <library>..//z ;
+
+alias programs : creg ;
+
diff --git a/creg/creg.cc b/creg/creg.cc
index 005ec9ac..b145ac49 100644
--- a/creg/creg.cc
+++ b/creg/creg.cc
@@ -65,7 +65,7 @@ void ReaderCB(const string& id, const SparseVector<float>& fmap, void* extra) {
if (rh.lc % 40000 == 0) { cerr << " [" << rh.lc << "]\n"; rh.flag = false; }
const unordered_map<string, unsigned>::iterator it = rh.id2ind.find(id);
if (it == rh.id2ind.end()) {
- cerr << "Unlabeled example in line " << rh.lc << endl;
+ cerr << "Unlabeled example in line " << rh.lc << " (key=" << id << ')' << endl;
abort();
}
(*rh.xy_pairs)[it->second - 1].x = fmap;
diff --git a/creg/json_feature_map_lexer.ll b/creg/json_feature_map_lexer.ll
index cbb6d9a9..f9ce7977 100644
--- a/creg/json_feature_map_lexer.ll
+++ b/creg/json_feature_map_lexer.ll
@@ -77,6 +77,11 @@ UNESCAPED_CH [^\"\\\b\n\r\f\t]
<JSON>{WS}*{LCB}{WS}* { BEGIN(PREVAL); }
+<JSON>{WS}*{LCB}{WS}*{RCB}\n* {const SparseVector<float> x; /* object with nothing between the braces: hand the callback a default-constructed feature vector for instid */
+ json_fmap_callback(instid, x, json_fmap_callback_extra);
+ curfeat = 0;
+ BEGIN(INITIAL);}
+
<PREVAL>\" { BEGIN(STRING); spos=0; }
<STRING>\" { featname[spos] = 0;
@@ -92,7 +97,8 @@ UNESCAPED_CH [^\"\\\b\n\r\f\t]
<STRING>\\n { }
<STRING>\\r { }
<STRING>\\t { }
-<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { abort();
+<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { uint16_t hex = strtol(&yytext[2], NULL, 16); /* parse the four hex digits that follow the two-character escape prefix */
+ spos += unicode_escape_to_utf8(hex, 0, &featname[spos]); /* write at the cursor, then advance; avoids spos++ inside the same expression (unsequenced before C++17). Assumes the return value is the number of bytes written - TODO confirm */
}
<JSONVAL>{WS}*:{WS}* { BEGIN(DOUBLE); }
@@ -129,4 +135,3 @@ int main() {
JSONFeatureMapLexer::ReadRules(&std::cin, cb, NULL);
}
#endif
-