diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-05-13 17:09:34 -0700 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-05-13 17:09:34 -0700 |
commit | a937d645257f0949e138f3548fd0a2b65ea8aa11 (patch) | |
tree | ad46b2f3c24ad8f3fab3cd8b7971e646552e1a17 /utils | |
parent | 69b0bf8d618338c82fda17878defff77fb35a69f (diff) |
put creg in its own top-level folder
Diffstat (limited to 'utils')
-rw-r--r-- | utils/Makefile.am | 4 | ||||
-rw-r--r-- | utils/json_feature_map_lexer.h | 15 | ||||
-rw-r--r-- | utils/json_feature_map_lexer.ll | 132 |
3 files changed, 0 insertions, 151 deletions
diff --git a/utils/Makefile.am b/utils/Makefile.am index b7da0f06..46650c75 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -33,16 +33,12 @@ libutils_a_SOURCES = \ sparse_vector.cc \ timing_stats.cc \ verbose.cc \ - json_feature_map_lexer.cc \ weights.cc if HAVE_CMPH libutils_a_SOURCES += perfect_hash.cc endif -json_feature_map_lexer.cc: json_feature_map_lexer.ll - $(LEX) -s -8 -CF -o$@ $< - phmt_SOURCES = phmt.cc ts_SOURCES = ts.cc m_test_SOURCES = m_test.cc diff --git a/utils/json_feature_map_lexer.h b/utils/json_feature_map_lexer.h deleted file mode 100644 index 3324aa29..00000000 --- a/utils/json_feature_map_lexer.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _RULE_LEXER_H_ -#define _RULE_LEXER_H_ - -#include <iostream> -#include <string> - -#include "sparse_vector.h" - -struct JSONFeatureMapLexer { - typedef void (*FeatureMapCallback)(const std::string& id, const SparseVector<float>& fmap, void* extra); - static void ReadRules(std::istream* in, FeatureMapCallback func, void* extra); -}; - -#endif - diff --git a/utils/json_feature_map_lexer.ll b/utils/json_feature_map_lexer.ll deleted file mode 100644 index 372b52f5..00000000 --- a/utils/json_feature_map_lexer.ll +++ /dev/null @@ -1,132 +0,0 @@ -%option nounput -%{ - -#include "json_feature_map_lexer.h" -#include "fdict.h" -#include "fast_sparse_vector.h" - -#define YY_DECL int json_fmap_yylex (void) -#undef YY_INPUT -#define YY_INPUT(buf, result, max_size) (result = jfmap_stream->read(buf, max_size).gcount()) -#define YY_SKIP_YYWRAP 1 -int yywrap() { return 1; } - -JSONFeatureMapLexer::FeatureMapCallback json_fmap_callback = NULL; -void* json_fmap_callback_extra = NULL; -std::istream* jfmap_stream = NULL; -bool fl = true; -unsigned spos = 0; -char featname[16000]; -#define MAX_FEATS 20000 -std::pair<int, float> featmap[MAX_FEATS]; -unsigned curfeat = 0; -std::string instid; - -inline unsigned unicode_escape_to_utf8(uint16_t w1, uint16_t w2, char* putf8) { - uint32_t cp; - if((w1 & 0xfc00) == 0xd800) { - if((w2 & 0xfc00) == 0xdc00) { - cp = 0x10000 + (((static_cast<uint32_t>(w1) & 0x3ff) << 10) | (w2 & 0x3ff)); - } else { - abort(); - } - } else { - cp = w1; - } - - - if(cp < 0x80) { - putf8[0] = static_cast<char>(cp); - return 1; - } else if(cp < 0x0800) { - putf8[0] = 0xc0 | ((cp >> 6) & 0x1f); - putf8[1] = 0x80 | (cp & 0x3f); - return 2; - } else if(cp < 0x10000) { - putf8[0] = 0xe0 | ((cp >> 6) & 0x0f); - putf8[1] = 0x80 | ((cp >> 6) & 0x3f); - putf8[2] = 0x80 | (cp & 0x3f); - return 3; - } else if(cp < 0x1fffff) { - putf8[0] = 0xf0 | ((cp >> 18) & 0x07); - putf8[1] = 0x80 | ((cp >> 12) & 0x3f); - putf8[2] = 0x80 | ((cp >> 6) & 0x3f); - putf8[3] = 0x80 | (cp & 0x3f); - return 4; - } else { - abort(); - } - return 0; -} - -%} - -ID [A-Za-z_0-9]+ -HEX_D [a-fA-F0-9] -INT [-]?[0-9]+ -DOUBLE {INT}((\.[0-9]+)?([eE][-+]?[0-9]+)?) -WS [ \t\r\n] -LCB [{] -RCB [}] -UNESCAPED_CH [^\"\\\b\n\r\f\t] - -%x JSON PREVAL STRING JSONVAL POSTVAL DOUBLE -%% - -<INITIAL>{ID} { instid = yytext; BEGIN(JSON); } - -<JSON>{WS}*{LCB}{WS}* { BEGIN(PREVAL); } - -<PREVAL>\" { BEGIN(STRING); spos=0; } - -<STRING>\" { featname[spos] = 0; - featmap[curfeat].first = FD::Convert(featname); - BEGIN(JSONVAL); - } -<STRING>{UNESCAPED_CH} { featname[spos++] = yytext[0]; } -<STRING>\\\" { featname[spos++] = '"'; } -<STRING>\\\\ { featname[spos++] = '\\'; } -<STRING>\\\/ { featname[spos++] = '/'; } -<STRING>\\b { } -<STRING>\\f { } -<STRING>\\n { } -<STRING>\\r { } -<STRING>\\t { } -<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { abort(); - } - -<JSONVAL>{WS}*:{WS}* { BEGIN(DOUBLE); } -<DOUBLE>{DOUBLE} { featmap[curfeat++].second = strtod(yytext, 0); - BEGIN(POSTVAL); } - -<POSTVAL>{WS}*,{WS}* { BEGIN(PREVAL); } -<POSTVAL>{WS}*{RCB}\n* { - const SparseVector<float> x(&featmap[0], &featmap[curfeat]); - json_fmap_callback(instid, x, json_fmap_callback_extra); - curfeat = 0; - BEGIN(INITIAL); - } - -<PREVAL,POSTVAL,DOUBLE,JSONVAL,INITIAL>. { std::cerr << "bad input: " << yytext << std::endl; abort(); } - -%% - -void JSONFeatureMapLexer::ReadRules(std::istream* in, FeatureMapCallback func, void* extra) { - json_fmap_callback = func; - json_fmap_callback_extra = extra; - jfmap_stream = in; - json_fmap_yylex(); -} - -#if 0 -void cb(const std::string& id, const SparseVector<float>& fmap, void* extra) { - (void) extra; - static int cc = 0; - cc++; -} - -int main() { - JSONFeatureMapLexer::ReadRules(&std::cin, cb, NULL); -} -#endif - |