From 625269764ebbe8d0b566e6ef5fc26a6bccd4181d Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 13 Jul 2014 14:04:45 +0200 Subject: init --- .gitignore | 28 ++ LICENSE | 2 + Makefile | 51 ++ README.md | 236 ++++++++++ benchmark.rb | 9 + cdec_json_parser/JSON_parser.c | 1012 ++++++++++++++++++++++++++++++++++++++++ cdec_json_parser/JSON_parser.h | 152 ++++++ cdec_json_parser/LICENSE | 213 +++++++++ cdec_json_parser/Makefile | 7 + cdec_json_parser/json_parse.cc | 31 ++ cdec_json_parser/json_parse.h | 62 +++ data/Makefile | 6 + data/cdec.ini | 4 + data/make.sh | 8 + data/make_paks.cc | 126 +++++ data/to_ascii.rb | 13 + data/weights.init | 12 + memusg.sh | 13 + run.sh | 43 ++ run_msgpack.sh | 33 ++ test_JsonBox.cc | 24 + test_MicroJSON.cc | 28 ++ test_MicroJSON.sh | 5 + test_cdec_json_parser.cc | 25 + test_gason.cc | 71 +++ test_json-cpp.cc | 100 ++++ test_jsoncpp.cc | 29 ++ test_jsonxx.cc | 35 ++ test_libjson.cc | 44 ++ test_msgpack.cc | 83 ++++ test_msgpack_ruby | 9 + test_nosjob.cc | 32 ++ test_picojson.cc | 32 ++ test_rapidjson.cc | 31 ++ test_sajson.cc | 32 ++ 35 files changed, 2641 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 README.md create mode 100755 benchmark.rb create mode 100644 cdec_json_parser/JSON_parser.c create mode 100644 cdec_json_parser/JSON_parser.h create mode 100644 cdec_json_parser/LICENSE create mode 100644 cdec_json_parser/Makefile create mode 100644 cdec_json_parser/json_parse.cc create mode 100644 cdec_json_parser/json_parse.h create mode 100644 data/Makefile create mode 100644 data/cdec.ini create mode 100755 data/make.sh create mode 100644 data/make_paks.cc create mode 100755 data/to_ascii.rb create mode 100644 data/weights.init create mode 100755 memusg.sh create mode 100755 run.sh create mode 100755 run_msgpack.sh create mode 100644 test_JsonBox.cc create mode 100644 test_MicroJSON.cc create mode 100755 test_MicroJSON.sh create mode 100644 
test_cdec_json_parser.cc create mode 100644 test_gason.cc create mode 100644 test_json-cpp.cc create mode 100644 test_jsoncpp.cc create mode 100644 test_jsonxx.cc create mode 100644 test_libjson.cc create mode 100644 test_msgpack.cc create mode 100755 test_msgpack_ruby create mode 100644 test_nosjob.cc create mode 100644 test_picojson.cc create mode 100644 test_rapidjson.cc create mode 100644 test_sajson.cc diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e6c5173 --- /dev/null +++ b/.gitignore @@ -0,0 +1,28 @@ +JsonBox/ +MicroJSON*/ +gason/ +json-cpp.hpp +jsoncpp/ +jsonxx/ +libjson/ +nosjob*/ +picojson/ +proto_map/ +rapidjson/ +sajson/ +msgpack-c/ +*.o +test_JsonBox +test_MicroJSON +test_cdec_json_parser +test_gason +test_json-cpp +test_jsoncpp +test_jsonxx +test_libjson +test_nosjob +test_picojson +test_rapidjson +test_sajson +test_msgpack +data/make_paks diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..df23347 --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +This is public domain. 
+ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..bf290a4 --- /dev/null +++ b/Makefile @@ -0,0 +1,51 @@ +COMPILER := g++ +CXXFLAGS := -O3 -march=native -mtune=native + + +all: test_gason test_json-cpp test_jsoncpp test_libjson test_picojson test_rapidjson test_sajson test_JsonBox test_jsonxx test_MicroJSON test_nosjob test_cdec_json_parser + +test_gason: test_gason.cc + $(COMPILER) $(CXXFLAGS) -std=c++11 test_gason.cc -o test_gason gason/gason.o + +test_json-cpp: test_json-cpp.cc + $(COMPILER) $(CXXFLAGS) -std=c++11 test_json-cpp.cc -o test_json-cpp + +test_jsoncpp: test_jsoncpp.cc + $(COMPILER) $(CXXFLAGS) test_jsoncpp.cc jsoncpp/lib/libjsoncpp.a -o test_jsoncpp + +test_libjson: test_libjson.cc + $(COMPILER) $(CXXFLAGS) test_libjson.cc libjson/libjson.a -o test_libjson + +test_picojson: test_picojson.cc + $(COMPILER) $(CXXFLAGS) test_picojson.cc -o test_picojson + +test_rapidjson: test_rapidjson.cc + $(COMPILER) $(CXXFLAGS) test_rapidjson.cc -o test_rapidjson + +test_sajson: test_sajson.cc + $(COMPILER) $(CXXFLAGS) test_sajson.cc -o test_sajson + +test_JsonBox: test_JsonBox.cc + $(COMPILER) $(CXXFLAGS) test_JsonBox.cc -I./JsonBox/include/ JsonBox/libJsonBox.a -o test_JsonBox + +test_jsonxx: test_jsonxx.cc + $(COMPILER) $(CXXFLAGS) test_jsonxx.cc jsonxx/jsonxx.o -o test_jsonxx + +test_MicroJSON: test_MicroJSON.cc + #$(COMPILER) $(CXXFLAGS) test_MicroJSON.cc MicroJSON-0.3.2/libMicroJSON-0.so -lUTF8Strings-1 -o test_MicroJSON + +test_nosjob: test_nosjob.cc + $(COMPILER) $(CXXFLAGS) test_nosjob.cc nosjob-e1d67401fcda6e05/libnosjob.a -o test_nosjob + +test_cdec_json_parser: test_cdec_json_parser.cc + $(COMPILER) $(CXXFLAGS) test_cdec_json_parser.cc cdec_json_parser/json_parse.o cdec_json_parser/JSON_parser.o -o test_cdec_json_parser + +test_msgpack: test_msgpack.cc + $(COMPILER) $(CXXFLAGS) test_msgpack.cc -I./msgpack-c/include/ ./msgpack-c/lib/libmsgpack.a -o test_msgpack + +clean: + rm -f test_gason test_json-cpp test_jsoncpp test_libjson + rm -f 
test_picojson test_rapidjson test_sajson test_JsonBox + rm -f test_jsonxx test_MicroJSON test_nosjob test_cdec_json_parser + rm -f test_msgpack + diff --git a/README.md b/README.md new file mode 100644 index 0000000..fa41339 --- /dev/null +++ b/README.md @@ -0,0 +1,236 @@ +Serializer Benchmark +==================== + +Comparing parsing speed/memory usage of all C++ JSON libs I could find. Also including two msgpack implementations (C++/ruby). +Goal is to output data.edges.last.rule.substr(1, 4). +Data is fairly large and complex object (a hypergraph representation) with a lot of different types, e.g. strings (ASCII), ints, floats, arrays +and sub-objects. +Note that the comparison is unfair for some parsers, as they just do SAX-style parsing and do not actually fill objects +with data (e.g. the cdec parser). + +* cdec-json-parser: ripped out of [1]. +* gason: git clone https://github.com/vivkin/gason.git +* JsonBox: git clone https://github.com/anhero/JsonBox.git +* jsoncpp: git clone https://github.com/open-source-parsers/jsoncpp.git +* json-cpp: wget "https://raw.githubusercontent.com/ascheglov/json-cpp/master/single_include/json-cpp.hpp" +* jsonxx: git clone https://github.com/hjiang/jsonxx.git +* libjson: wget "http://downloads.sourceforge.net/project/libjson/libjson_7.6.1.zip?r=&ts=1405248411&use_mirror=heanet" +* MicroJSON: wget http://grigory.info/distfiles/MicroJSON-0.3.2.tar.bz2 +* msgpack-c: git clone https://github.com/msgpack/msgpack-c.git +* msgpack-ruby: gem install msgpack +* nosjob: go to [2] and figure out how to download a tarball +* picojson: git clone https://github.com/kazuho/picojson.git +* rapidjson: git clone https://github.com/miloyip/rapidjson.git +* sajson: git clone https://github.com/chadaustin/sajson.git + +You'll also need root privileges to clear the disk caches. Assumes these scripts [3] +are in the PATH. 
+ +Versions: +--------- +* cdec-json-parser: SHA-1 d124d4aaa78b52b46f7ac8d7306be342d3405124 +* gason: SHA-1 ede29fc5f0de8e47fd82c09f2f98123d2c867f28 +* JsonBox: SHA-1 fcb82ebae41dffb90d32a49ac236d1608d9a67ee +* jsoncpp: SHA-1 655a9db0cc62394e81d3074a98c7191fbfc00259 +* json-cpp: SHA-1 170121e2dc099895064305e38bfb25d90a807ce3 +* libjson: version 7.6.1 +* MicroJSON: version 0.3.2 +* msgpack-c: SHA-1 197ed8c983a70d5892bf73dcd1a352bf8e2588df +* msgpack-ruby: version 0.5.8 +* nosjob: SHA-1 e1d67401fcda6e05a536272532bdb9770bec27e8 +* picojson: SHA-1 5e71db9bec7f22a041cd251c6d6d67e954396d5d +* rapidjson: SHA-1 63d054349ab56d278060cd3373e76a6933cf194a +* sajson: SHA-1 003988269f1774dfb184e1864f2f4e654965581e + + +[1] https://github.com/redpony/cdec/tree/master/decoder +[2] http://fossil.wanderinghorse.net/repos/nosjob/index.cgi/index +[3] https://github.com/pks/scripts + + +Results +======= + +On my machine (Lenovo X61s) which has an SSD: +Linux x 3.12.23 #1 SMP PREEMPT Fri Jul 4 15:09:43 CEST 2014 x86_64 Intel(R) Core(TM)2 Duo CPU L7500 @ 1.60GHz GenuineIntel GNU/Linux + +Spoiler: sajson and rapidjson are the fastest JSON parsers -- but msgpack is even faster. 
+ +JSON parsing benchmark +---------------------- + REAPEAT=10 + +[test_cdec_json_parser] +data/1020.json: 8.81 s +data/1570.json: 3.07 s +data/1391.json: 1.99 s +data/429.json: 0.6 s +data/2002.json: 0.32 s +data/1889.json: 0.07 s +data/1495.json: 0.01 s +data/748.json: 0.0 s +--- +overall: 1.84 s + memory: 1 m + +[test_gason] +data/1020.json: 4.34 s +data/1570.json: 1.52 s +data/1391.json: 1.05 s +data/429.json: 0.29 s +data/2002.json: 0.16 s +data/1889.json: 0.03 s +data/1495.json: 0.01 s +data/748.json: 0.01 s +--- +overall: 0.91 s + memory: 389 m + +[test_JsonBox] +data/1020.json: 36.15 s +data/1570.json: 11.91 s +data/1391.json: 8.25 s +data/429.json: 2.3 s +data/2002.json: 1.21 s +data/1889.json: 0.24 s +data/1495.json: 0.02 s +data/748.json: 0.0 s +--- +overall: 7.42 s + memory: 901 m + +[test_jsoncpp] +data/1020.json: 9.59 s +data/1570.json: 3.32 s +data/1391.json: 2.19 s +data/429.json: 0.64 s +data/2002.json: 0.34 s +data/1889.json: 0.07 s +data/1495.json: 0.01 s +data/748.json: 0.01 s +--- +overall: 2.0 s + memory: 804 m + +[test_json-cpp] +data/1020.json: 4.32 s +data/1570.json: 1.44 s +data/1391.json: 0.99 s +data/429.json: 0.28 s +data/2002.json: 0.15 s +data/1889.json: 0.03 s +data/1495.json: 0.01 s +data/748.json: 0.0 s +--- +overall: 0.89 s + memory: 263 m + +[test_jsonxx] +data/1020.json: 36.85 s +data/1570.json: 12.86 s +data/1391.json: 8.36 s +data/429.json: 2.4 s +data/2002.json: 1.29 s +data/1889.json: 0.26 s +data/1495.json: 0.01 s +data/748.json: 0.0 s +--- +overall: 7.66 s + memory: 1440 m + +[test_libjson] +data/1020.json: 13.09 s +data/1570.json: 4.51 s +data/1391.json: 3.0 s +data/429.json: 0.86 s +data/2002.json: 0.46 s +data/1889.json: 0.09 s +data/1495.json: 0.01 s +data/748.json: 0.0 s +--- +overall: 2.72 s + memory: 1649 m + +[test_nosjob] +data/1020.json: 17.64 s +data/1570.json: 6.18 s +data/1391.json: 4.09 s +data/429.json: 1.16 s +data/2002.json: 0.62 s +data/1889.json: 0.13 s +data/1495.json: 0.01 s +data/748.json: 0.0 s +--- 
+overall: 3.68 s + memory: 931 m + +[test_picojson] +data/1020.json: 17.35 s +data/1570.json: 5.51 s +data/1391.json: 3.97 s +data/429.json: 1.07 s +data/2002.json: 0.55 s +data/1889.json: 0.11 s +data/1495.json: 0.01 s +data/748.json: 0.01 s +--- +overall: 3.53 s + memory: 1049 m + +[test_rapidjson] +data/1020.json: 3.27 s +data/1570.json: 1.08 s +data/1391.json: 0.75 s +data/429.json: 0.21 s +data/2002.json: 0.11 s +data/1889.json: 0.03 s +data/1495.json: 0.01 s +data/748.json: 0.0 s +--- +overall: 0.67 s + memory: 415 m + +[test_sajson] +data/1020.json: 2.94 s +data/1570.json: 0.97 s +data/1391.json: 0.66 s +data/429.json: 0.19 s +data/2002.json: 0.1 s +data/1889.json: 0.02 s +data/1495.json: 0.0 s +data/748.json: 0.0 s +--- +overall: 0.6 s + memory: 293 m + + +MSGPACK parsing benchmark +------------------------- + REAPEAT=10 + +[test_msgpack] +data/1020.pak: 2.2 s +data/1570.pak: 0.8 s +data/1391.pak: 0.5 s +data/429.pak: 0.15 s +data/2002.pak: 0.09 s +data/1889.pak: 0.02 s +data/1495.pak: 0.0 s +data/748.pak: 0.0 s +--- +overall: 0.47 s + memory: 451 m + +[test_msgpack_ruby] +data/1020.pak: 1.91 s +data/1570.pak: 0.76 s +data/1391.pak: 0.52 s +data/429.pak: 0.23 s +data/2002.pak: 0.19 s +data/1889.pak: 0.14 s +data/1495.pak: 0.13 s +data/748.pak: 0.13 s +--- +overall: 0.5 s + memory: 216 m + diff --git a/benchmark.rb b/benchmark.rb new file mode 100755 index 0000000..81e11f8 --- /dev/null +++ b/benchmark.rb @@ -0,0 +1,9 @@ +#!/usr/bin/env ruby + + +ARGV[0].to_i.times { + start = Time.now + `#{ARGV[1]} #{ARGV[2]}` + puts Time.now-start +} + diff --git a/cdec_json_parser/JSON_parser.c b/cdec_json_parser/JSON_parser.c new file mode 100644 index 0000000..5e392bc --- /dev/null +++ b/cdec_json_parser/JSON_parser.c @@ -0,0 +1,1012 @@ +/* JSON_parser.c */ + +/* 2007-08-24 */ + +/* +Copyright (c) 2005 JSON.org + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal 
+in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +The Software shall be used for Good, not Evil. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +/* + Callbacks, comments, Unicode handling by Jean Gressmann (jean@0x42.de), 2007-2009. + + For the added features the license above applies also. + + Changelog: + 2009-05-17 + Incorporated benrudiak@googlemail.com fix for UTF16 decoding. + + 2009-05-14 + Fixed float parsing bug related to a locale being set that didn't + use '.' as decimal point character (charles@transmissionbt.com). + + 2008-10-14 + Renamed states.IN to states.IT to avoid name clash which IN macro + defined in windef.h (alexey.pelykh@gmail.com) + + 2008-07-19 + Removed some duplicate code & debugging variable (charles@transmissionbt.com) + + 2008-05-28 + Made JSON_value structure ansi C compliant. This bug was report by + trisk@acm.jhu.edu + + 2008-05-20 + Fixed bug reported by charles@transmissionbt.com where the switching + from static to dynamic parse buffer did not copy the static parse + buffer's content. 
/*
    Complete state of one parser instance. Allocated by new_JSON_parser and
    released by delete_JSON_parser.
*/
struct JSON_parser_struct {
    /* user callback, invoked as callback(ctx, type, value); may be NULL */
    JSON_parser_callback callback;
    /* opaque pointer handed back to the callback as its first argument */
    void* ctx;
    /*
        state                  - current row of state_transition_table
        before_comment_state   - state to resume once a comment is closed
        type                   - JSON_T_* of the value currently being collected
        escaped                - set while the character after a backslash is pending
        comment                - set while inside a comment
        allow_comments         - non-zero if comments are accepted
        handle_floats_manually - non-zero if floats are passed to the callback as text
    */
    signed char state, before_comment_state, type, escaped, comment, allow_comments, handle_floats_manually;
    /* pending first half of a UTF-16 surrogate pair, 0 if none */
    UTF16 utf16_high_surrogate;
    /* maximum nesting depth, or a negative value for an unbounded stack */
    long depth;
    /* index of the top element of the mode stack, -1 when it is empty */
    long top;
    /* mode stack; points at static_stack or at heap memory */
    signed char* stack;
    /* number of elements stack can hold */
    long stack_capacity;
    /* decimal point character of the locale, taken from localeconv() */
    char decimal_point;
    /* text of the value being collected; points at static_parse_buffer or at heap memory */
    char* parse_buffer;
    /* number of chars parse_buffer can hold */
    size_t parse_buffer_capacity;
    /* number of chars currently in parse_buffer, not counting the terminating 0 */
    size_t parse_buffer_count;
    /* NOTE(review): not referenced by the code visible here - confirm whether it is still used */
    size_t comment_begin_offset;
    /* embedded storage used until the stack / parse buffer has to move to the heap */
    signed char static_stack[JSON_PARSER_STACK_SIZE];
    char static_parse_buffer[JSON_PARSER_PARSE_BUFFER_SIZE];
};
*/ + C_ZERO , /* 0 */ + C_DIGIT, /* 123456789 */ + C_LOW_A, /* a */ + C_LOW_B, /* b */ + C_LOW_C, /* c */ + C_LOW_D, /* d */ + C_LOW_E, /* e */ + C_LOW_F, /* f */ + C_LOW_L, /* l */ + C_LOW_N, /* n */ + C_LOW_R, /* r */ + C_LOW_S, /* s */ + C_LOW_T, /* t */ + C_LOW_U, /* u */ + C_ABCDF, /* ABCDF */ + C_E, /* E */ + C_ETC, /* everything else */ + C_STAR, /* * */ + NR_CLASSES +}; + +static int ascii_class[128] = { +/* + This array maps the 128 ASCII characters into character classes. + The remaining Unicode characters should be mapped to C_ETC. + Non-whitespace control characters are errors. +*/ + __, __, __, __, __, __, __, __, + __, C_WHITE, C_WHITE, __, __, C_WHITE, __, __, + __, __, __, __, __, __, __, __, + __, __, __, __, __, __, __, __, + + C_SPACE, C_ETC, C_QUOTE, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, + C_ETC, C_ETC, C_STAR, C_PLUS, C_COMMA, C_MINUS, C_POINT, C_SLASH, + C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, + C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, + + C_ETC, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E, C_ABCDF, C_ETC, + C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, + C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, + C_ETC, C_ETC, C_ETC, C_LSQRB, C_BACKS, C_RSQRB, C_ETC, C_ETC, + + C_ETC, C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC, + C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_L, C_ETC, C_LOW_N, C_ETC, + C_ETC, C_ETC, C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC, C_ETC, + C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB, C_ETC, C_ETC +}; + + +/* + The state codes. 
/*
    Action codes. The state transition table holds either a new state (a
    non-negative value) or one of these negative action codes, which
    JSON_parser_char dispatches on. The structural actions -2 .. -9 are
    written as literals directly in the table.
*/
enum actions
{
    CB = -10, /* comment begin */
    CE = -11, /* comment end */
    FA = -12, /* false */
    TR = -13, /* true */
    NU = -14, /* null */
    DE = -15, /* double detected by exponent e E */
    DF = -16, /* double detected by fraction . */
    SB = -17, /* string begin */
    MX = -18, /* integer detected by minus */
    ZX = -19, /* integer detected by zero */
    IX = -20, /* integer detected by 1-9 */
    EX = -21, /* next char is escaped */
    UC = -22  /* Unicode character read */
};
0 | a b c d e f l n r s t u | E | * */ +/*start GO*/ {GO,GO,-6,__,-5,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*ok OK*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*object OB*/ {OB,OB,__,-9,__,__,__,__,SB,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*key KE*/ {KE,KE,__,__,__,__,__,__,SB,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*colon CO*/ {CO,CO,__,__,__,__,-2,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*value VA*/ {VA,VA,-6,__,-5,__,__,__,SB,__,CB,__,MX,__,ZX,IX,__,__,__,__,__,FA,__,NU,__,__,TR,__,__,__,__,__}, +/*array AR*/ {AR,AR,-6,__,-5,-7,__,__,SB,__,CB,__,MX,__,ZX,IX,__,__,__,__,__,FA,__,NU,__,__,TR,__,__,__,__,__}, +/*string ST*/ {ST,__,ST,ST,ST,ST,ST,ST,-4,EX,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST}, +/*escape ES*/ {__,__,__,__,__,__,__,__,ST,ST,ST,__,__,__,__,__,__,ST,__,__,__,ST,__,ST,ST,__,ST,U1,__,__,__,__}, +/*u1 U1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U2,U2,U2,U2,U2,U2,U2,U2,__,__,__,__,__,__,U2,U2,__,__}, +/*u2 U2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U3,U3,U3,U3,U3,U3,U3,U3,__,__,__,__,__,__,U3,U3,__,__}, +/*u3 U3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U4,U4,U4,U4,U4,U4,U4,U4,__,__,__,__,__,__,U4,U4,__,__}, +/*u4 U4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,UC,UC,UC,UC,UC,UC,UC,UC,__,__,__,__,__,__,UC,UC,__,__}, +/*minus MI*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ZE,IT,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*zero ZE*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,DF,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*int IT*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,DF,IT,IT,__,__,__,__,DE,__,__,__,__,__,__,__,__,DE,__,__}, +/*frac FR*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__,__}, +/*e E1*/ 
{__,__,__,__,__,__,__,__,__,__,__,E2,E2,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*ex E2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*exp E3*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*tr T1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T2,__,__,__,__,__,__,__}, +/*tru T2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T3,__,__,__,__}, +/*true T3*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__,__}, +/*fa F1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*fal F2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F3,__,__,__,__,__,__,__,__,__}, +/*fals F3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F4,__,__,__,__,__,__}, +/*false F4*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__,__}, +/*nu N1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N2,__,__,__,__}, +/*nul N2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N3,__,__,__,__,__,__,__,__,__}, +/*null N3*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__}, +/*/ C1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,C2}, +/*/* C2*/ {C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3}, +/** C3*/ {C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,CE,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3}, +/*_. 
FX*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__,__}, +/*\ D1*/ {__,__,__,__,__,__,__,__,__,D2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*\ D2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,U1,__,__,__,__}, +}; + + +/* + These modes can be pushed on the stack. +*/ +enum modes { + MODE_ARRAY = 1, + MODE_DONE = 2, + MODE_KEY = 3, + MODE_OBJECT = 4 +}; + +static int +push(JSON_parser jc, int mode) +{ +/* + Push a mode onto the stack. Return false if there is overflow. +*/ + jc->top += 1; + if (jc->depth < 0) { + if (jc->top >= jc->stack_capacity) { + size_t bytes_to_allocate; + jc->stack_capacity *= 2; + bytes_to_allocate = jc->stack_capacity * sizeof(jc->static_stack[0]); + if (jc->stack == &jc->static_stack[0]) { + jc->stack = (signed char*)malloc(bytes_to_allocate); + memcpy(jc->stack, jc->static_stack, sizeof(jc->static_stack)); + } else { + jc->stack = (signed char*)realloc(jc->stack, bytes_to_allocate); + } + } + } else { + if (jc->top >= jc->depth) { + return false; + } + } + + jc->stack[jc->top] = mode; + return true; +} + + +static int +pop(JSON_parser jc, int mode) +{ +/* + Pop the stack, assuring that the current mode matches the expectation. + Return false if there is underflow or if the modes mismatch. 
+*/ + if (jc->top < 0 || jc->stack[jc->top] != mode) { + return false; + } + jc->top -= 1; + return true; +} + + +#define parse_buffer_clear(jc) \ + do {\ + jc->parse_buffer_count = 0;\ + jc->parse_buffer[0] = 0;\ + } while (0) + +#define parse_buffer_pop_back_char(jc)\ + do {\ + assert(jc->parse_buffer_count >= 1);\ + --jc->parse_buffer_count;\ + jc->parse_buffer[jc->parse_buffer_count] = 0;\ + } while (0) + +void delete_JSON_parser(JSON_parser jc) +{ + if (jc) { + if (jc->stack != &jc->static_stack[0]) { + free((void*)jc->stack); + } + if (jc->parse_buffer != &jc->static_parse_buffer[0]) { + free((void*)jc->parse_buffer); + } + free((void*)jc); + } +} + + +JSON_parser +new_JSON_parser(JSON_config* config) +{ +/* + new_JSON_parser starts the checking process by constructing a JSON_parser + object. It takes a depth parameter that restricts the level of maximum + nesting. + + To continue the process, call JSON_parser_char for each character in the + JSON text, and then call JSON_parser_done to obtain the final result. + These functions are fully reentrant. +*/ + + int depth = 0; + JSON_config default_config; + + JSON_parser jc = (JSON_parser)malloc(sizeof(struct JSON_parser_struct)); + + memset(jc, 0, sizeof(*jc)); + + + /* initialize configuration */ + init_JSON_config(&default_config); + + /* set to default configuration if none was provided */ + if (config == NULL) { + config = &default_config; + } + + depth = config->depth; + + /* We need to be able to push at least one object */ + if (depth == 0) { + depth = 1; + } + + jc->state = GO; + jc->top = -1; + + /* Do we want non-bound stack? 
*/ + if (depth > 0) { + jc->stack_capacity = depth; + jc->depth = depth; + if (depth <= (int)COUNTOF(jc->static_stack)) { + jc->stack = &jc->static_stack[0]; + } else { + jc->stack = (signed char*)malloc(jc->stack_capacity * sizeof(jc->static_stack[0])); + } + } else { + jc->stack_capacity = COUNTOF(jc->static_stack); + jc->depth = -1; + jc->stack = &jc->static_stack[0]; + } + + /* set parser to start */ + push(jc, MODE_DONE); + + /* set up the parse buffer */ + jc->parse_buffer = &jc->static_parse_buffer[0]; + jc->parse_buffer_capacity = COUNTOF(jc->static_parse_buffer); + parse_buffer_clear(jc); + + /* set up callback, comment & float handling */ + jc->callback = config->callback; + jc->ctx = config->callback_ctx; + jc->allow_comments = config->allow_comments != 0; + jc->handle_floats_manually = config->handle_floats_manually != 0; + + /* set up decimal point */ + jc->decimal_point = *localeconv()->decimal_point; + + return jc; +} + +static void grow_parse_buffer(JSON_parser jc) +{ + size_t bytes_to_allocate; + jc->parse_buffer_capacity *= 2; + bytes_to_allocate = jc->parse_buffer_capacity * sizeof(jc->parse_buffer[0]); + if (jc->parse_buffer == &jc->static_parse_buffer[0]) { + jc->parse_buffer = (char*)malloc(bytes_to_allocate); + memcpy(jc->parse_buffer, jc->static_parse_buffer, jc->parse_buffer_count); + } else { + jc->parse_buffer = (char*)realloc(jc->parse_buffer, bytes_to_allocate); + } +} + +#define parse_buffer_push_back_char(jc, c)\ + do {\ + if (jc->parse_buffer_count + 1 >= jc->parse_buffer_capacity) grow_parse_buffer(jc);\ + jc->parse_buffer[jc->parse_buffer_count++] = c;\ + jc->parse_buffer[jc->parse_buffer_count] = 0;\ + } while (0) + +#define assert_is_non_container_type(jc) \ + assert( \ + jc->type == JSON_T_NULL || \ + jc->type == JSON_T_FALSE || \ + jc->type == JSON_T_TRUE || \ + jc->type == JSON_T_FLOAT || \ + jc->type == JSON_T_INTEGER || \ + jc->type == JSON_T_STRING) + + +static int parse_parse_buffer(JSON_parser jc) +{ + if (jc->callback) { 
+ JSON_value value, *arg = NULL; + + if (jc->type != JSON_T_NONE) { + assert_is_non_container_type(jc); + + switch(jc->type) { + case JSON_T_FLOAT: + arg = &value; + if (jc->handle_floats_manually) { + value.vu.str.value = jc->parse_buffer; + value.vu.str.length = jc->parse_buffer_count; + } else { + /*sscanf(jc->parse_buffer, "%Lf", &value.vu.float_value);*/ + + /* not checking with end pointer b/c there may be trailing ws */ + value.vu.float_value = strtod(jc->parse_buffer, NULL); + } + break; + case JSON_T_INTEGER: + arg = &value; + sscanf(jc->parse_buffer, JSON_PARSER_INTEGER_SSCANF_TOKEN, &value.vu.integer_value); + break; + case JSON_T_STRING: + arg = &value; + value.vu.str.value = jc->parse_buffer; + value.vu.str.length = jc->parse_buffer_count; + break; + } + + if (!(*jc->callback)(jc->ctx, jc->type, arg)) { + return false; + } + } + } + + parse_buffer_clear(jc); + + return true; +} + +#define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800) +#define IS_LOW_SURROGATE(uc) (((uc) & 0xFC00) == 0xDC00) +#define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000) +static unsigned char utf8_lead_bits[4] = { 0x00, 0xC0, 0xE0, 0xF0 }; + +static int decode_unicode_char(JSON_parser jc) +{ + int i; + unsigned uc = 0; + char* p; + int trail_bytes; + + assert(jc->parse_buffer_count >= 6); + + p = &jc->parse_buffer[jc->parse_buffer_count - 4]; + + for (i = 12; i >= 0; i -= 4, ++p) { + unsigned x = *p; + + if (x >= 'a') { + x -= ('a' - 10); + } else if (x >= 'A') { + x -= ('A' - 10); + } else { + x &= ~0x30u; + } + + assert(x < 16); + + uc |= x << i; + } + + /* clear UTF-16 char from buffer */ + jc->parse_buffer_count -= 6; + jc->parse_buffer[jc->parse_buffer_count] = 0; + + /* attempt decoding ... 
*/ + if (jc->utf16_high_surrogate) { + if (IS_LOW_SURROGATE(uc)) { + uc = DECODE_SURROGATE_PAIR(jc->utf16_high_surrogate, uc); + trail_bytes = 3; + jc->utf16_high_surrogate = 0; + } else { + /* high surrogate without a following low surrogate */ + return false; + } + } else { + if (uc < 0x80) { + trail_bytes = 0; + } else if (uc < 0x800) { + trail_bytes = 1; + } else if (IS_HIGH_SURROGATE(uc)) { + /* save the high surrogate and wait for the low surrogate */ + jc->utf16_high_surrogate = uc; + return true; + } else if (IS_LOW_SURROGATE(uc)) { + /* low surrogate without a preceding high surrogate */ + return false; + } else { + trail_bytes = 2; + } + } + + jc->parse_buffer[jc->parse_buffer_count++] = (char) ((uc >> (trail_bytes * 6)) | utf8_lead_bits[trail_bytes]); + + for (i = trail_bytes * 6 - 6; i >= 0; i -= 6) { + jc->parse_buffer[jc->parse_buffer_count++] = (char) (((uc >> i) & 0x3F) | 0x80); + } + + jc->parse_buffer[jc->parse_buffer_count] = 0; + + return true; +} + +static int add_escaped_char_to_parse_buffer(JSON_parser jc, int next_char) +{ + jc->escaped = 0; + /* remove the backslash */ + parse_buffer_pop_back_char(jc); + switch(next_char) { + case 'b': + parse_buffer_push_back_char(jc, '\b'); + break; + case 'f': + parse_buffer_push_back_char(jc, '\f'); + break; + case 'n': + parse_buffer_push_back_char(jc, '\n'); + break; + case 'r': + parse_buffer_push_back_char(jc, '\r'); + break; + case 't': + parse_buffer_push_back_char(jc, '\t'); + break; + case '"': + parse_buffer_push_back_char(jc, '"'); + break; + case '\\': + parse_buffer_push_back_char(jc, '\\'); + break; + case '/': + parse_buffer_push_back_char(jc, '/'); + break; + case 'u': + parse_buffer_push_back_char(jc, '\\'); + parse_buffer_push_back_char(jc, 'u'); + break; + default: + return false; + } + + return true; +} + +#define add_char_to_parse_buffer(jc, next_char, next_class) \ + do { \ + if (jc->escaped) { \ + if (!add_escaped_char_to_parse_buffer(jc, next_char)) \ + return false; \ + } else if 
(!jc->comment) { \ + if ((jc->type != JSON_T_NONE) | !((next_class == C_SPACE) | (next_class == C_WHITE)) /* non-white-space */) { \ + parse_buffer_push_back_char(jc, (char)next_char); \ + } \ + } \ + } while (0) + + +#define assert_type_isnt_string_null_or_bool(jc) \ + assert(jc->type != JSON_T_FALSE); \ + assert(jc->type != JSON_T_TRUE); \ + assert(jc->type != JSON_T_NULL); \ + assert(jc->type != JSON_T_STRING) + + +int +JSON_parser_char(JSON_parser jc, int next_char) +{ +/* + After calling new_JSON_parser, call this function for each character (or + partial character) in your JSON text. It can accept UTF-8, UTF-16, or + UTF-32. It returns true if things are looking ok so far. If it rejects the + text, it returns false. +*/ + int next_class, next_state; + +/* + Determine the character's class. +*/ + if (next_char < 0) { + return false; + } + if (next_char >= 128) { + next_class = C_ETC; + } else { + next_class = ascii_class[next_char]; + if (next_class <= __) { + return false; + } + } + + add_char_to_parse_buffer(jc, next_char, next_class); + +/* + Get the next state from the state transition table. +*/ + next_state = state_transition_table[jc->state][next_class]; + if (next_state >= 0) { +/* + Change the state. +*/ + jc->state = next_state; + } else { +/* + Or perform one of the actions. 
+*/ + switch (next_state) { +/* Unicode character */ + case UC: + if(!decode_unicode_char(jc)) { + return false; + } + /* check if we need to read a second UTF-16 char */ + if (jc->utf16_high_surrogate) { + jc->state = D1; + } else { + jc->state = ST; + } + break; +/* escaped char */ + case EX: + jc->escaped = 1; + jc->state = ES; + break; +/* integer detected by minus */ + case MX: + jc->type = JSON_T_INTEGER; + jc->state = MI; + break; +/* integer detected by zero */ + case ZX: + jc->type = JSON_T_INTEGER; + jc->state = ZE; + break; +/* integer detected by 1-9 */ + case IX: + jc->type = JSON_T_INTEGER; + jc->state = IT; + break; + +/* floating point number detected by exponent*/ + case DE: + assert_type_isnt_string_null_or_bool(jc); + jc->type = JSON_T_FLOAT; + jc->state = E1; + break; + +/* floating point number detected by fraction */ + case DF: + assert_type_isnt_string_null_or_bool(jc); + if (!jc->handle_floats_manually) { +/* + Some versions of strtod (which underlies sscanf) don't support converting + C-locale formatted floating point values.
+*/ + assert(jc->parse_buffer[jc->parse_buffer_count-1] == '.'); + jc->parse_buffer[jc->parse_buffer_count-1] = jc->decimal_point; + } + jc->type = JSON_T_FLOAT; + jc->state = FX; + break; +/* string begin " */ + case SB: + parse_buffer_clear(jc); + assert(jc->type == JSON_T_NONE); + jc->type = JSON_T_STRING; + jc->state = ST; + break; + +/* n */ + case NU: + assert(jc->type == JSON_T_NONE); + jc->type = JSON_T_NULL; + jc->state = N1; + break; +/* f */ + case FA: + assert(jc->type == JSON_T_NONE); + jc->type = JSON_T_FALSE; + jc->state = F1; + break; +/* t */ + case TR: + assert(jc->type == JSON_T_NONE); + jc->type = JSON_T_TRUE; + jc->state = T1; + break; + +/* closing comment */ + case CE: + jc->comment = 0; + assert(jc->parse_buffer_count == 0); + assert(jc->type == JSON_T_NONE); + jc->state = jc->before_comment_state; + break; + +/* opening comment */ + case CB: + if (!jc->allow_comments) { + return false; + } + parse_buffer_pop_back_char(jc); + if (!parse_parse_buffer(jc)) { + return false; + } + assert(jc->parse_buffer_count == 0); + assert(jc->type != JSON_T_STRING); + switch (jc->stack[jc->top]) { + case MODE_ARRAY: + case MODE_OBJECT: + switch(jc->state) { + case VA: + case AR: + jc->before_comment_state = jc->state; + break; + default: + jc->before_comment_state = OK; + break; + } + break; + default: + jc->before_comment_state = jc->state; + break; + } + jc->type = JSON_T_NONE; + jc->state = C1; + jc->comment = 1; + break; +/* empty } */ + case -9: + parse_buffer_clear(jc); + if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_END, NULL)) { + return false; + } + if (!pop(jc, MODE_KEY)) { + return false; + } + jc->state = OK; + break; + +/* } */ case -8: + parse_buffer_pop_back_char(jc); + if (!parse_parse_buffer(jc)) { + return false; + } + if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_END, NULL)) { + return false; + } + if (!pop(jc, MODE_OBJECT)) { + return false; + } + jc->type = JSON_T_NONE; + jc->state = OK; + break; + +/* ] */ 
case -7: + parse_buffer_pop_back_char(jc); + if (!parse_parse_buffer(jc)) { + return false; + } + if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_ARRAY_END, NULL)) { + return false; + } + if (!pop(jc, MODE_ARRAY)) { + return false; + } + + jc->type = JSON_T_NONE; + jc->state = OK; + break; + +/* { */ case -6: + parse_buffer_pop_back_char(jc); + if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_BEGIN, NULL)) { + return false; + } + if (!push(jc, MODE_KEY)) { + return false; + } + assert(jc->type == JSON_T_NONE); + jc->state = OB; + break; + +/* [ */ case -5: + parse_buffer_pop_back_char(jc); + if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_ARRAY_BEGIN, NULL)) { + return false; + } + if (!push(jc, MODE_ARRAY)) { + return false; + } + assert(jc->type == JSON_T_NONE); + jc->state = AR; + break; + +/* string end " */ case -4: + parse_buffer_pop_back_char(jc); + switch (jc->stack[jc->top]) { + case MODE_KEY: + assert(jc->type == JSON_T_STRING); + jc->type = JSON_T_NONE; + jc->state = CO; + + if (jc->callback) { + JSON_value value; + value.vu.str.value = jc->parse_buffer; + value.vu.str.length = jc->parse_buffer_count; + if (!(*jc->callback)(jc->ctx, JSON_T_KEY, &value)) { + return false; + } + } + parse_buffer_clear(jc); + break; + case MODE_ARRAY: + case MODE_OBJECT: + assert(jc->type == JSON_T_STRING); + if (!parse_parse_buffer(jc)) { + return false; + } + jc->type = JSON_T_NONE; + jc->state = OK; + break; + default: + return false; + } + break; + +/* , */ case -3: + parse_buffer_pop_back_char(jc); + if (!parse_parse_buffer(jc)) { + return false; + } + switch (jc->stack[jc->top]) { + case MODE_OBJECT: +/* + A comma causes a flip from object mode to key mode. 
+*/ + if (!pop(jc, MODE_OBJECT) || !push(jc, MODE_KEY)) { + return false; + } + assert(jc->type != JSON_T_STRING); + jc->type = JSON_T_NONE; + jc->state = KE; + break; + case MODE_ARRAY: + assert(jc->type != JSON_T_STRING); + jc->type = JSON_T_NONE; + jc->state = VA; + break; + default: + return false; + } + break; + +/* : */ case -2: +/* + A colon causes a flip from key mode to object mode. +*/ + parse_buffer_pop_back_char(jc); + if (!pop(jc, MODE_KEY) || !push(jc, MODE_OBJECT)) { + return false; + } + assert(jc->type == JSON_T_NONE); + jc->state = VA; + break; +/* + Bad action. +*/ + default: + return false; + } + } + return true; +} + + +int +JSON_parser_done(JSON_parser jc) +{ + const int result = jc->state == OK && pop(jc, MODE_DONE); + + return result; +} + + +int JSON_parser_is_legal_white_space_string(const char* s) +{ + int c, char_class; + + if (s == NULL) { + return false; + } + + for (; *s; ++s) { + c = *s; + + if (c < 0 || c >= 128) { + return false; + } + + char_class = ascii_class[c]; + + if (char_class != C_SPACE && char_class != C_WHITE) { + return false; + } + } + + return true; +} + + + +void init_JSON_config(JSON_config* config) +{ + if (config) { + memset(config, 0, sizeof(*config)); + + config->depth = JSON_PARSER_STACK_SIZE - 1; + } +} diff --git a/cdec_json_parser/JSON_parser.h b/cdec_json_parser/JSON_parser.h new file mode 100644 index 0000000..de98007 --- /dev/null +++ b/cdec_json_parser/JSON_parser.h @@ -0,0 +1,152 @@ +#ifndef JSON_PARSER_H +#define JSON_PARSER_H + +/* JSON_parser.h */ + + +#include + +/* Windows DLL stuff */ +#ifdef _WIN32 +# ifdef JSON_PARSER_DLL_EXPORTS +# define JSON_PARSER_DLL_API __declspec(dllexport) +# else +# define JSON_PARSER_DLL_API __declspec(dllimport) +# endif +#else +# define JSON_PARSER_DLL_API +#endif + +/* Determine the integer type use to parse non-floating point numbers */ +#if __STDC_VERSION__ >= 199901L || HAVE_LONG_LONG == 1 +typedef long long JSON_int_t; +#define JSON_PARSER_INTEGER_SSCANF_TOKEN 
"%lld" +#define JSON_PARSER_INTEGER_SPRINTF_TOKEN "%lld" +#else +typedef long JSON_int_t; +#define JSON_PARSER_INTEGER_SSCANF_TOKEN "%ld" +#define JSON_PARSER_INTEGER_SPRINTF_TOKEN "%ld" +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum +{ + JSON_T_NONE = 0, + JSON_T_ARRAY_BEGIN, // 1 + JSON_T_ARRAY_END, // 2 + JSON_T_OBJECT_BEGIN, // 3 + JSON_T_OBJECT_END, // 4 + JSON_T_INTEGER, // 5 + JSON_T_FLOAT, // 6 + JSON_T_NULL, // 7 + JSON_T_TRUE, // 8 + JSON_T_FALSE, // 9 + JSON_T_STRING, // 10 + JSON_T_KEY, // 11 + JSON_T_MAX // 12 +} JSON_type; + +typedef struct JSON_value_struct { + union { + JSON_int_t integer_value; + + double float_value; + + struct { + const char* value; + size_t length; + } str; + } vu; +} JSON_value; + +typedef struct JSON_parser_struct* JSON_parser; + +/*! \brief JSON parser callback + + \param ctx The pointer passed to new_JSON_parser. + \param type An element of JSON_type but not JSON_T_NONE. + \param value A representation of the parsed value. This parameter is NULL for + JSON_T_ARRAY_BEGIN, JSON_T_ARRAY_END, JSON_T_OBJECT_BEGIN, JSON_T_OBJECT_END, + JSON_T_NULL, JSON_T_TRUE, and SON_T_FALSE. String values are always returned + as zero-terminated C strings. + + \return Non-zero if parsing should continue, else zero. +*/ +typedef int (*JSON_parser_callback)(void* ctx, int type, const struct JSON_value_struct* value); + + +/*! \brief The structure used to configure a JSON parser object + + \param depth If negative, the parser can parse arbitrary levels of JSON, otherwise + the depth is the limit + \param Pointer to a callback. This parameter may be NULL. In this case the input is merely checked for validity. + \param Callback context. This parameter may be NULL. + \param depth. Specifies the levels of nested JSON to allow. Negative numbers yield unlimited nesting. + \param allowComments. To allow C style comments in JSON, set to non-zero. + \param handleFloatsManually. 
To decode floating point numbers manually set this parameter to non-zero. + + \return The parser object. +*/ +typedef struct { + JSON_parser_callback callback; + void* callback_ctx; + int depth; + int allow_comments; + int handle_floats_manually; +} JSON_config; + + +/*! \brief Initializes the JSON parser configuration structure to default values. + + The default configuration is + - 127 levels of nested JSON (depends on JSON_PARSER_STACK_SIZE, see JSON_parser.c) + - no parsing, just checking for JSON syntax + - no comments + + \param config. Used to configure the parser. +*/ +JSON_PARSER_DLL_API void init_JSON_config(JSON_config* config); + +/*! \brief Create a JSON parser object + + \param config. Used to configure the parser. Set to NULL to use the default configuration. + See init_JSON_config + + \return The parser object. +*/ +JSON_PARSER_DLL_API extern JSON_parser new_JSON_parser(JSON_config* config); + +/*! \brief Destroy a previously created JSON parser object. */ +JSON_PARSER_DLL_API extern void delete_JSON_parser(JSON_parser jc); + +/*! \brief Parse a character. + + \return Non-zero, if all characters passed to this function are part of valid JSON. +*/ +JSON_PARSER_DLL_API extern int JSON_parser_char(JSON_parser jc, int next_char); + +/*! \brief Finalize parsing. + + Call this method once after all input characters have been consumed. + + \return Non-zero, if all parsed characters are valid JSON, zero otherwise. +*/ +JSON_PARSER_DLL_API extern int JSON_parser_done(JSON_parser jc); + +/*! \brief Determine if a given string is valid JSON white space + + \return Non-zero if the string is valid, zero otherwise.
+*/ +JSON_PARSER_DLL_API extern int JSON_parser_is_legal_white_space_string(const char* s); + + +#ifdef __cplusplus +} +#endif + + +#endif /* JSON_PARSER_H */ diff --git a/cdec_json_parser/LICENSE b/cdec_json_parser/LICENSE new file mode 100644 index 0000000..a390938 --- /dev/null +++ b/cdec_json_parser/LICENSE @@ -0,0 +1,213 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +---------------------------------------------- + +L-BFGS CODE FROM COMPUTATIONAL CRYSTALLOGRAPHY TOOLBOX (CCTBX) + +This package includes source code (training/lbfgs.h) based on source +code distributed as part of the Compational Crystallography Toolbox +(CCTBX), which has separate copyright notices and license terms. Use of +this source code is subject to the terms and conditions of the license +contained in the file LICENSE.cctbx . 
+ diff --git a/cdec_json_parser/Makefile b/cdec_json_parser/Makefile new file mode 100644 index 0000000..c976aba --- /dev/null +++ b/cdec_json_parser/Makefile @@ -0,0 +1,7 @@ +all: + gcc JSON_parser.c -c + g++ json_parse.cc -c + +clean: + rm *.o + diff --git a/cdec_json_parser/json_parse.cc b/cdec_json_parser/json_parse.cc new file mode 100644 index 0000000..4580fc8 --- /dev/null +++ b/cdec_json_parser/json_parse.cc @@ -0,0 +1,31 @@ +#include "json_parse.h" + +#include +#include + +using namespace std; + + +bool JSONParser::HandleJSONEvent(int type, const JSON_value* value) { + switch(type) { + case JSON_T_OBJECT_BEGIN: + case JSON_T_OBJECT_END: + case JSON_T_ARRAY_BEGIN: + case JSON_T_ARRAY_END: + case JSON_T_NULL: + case JSON_T_TRUE: + case JSON_T_FALSE: + case JSON_T_KEY: + case JSON_T_INTEGER: + case JSON_T_FLOAT: + break; + case JSON_T_STRING: + string s = value->vu.str.value; + string t = s.substr(1, 4); + if (t == "Goal") + cerr << t << endl; + break; + } + return true; +} + diff --git a/cdec_json_parser/json_parse.h b/cdec_json_parser/json_parse.h new file mode 100644 index 0000000..80c037b --- /dev/null +++ b/cdec_json_parser/json_parse.h @@ -0,0 +1,62 @@ +#ifndef _JSON_WRAPPER_H_ +#define _JSON_WRAPPER_H_ + +#include +#include +#include "JSON_parser.h" + +class JSONParser { + public: + JSONParser() { + state = -1; + init_JSON_config(&config); + hack.mf = &JSONParser::Callback; + config.depth = 10; + config.callback_ctx = reinterpret_cast(this); + config.callback = hack.cb; + config.allow_comments = 1; + config.handle_floats_manually = 1; + jc = new_JSON_parser(&config); + } + virtual ~JSONParser() { + delete_JSON_parser(jc); + } + bool Parse(std::istream* in) { + int count = 0; + int lc = 1; + for (; in ; ++count) { + int next_char = in->get(); + if (!in->good()) break; + if (lc == '\n') { ++lc; } + if (!JSON_parser_char(jc, next_char)) { + std::cerr << "JSON_parser_char: syntax error, line " << lc << " (byte " << count << ")" << std::endl; + return 
false; + } + } + if (!JSON_parser_done(jc)) { + std::cerr << "JSON_parser_done: syntax error\n"; + return false; + } + return true; + } + static void WriteEscapedString(const std::string& in, std::ostream* out); + protected: + bool HandleJSONEvent(int type, const JSON_value* value); + private: + int state; + std::string cur_key; + std::string cat; + int Callback(int type, const JSON_value* value) { + if (HandleJSONEvent(type, value)) return 1; + return 0; + } + JSON_parser_struct* jc; + JSON_config config; + typedef int (JSONParser::* MF)(int type, const struct JSON_value_struct* value); + union CBHack { + JSON_parser_callback cb; + MF mf; + } hack; +}; + +#endif diff --git a/data/Makefile b/data/Makefile new file mode 100644 index 0000000..e56b229 --- /dev/null +++ b/data/Makefile @@ -0,0 +1,6 @@ +make_paks: make_paks.cc + g++ -std=c++11 make_paks.cc -I../msgpack-c/include/ ../msgpack-c/lib/libmsgpack.a -o make_paks + +clean: + rm -f make_paks + diff --git a/data/cdec.ini b/data/cdec.ini new file mode 100644 index 0000000..ddbe54c --- /dev/null +++ b/data/cdec.ini @@ -0,0 +1,4 @@ +formalism=scfg +intersection_strategy=full +add_pass_through_rules=true + diff --git a/data/make.sh b/data/make.sh new file mode 100755 index 0000000..5e0c31b --- /dev/null +++ b/data/make.sh @@ -0,0 +1,8 @@ +#!/bin/zsh + + +# wmt/14/newstest2008 data +for i in 1020 1391 1495 1570 1889 2002 429 748; do + ~/src/weaver/util/cdec2json.py -c cdec.ini -w weights.init -g grammar.$i.gz < $i.in | ./to_ascii.rb > $i.json +done + diff --git a/data/make_paks.cc b/data/make_paks.cc new file mode 100644 index 0000000..3477294 --- /dev/null +++ b/data/make_paks.cc @@ -0,0 +1,126 @@ +#include +#include +#include +#include +#include +#include + + +/* + * https://github.com/ascheglov/json-cpp + * + */ +#include "../json-cpp.hpp" + +using namespace std; + + +struct Node { + int id; + string cat; + vector span; + + MSGPACK_DEFINE(id, cat, span); +}; + +struct Vector { + double CountEF; + double 
EgivenFCoherent; + double Glue; + double IsSingletonF; + double IsSingletonFE; + double LanguageModel; + double LanguageModel_OOV; + double MaxLexFgivenE; + double MaxLexEgivenF; + double PassThrough; + double PassThrough_1; + double PassThrough_2; + double PassThrough_3; + double PassThrough_4; + double PassThrough_5; + double PassThrough_6; + double SampleCountF; + double WordPenalty; + + MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty); +}; + +struct Edge { + int head; + string rule; + vector tails; + Vector f; + double weight; + + MSGPACK_DEFINE(head, rule, tails, f, weight); +}; + +struct Hg { + Vector weights; + vector nodes; + vector edges; + vector rules; + + MSGPACK_DEFINE(weights, nodes, edges, rules); +}; + +template inline void +serialize(jsoncpp::Stream& stream, Hg& o) +{ + fields(o, stream, "weights", o.weights, "nodes", o.nodes, "edges", o.edges, "rules", o.rules); +} + +template inline void +serialize(jsoncpp::Stream& stream, Edge& o) +{ + fields(o, stream, "head", o.head, "rule", o.rule, "tails", o.tails, "f", o.f, "weight", o.weight); +} + +template inline void +serialize(jsoncpp::Stream& stream, Vector& o) +{ + fields(o, stream, "EgivenFCoherent", o.EgivenFCoherent, "SampleCountF", o.SampleCountF, "CountEF", o.CountEF, "MaxLexFgivenE", o.MaxLexFgivenE, "MaxLexEgivenF", o.MaxLexEgivenF, "IsSingletonF", o.IsSingletonF, "IsSingletonFE", o.IsSingletonFE, "LanguageModel", o.LanguageModel, "LanguageModel_OOV", o.LanguageModel_OOV, "PassThrough", o.PassThrough, "PassThrough_1", o.PassThrough_1, "PassThrough_2", o.PassThrough_2, "PassThrough_3", o.PassThrough_3, "PassThrough_4", o.PassThrough_4, "PassThrough_5", o.PassThrough_5, "PassThrough_6", o.PassThrough_6, "WordPenalty", o.WordPenalty, "Glue", o.Glue); +} + +template inline void 
+serialize(jsoncpp::Stream& stream, Node& o) +{ + fields(o, stream, "id", o.id, "cat", o.cat, "span", o.span); +} + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs) ), + (istreambuf_iterator())); + + Hg hg; + Vector w; + hg.weights = w; + vector nodes; + hg.nodes = nodes; + vector edges; + hg.edges = edges; + jsoncpp::parse(hg, json_str); + + FILE* file = fopen(argv[2], "wb"); + msgpack::fbuffer fbuf(file); + msgpack::pack(fbuf, hg); + fclose(file); + + /*ifstream ifs1(argv[2]); + string str1((istreambuf_iterator(jfs1)), + (istreambuf_iterator())); + + msgpack::zone zone; + msgpack::object obj; + msgpack::unpack(str1.data(), str1.size(), NULL, &zone, &obj); + + Hg hg; + obj.convert(&hg);*/ + + return 0; +} + diff --git a/data/to_ascii.rb b/data/to_ascii.rb new file mode 100755 index 0000000..6c1d23e --- /dev/null +++ b/data/to_ascii.rb @@ -0,0 +1,13 @@ +#!/usr/bin/env ruby + + +while line = STDIN.gets + encoding_options = { + :invalid => :replace, + :undef => :replace, + :replace => '?', + :universal_newline => true + } + puts line.encode 'ASCII', encoding_options +end + diff --git a/data/weights.init b/data/weights.init new file mode 100644 index 0000000..0d09f9f --- /dev/null +++ b/data/weights.init @@ -0,0 +1,12 @@ +CountEF 0.1 +EgivenFCoherent -0.1 +Glue 0.01 +IsSingletonF -0.01 +IsSingletonFE -0.01 +LanguageModel 0.1 +LanguageModel_OOV -1 +MaxLexFgivenE -0.1 +MaxLexEgivenF -0.1 +PassThrough -0.1 +SampleCountF -0.1 +WordPenalty -0.1 diff --git a/memusg.sh b/memusg.sh new file mode 100755 index 0000000..e3b6f90 --- /dev/null +++ b/memusg.sh @@ -0,0 +1,13 @@ +#!/bin/bash + + +"$@" & +pid=$! peak=0 +while true; do + sleep 1 + sample="$(ps -o rss= $pid 2> /dev/null)" || break + let peak='sample > peak ? 
sample : peak' +done +#echo "Peak: $peak" 1>&2 +echo "$(( ${peak%% *} / 1024)) m" + diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..83144b3 --- /dev/null +++ b/run.sh @@ -0,0 +1,43 @@ +#!/bin/zsh + + +export PATH=$PATH:/home/pks/src/scripts/ +export GEM_PATH=$GEM_PATH:/home/pks/lib/ruby +REPEAT=10 + +rm -f .overall + +echo +echo "JSON parsing benchmark" +echo "----------------------" +echo " REAPEAT=$REPEAT" +echo + +# fails: test_MicroJSON.sh \ +for prg in \ + test_cdec_json_parser \ + test_gason \ + test_JsonBox \ + test_jsoncpp \ + test_json-cpp \ + test_jsonxx \ + test_libjson \ + test_nosjob \ + test_picojson \ + test_rapidjson \ + test_sajson +do + echo "[$prg]" + sync; echo 3 > /proc/sys/vm/drop_caches + echo > .overall + for file in `ls -S data/*.json`; do + echo "$file:\t$(./benchmark.rb $REPEAT ./$prg $file 2>/dev/null | tee -a .overall | avg | round 2) s" + done + echo "---" + echo "overall:\t$(avg < .overall | round 2)" + echo " memory:\t$(./memusg.sh ./$prg data/1020.json 2>/dev/null)" + echo +done + +rm .overall + diff --git a/run_msgpack.sh b/run_msgpack.sh new file mode 100755 index 0000000..46c8127 --- /dev/null +++ b/run_msgpack.sh @@ -0,0 +1,33 @@ +#!/bin/zsh + + +export PATH=$PATH:/home/pks/src/scripts/ +export GEM_PATH=$GEM_PATH:/home/pks/lib/ruby +REPEAT=10 + +rm -f .overall_msgpack + +echo +echo "MSGPACK parsing benchmark" +echo "-------------------------" +echo " REAPEAT=$REPEAT" +echo + +for prg in \ + test_msgpack \ + test_msgpack_ruby +do + echo "[$prg]" + sync; echo 3 > /proc/sys/vm/drop_caches + echo > .overall_msgpack + for file in `ls -S data/*.pak`; do + echo "$file:\t$(./benchmark.rb $REPEAT ./$prg $file 2>/dev/null | tee -a .overall_msgpack | avg | round 2) s" + done + echo "---" + echo "overall:\t$(avg < .overall_msgpack | round 2)" + echo " memory:\t$(./memusg.sh ./$prg data/1020.pak 2>/dev/null)" + echo +done + +rm .overall_msgpack + diff --git a/test_JsonBox.cc b/test_JsonBox.cc new file mode 100644 index 
0000000..e1b22c4 --- /dev/null +++ b/test_JsonBox.cc @@ -0,0 +1,24 @@ +#include +#include + +/* + * https://github.com/anhero/JsonBox + * + */ +#include "JsonBox/include/JsonBox.h" + +using namespace std; + + +int +main(int argc, char** argv) +{ + JsonBox::Value v; + v.loadFromFile(argv[1]); + JsonBox::Value w = v["edges"].getArray().back(); + string s = w["rule"].getString(); + cerr << s.substr(1,4) << endl; + + return 0; +} + diff --git a/test_MicroJSON.cc b/test_MicroJSON.cc new file mode 100644 index 0000000..d8d4969 --- /dev/null +++ b/test_MicroJSON.cc @@ -0,0 +1,28 @@ +#include +#include +#include + +/* + * http://grigory.info/MicroJSON.About.html + * + */ +#include "MicroJSON-0.3.2/Node.h" + +using namespace std; + + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + MicroJSON::Node Root; + Root.Parse(json_str); + MicroJSON::Node* edges = Root.GetSubNode("edges"); + cerr << edges->GetChildren().back()->GetSubNode("rule") << endl; + + return 0; +} + diff --git a/test_MicroJSON.sh b/test_MicroJSON.sh new file mode 100755 index 0000000..89d1d3d --- /dev/null +++ b/test_MicroJSON.sh @@ -0,0 +1,5 @@ +#!/bin/sh -x + + +LD_LIBRARY_PATH="/home/pks/z/test/json_test/MicroJSON-0.3.2" ./test_MicroJSON $1 + diff --git a/test_cdec_json_parser.cc b/test_cdec_json_parser.cc new file mode 100644 index 0000000..e805318 --- /dev/null +++ b/test_cdec_json_parser.cc @@ -0,0 +1,25 @@ +#include +#include +#include + +/* + * https://github.com/redpony/cdec/tree/master/decoder + * + */ +#include "cdec_json_parser/json_parse.h" + +using namespace std; + + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + + istream& s = ifs; + JSONParser p; + p.Parse(&s); + + return 0; +} + diff --git a/test_gason.cc b/test_gason.cc new file mode 100644 index 0000000..d78c385 --- /dev/null +++ b/test_gason.cc @@ -0,0 +1,71 @@ +#include +#include +#include +#include + +/* + * 
https://github.com/vivkin/gason + * + */ +#include "gason/gason.h" + +using namespace std; + + +void +print(const char *s) +{ + string u(s); + u = u.substr(1, 4); + if (u == "Goal") { + cerr << u << endl; + } +} + +void +walk(JsonValue o) +{ + switch (o.getTag()) { + case JSON_TAG_NUMBER: + break; + case JSON_TAG_BOOL: + break; + case JSON_TAG_STRING: + print(o.toString()); + break; + case JSON_TAG_ARRAY: + if (!o.toNode()) + break; + for (auto i : o) + walk(i->value); + break; + case JSON_TAG_OBJECT: + if (!o.toNode()) + break; + for (auto i : o) { + print(i->key); + walk(i->value); + } + break; + case JSON_TAG_NULL: + break; + } +} + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + char* s = strdup(json_str.c_str()); + char *p; + JsonValue v; + JsonAllocator a; + JsonParseStatus status = jsonParse(s, &p, &v, a); + walk(v); + + return 0; +} + diff --git a/test_json-cpp.cc b/test_json-cpp.cc new file mode 100644 index 0000000..0791704 --- /dev/null +++ b/test_json-cpp.cc @@ -0,0 +1,100 @@ +#include +#include +#include + +/* + * https://github.com/ascheglov/json-cpp + * + */ +#include "json-cpp.hpp" + +using namespace std; + + +struct Node { + int id; + string cat; + vector span; +}; + +struct Vector { + double CountEF; + double EgivenFCoherent; + double Glue; + double IsSingletonF; + double IsSingletonFE; + double LanguageModel; + double LanguageModel_OOV; + double MaxLexFgivenE; + double MaxLexEgivenF; + double PassThrough; + double PassThrough_1; + double PassThrough_2; + double PassThrough_3; + double PassThrough_4; + double PassThrough_5; + double PassThrough_6; + double SampleCountF; + double WordPenalty; +}; + +struct Edge { + int head; + string rule; + vector tails; + Vector f; + double weight; +}; + +struct Hg { + Vector weights; + vector nodes; + vector edges; + vector rules; +}; + +template inline void +serialize(jsoncpp::Stream& stream, Hg& o) +{ + fields(o, 
stream, "weights", o.weights, "nodes", o.nodes, "edges", o.edges, "rules", o.rules); +} + +template inline void +serialize(jsoncpp::Stream& stream, Edge& o) +{ + fields(o, stream, "head", o.head, "rule", o.rule, "tails", o.tails, "f", o.f, "weight", o.weight); +} + +template inline void +serialize(jsoncpp::Stream& stream, Vector& o) +{ + fields(o, stream, "EgivenFCoherent", o.EgivenFCoherent, "SampleCountF", o.SampleCountF, "CountEF", o.CountEF, "MaxLexFgivenE", o.MaxLexFgivenE, "MaxLexEgivenF", o.MaxLexEgivenF, "IsSingletonF", o.IsSingletonF, "IsSingletonFE", o.IsSingletonFE, "LanguageModel", o.LanguageModel, "LanguageModel_OOV", o.LanguageModel_OOV, "PassThrough", o.PassThrough, "PassThrough_1", o.PassThrough_1, "PassThrough_2", o.PassThrough_2, "PassThrough_3", o.PassThrough_3, "PassThrough_4", o.PassThrough_4, "PassThrough_5", o.PassThrough_5, "PassThrough_6", o.PassThrough_6, "WordPenalty", o.WordPenalty, "Glue", o.Glue); +} + +template inline void +serialize(jsoncpp::Stream& stream, Node& o) +{ + fields(o, stream, "id", o.id, "cat", o.cat, "span", o.span); +} + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs) ), + (istreambuf_iterator())); + + Hg hg; + Vector w; + hg.weights = w; + vector nodes; + hg.nodes = nodes; + vector edges; + hg.edges = edges; + jsoncpp::parse(hg, json_str); + Edge& last_edge = hg.edges.back(); + cerr << last_edge.rule.substr(1, 4) << endl; + + return 0; +} + diff --git a/test_jsoncpp.cc b/test_jsoncpp.cc new file mode 100644 index 0000000..ab3bd0c --- /dev/null +++ b/test_jsoncpp.cc @@ -0,0 +1,29 @@ +#include +#include +#include + +/* + * https://github.com/open-source-parsers/jsoncpp + * + */ +#include "jsoncpp/include/json/json.h" + +using namespace std; + + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + Json::Value v; + Json::Reader reader; + reader.parse(json_str, v); + Json::Value 
last_edge = v["edges"][v["edges"].size()-1]; + cerr << last_edge["rule"].asString().substr(1, 4) << endl; + + return 0; +} + diff --git a/test_jsonxx.cc b/test_jsonxx.cc new file mode 100644 index 0000000..d06640e --- /dev/null +++ b/test_jsonxx.cc @@ -0,0 +1,35 @@ +#include +#include +#include + +/* + * https://github.com/hjiang/jsonxx + * + */ +#include "jsonxx/jsonxx.h" + +using namespace std; + + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + jsonxx::Object o; + o.parse(json_str); + jsonxx::Array edges = o.get("edges"); + jsonxx::Array::container::const_iterator it = edges.values().begin(), end = edges.values().end(); + while (it != end) { + jsonxx::Object e = (*it)->get(); + string s = e.get("rule").substr(1, 4); + if (s == "Goal") + cerr << s << endl; + ++it; + } + + return 0; +} + diff --git a/test_libjson.cc b/test_libjson.cc new file mode 100644 index 0000000..6b3e2a9 --- /dev/null +++ b/test_libjson.cc @@ -0,0 +1,44 @@ +#include +#include +#include + +/* + * http://sourceforge.net/projects/libjson/ + * + */ +#include "libjson/libjson.h" + +using namespace std; + + +void +walk(const JSONNode & n) +{ + JSONNode::const_iterator it = n.begin(); + while (it != n.end()){ + if (it->type() == JSON_ARRAY || it->type() == JSON_NODE){ + walk(*it); + } + string s = it->as_string(); + if (s.size() >= 5) { + string t = s.substr(1, 4); + if (t == "Goal") + cerr << t << endl; + } + ++it; + } +} + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + JSONNode n = libjson::parse(json_str); + walk(n); + + return 0; +} + diff --git a/test_msgpack.cc b/test_msgpack.cc new file mode 100644 index 0000000..1204b05 --- /dev/null +++ b/test_msgpack.cc @@ -0,0 +1,83 @@ +#include +#include +#include + +/* + * http://msgpack.org/ + * + */ +#include +#include +#include + +using namespace std; + + +struct Node 
{ + int id; + string cat; + vector span; + + MSGPACK_DEFINE(id, cat, span); +}; + +struct Vector { + double CountEF; + double EgivenFCoherent; + double Glue; + double IsSingletonF; + double IsSingletonFE; + double LanguageModel; + double LanguageModel_OOV; + double MaxLexFgivenE; + double MaxLexEgivenF; + double PassThrough; + double PassThrough_1; + double PassThrough_2; + double PassThrough_3; + double PassThrough_4; + double PassThrough_5; + double PassThrough_6; + double SampleCountF; + double WordPenalty; + + MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty); +}; + +struct Edge { + int head; + string rule; + vector tails; + Vector f; + double weight; + + MSGPACK_DEFINE(head, rule, tails, f, weight); +}; + +struct Hg { + Vector weights; + vector nodes; + vector edges; + vector rules; + + MSGPACK_DEFINE(weights, nodes, edges, rules); +}; + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + msgpack::zone zone; + msgpack::object obj; + msgpack::unpack(str.data(), str.size(), NULL, &zone, &obj); + Hg hg; + obj.convert(&hg); + Edge last_edge = hg.edges.back(); + cerr << last_edge.rule.substr(1, 4) << endl; + + return 0; +} + diff --git a/test_msgpack_ruby b/test_msgpack_ruby new file mode 100755 index 0000000..0f2d387 --- /dev/null +++ b/test_msgpack_ruby @@ -0,0 +1,9 @@ +#!/usr/bin/env ruby + +require 'msgpack' + + +msg = MessagePack.unpack(File.new(ARGV[0]).read) + +STDERR.write "#{msg["edges"].last()["rule"][1..4]}\n" + diff --git a/test_nosjob.cc b/test_nosjob.cc new file mode 100644 index 0000000..cf8891f --- /dev/null +++ b/test_nosjob.cc @@ -0,0 +1,32 @@ +#include +#include +#include + +/* + * http://fossil.wanderinghorse.net/repos/nosjob/index.cgi/index + * + 
*/ +#include "nosjob-e1d67401fcda6e05/include/wh/nosjob/nosjob.hpp" + +using namespace std; + + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + nosjob::Atom root = nosjob::JsonParser().parse(json_str); + nosjob::Object o = nosjob::Object::cast(root); + nosjob::Atom edges = o.get(nosjob::Utf8String("edges")); + nosjob::Array a = nosjob::Array::cast(edges); + nosjob::Object last_edge = nosjob::Object::cast(a.get(a.size()-1)); + nosjob::Utf8String s = nosjob::Utf8String::cast(last_edge.get(nosjob::Utf8String("rule"))); + string t((char*)s.c_str()); + cerr << t.substr(1, 4) << endl; + + return 0; +} + diff --git a/test_picojson.cc b/test_picojson.cc new file mode 100644 index 0000000..cf3b621 --- /dev/null +++ b/test_picojson.cc @@ -0,0 +1,32 @@ +#include +#include +#include +#include + +/* + * https://github.com/kazuho/picojson + * + */ +#include "picojson/picojson.h" + +using namespace std; + + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + picojson::value v; + istringstream iss(json_str); + picojson::parse(v, iss); + picojson::value::object& obj = v.get(); + picojson::value::object& last_edge = obj["edges"].get().back().get(); + string s(last_edge["rule"].get()); + cerr << s.substr(1, 4) << endl; + + return 0; +} + diff --git a/test_rapidjson.cc b/test_rapidjson.cc new file mode 100644 index 0000000..b344ed0 --- /dev/null +++ b/test_rapidjson.cc @@ -0,0 +1,31 @@ +#include +#include +#include +#include + +/* + * https://github.com/miloyip/rapidjson + * + */ +#include "rapidjson/include/rapidjson/rapidjson.h" +#include "rapidjson/include/rapidjson/document.h" +#include "rapidjson/include/rapidjson/stringbuffer.h" + +using namespace std; + + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + 
rapidjson::Document d; + d.Parse(json_str.c_str()); + string s(d["edges"][d["edges"].Size()-1]["rule"].GetString()); + cerr << s.substr(1, 4) << endl; + + return 0; +} + diff --git a/test_sajson.cc b/test_sajson.cc new file mode 100644 index 0000000..4081d43 --- /dev/null +++ b/test_sajson.cc @@ -0,0 +1,32 @@ +#include +#include +#include +#include + +/* + * https://github.com/chadaustin/sajson + * + */ +#include "sajson/include/sajson.h" + +using namespace std; + + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + const sajson::document& document = sajson::parse(sajson::literal(json_str.c_str())); + size_t index_a = document.get_root().find_object_key(sajson::literal("edges")); + const sajson::value& edges = document.get_root().get_object_value(index_a); + const sajson::value& last_edge = edges.get_array_element(edges.get_length()-1); + size_t index_r = last_edge.find_object_key(sajson::literal("rule")); + const sajson::value& r = last_edge.get_object_value(index_r); + cerr << r.as_string().substr(1, 4) << endl; + + return 0; +} + -- cgit v1.2.3