summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore28
-rw-r--r--LICENSE2
-rw-r--r--Makefile51
-rw-r--r--README.md236
-rwxr-xr-xbenchmark.rb9
-rw-r--r--cdec_json_parser/JSON_parser.c1012
-rw-r--r--cdec_json_parser/JSON_parser.h152
-rw-r--r--cdec_json_parser/LICENSE213
-rw-r--r--cdec_json_parser/Makefile7
-rw-r--r--cdec_json_parser/json_parse.cc31
-rw-r--r--cdec_json_parser/json_parse.h62
-rw-r--r--data/Makefile6
-rw-r--r--data/cdec.ini4
-rwxr-xr-xdata/make.sh8
-rw-r--r--data/make_paks.cc126
-rwxr-xr-xdata/to_ascii.rb13
-rw-r--r--data/weights.init12
-rwxr-xr-xmemusg.sh13
-rwxr-xr-xrun.sh43
-rwxr-xr-xrun_msgpack.sh33
-rw-r--r--test_JsonBox.cc24
-rw-r--r--test_MicroJSON.cc28
-rwxr-xr-xtest_MicroJSON.sh5
-rw-r--r--test_cdec_json_parser.cc25
-rw-r--r--test_gason.cc71
-rw-r--r--test_json-cpp.cc100
-rw-r--r--test_jsoncpp.cc29
-rw-r--r--test_jsonxx.cc35
-rw-r--r--test_libjson.cc44
-rw-r--r--test_msgpack.cc83
-rwxr-xr-xtest_msgpack_ruby9
-rw-r--r--test_nosjob.cc32
-rw-r--r--test_picojson.cc32
-rw-r--r--test_rapidjson.cc31
-rw-r--r--test_sajson.cc32
35 files changed, 2641 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e6c5173
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,28 @@
+JsonBox/
+MicroJSON*/
+gason/
+json-cpp.hpp
+jsoncpp/
+jsonxx/
+libjson/
+nosjob*/
+picojson/
+proto_map/
+rapidjson/
+sajson/
+msgpack-c/
+*.o
+test_JsonBox
+test_MicroJSON
+test_cdec_json_parser
+test_gason
+test_json-cpp
+test_jsoncpp
+test_jsonxx
+test_libjson
+test_nosjob
+test_picojson
+test_rapidjson
+test_sajson
+test_msgpack
+data/make_paks
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..df23347
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,2 @@
+This is public domain.
+
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..bf290a4
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,51 @@
+COMPILER := g++
+CXXFLAGS := -O3 -march=native -mtune=native
+
+
+all: test_gason test_json-cpp test_jsoncpp test_libjson test_picojson test_rapidjson test_sajson test_JsonBox test_jsonxx test_MicroJSON test_nosjob test_cdec_json_parser
+
+test_gason: test_gason.cc
+ $(COMPILER) $(CXXFLAGS) -std=c++11 test_gason.cc -o test_gason gason/gason.o
+
+test_json-cpp: test_json-cpp.cc
+ $(COMPILER) $(CXXFLAGS) -std=c++11 test_json-cpp.cc -o test_json-cpp
+
+test_jsoncpp: test_jsoncpp.cc
+ $(COMPILER) $(CXXFLAGS) test_jsoncpp.cc jsoncpp/lib/libjsoncpp.a -o test_jsoncpp
+
+test_libjson: test_libjson.cc
+ $(COMPILER) $(CXXFLAGS) test_libjson.cc libjson/libjson.a -o test_libjson
+
+test_picojson: test_picojson.cc
+ $(COMPILER) $(CXXFLAGS) test_picojson.cc -o test_picojson
+
+test_rapidjson: test_rapidjson.cc
+ $(COMPILER) $(CXXFLAGS) test_rapidjson.cc -o test_rapidjson
+
+test_sajson: test_sajson.cc
+ $(COMPILER) $(CXXFLAGS) test_sajson.cc -o test_sajson
+
+test_JsonBox: test_JsonBox.cc
+ $(COMPILER) $(CXXFLAGS) test_JsonBox.cc -I./JsonBox/include/ JsonBox/libJsonBox.a -o test_JsonBox
+
+test_jsonxx: test_jsonxx.cc
+ $(COMPILER) $(CXXFLAGS) test_jsonxx.cc jsonxx/jsonxx.o -o test_jsonxx
+
+test_MicroJSON: test_MicroJSON.cc
+ #$(COMPILER) $(CXXFLAGS) test_MicroJSON.cc MicroJSON-0.3.2/libMicroJSON-0.so -lUTF8Strings-1 -o test_MicroJSON
+
+test_nosjob: test_nosjob.cc
+ $(COMPILER) $(CXXFLAGS) test_nosjob.cc nosjob-e1d67401fcda6e05/libnosjob.a -o test_nosjob
+
+test_cdec_json_parser: test_cdec_json_parser.cc
+ $(COMPILER) $(CXXFLAGS) test_cdec_json_parser.cc cdec_json_parser/json_parse.o cdec_json_parser/JSON_parser.o -o test_cdec_json_parser
+
+test_msgpack: test_msgpack.cc
+ $(COMPILER) $(CXXFLAGS) test_msgpack.cc -I./msgpack-c/include/ ./msgpack-c/lib/libmsgpack.a -o test_msgpack
+
+clean:
+ rm -f test_gason test_json-cpp test_jsoncpp test_libjson
+ rm -f test_picojson test_rapidjson test_sajson test_JsonBox
+ rm -f test_jsonxx test_MicroJSON test_nosjob test_cdec_json_parser
+ rm -f test_msgpack
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..fa41339
--- /dev/null
+++ b/README.md
@@ -0,0 +1,236 @@
+Serializer Benchmark
+====================
+
+Comparing the parsing speed and memory usage of all the C++ JSON libraries I could find. Two msgpack implementations (C++ and Ruby) are included as well.
+The goal is to output data.edges.last.rule.substr(1, 4).
+The data is a fairly large and complex object (a hypergraph representation) with a lot of different types, e.g. strings (ASCII), ints, floats, arrays
+and sub-objects.
+Note that the comparison is unfair for some parsers, as they just do SAX-style parsing and do not actually fill objects
+with data (e.g. the cdec parser).
+
+* cdec-json-parser: ripped out of [1].
+* gason: git clone https://github.com/vivkin/gason.git
+* JsonBox: git clone https://github.com/anhero/JsonBox.git
+* jsoncpp: git clone https://github.com/open-source-parsers/jsoncpp.git
+* json-cpp: wget "https://raw.githubusercontent.com/ascheglov/json-cpp/master/single_include/json-cpp.hpp"
+* jsonxx: git clone https://github.com/hjiang/jsonxx.git
+* libjson: wget "http://downloads.sourceforge.net/project/libjson/libjson_7.6.1.zip?r=&ts=1405248411&use_mirror=heanet"
+* MicroJSON: wget http://grigory.info/distfiles/MicroJSON-0.3.2.tar.bz2
+* msgpack-c: git clone https://github.com/msgpack/msgpack-c.git
+* msgpack-ruby: gem install msgpack
+* nosjob: go to [2] and figure out how to download a tarball
+* picojson: git clone https://github.com/kazuho/picojson.git
+* rapidjson: git clone https://github.com/miloyip/rapidjson.git
+* sajson: git clone https://github.com/chadaustin/sajson.git
+
+You'll also need root privileges to clear the disk caches. It is assumed that these scripts [3]
+are in the PATH.
+
+Versions:
+---------
+* cdec-json-parser: SHA-1 d124d4aaa78b52b46f7ac8d7306be342d3405124
+* gason: SHA-1 ede29fc5f0de8e47fd82c09f2f98123d2c867f28
+* JsonBox: SHA-1 fcb82ebae41dffb90d32a49ac236d1608d9a67ee
+* jsoncpp: SHA-1 655a9db0cc62394e81d3074a98c7191fbfc00259
+* json-cpp: SHA-1 170121e2dc099895064305e38bfb25d90a807ce3
+* libjson: version 7.6.1
+* MicroJSON: version 0.3.2
+* msgpack-c: SHA-1 197ed8c983a70d5892bf73dcd1a352bf8e2588df
+* msgpack-ruby: version 0.5.8
+* nosjob: SHA-1 e1d67401fcda6e05a536272532bdb9770bec27e8
+* picojson: SHA-1 5e71db9bec7f22a041cd251c6d6d67e954396d5d
+* rapidjson: SHA-1 63d054349ab56d278060cd3373e76a6933cf194a
+* sajson: SHA-1 003988269f1774dfb184e1864f2f4e654965581e
+
+
+[1] https://github.com/redpony/cdec/tree/master/decoder
+[2] http://fossil.wanderinghorse.net/repos/nosjob/index.cgi/index
+[3] https://github.com/pks/scripts
+
+
+Results
+=======
+
+On my machine (Lenovo X61s) which has an SSD:
+Linux x 3.12.23 #1 SMP PREEMPT Fri Jul 4 15:09:43 CEST 2014 x86_64 Intel(R) Core(TM)2 Duo CPU L7500 @ 1.60GHz GenuineIntel GNU/Linux
+
+Spoiler: sajson and rapidjson are the fastest JSON parsers -- but msgpack is even faster.
+
+JSON parsing benchmark
+----------------------
+ REAPEAT=10
+
+[test_cdec_json_parser]
+data/1020.json: 8.81 s
+data/1570.json: 3.07 s
+data/1391.json: 1.99 s
+data/429.json: 0.6 s
+data/2002.json: 0.32 s
+data/1889.json: 0.07 s
+data/1495.json: 0.01 s
+data/748.json: 0.0 s
+---
+overall: 1.84 s
+ memory: 1 m
+
+[test_gason]
+data/1020.json: 4.34 s
+data/1570.json: 1.52 s
+data/1391.json: 1.05 s
+data/429.json: 0.29 s
+data/2002.json: 0.16 s
+data/1889.json: 0.03 s
+data/1495.json: 0.01 s
+data/748.json: 0.01 s
+---
+overall: 0.91 s
+ memory: 389 m
+
+[test_JsonBox]
+data/1020.json: 36.15 s
+data/1570.json: 11.91 s
+data/1391.json: 8.25 s
+data/429.json: 2.3 s
+data/2002.json: 1.21 s
+data/1889.json: 0.24 s
+data/1495.json: 0.02 s
+data/748.json: 0.0 s
+---
+overall: 7.42 s
+ memory: 901 m
+
+[test_jsoncpp]
+data/1020.json: 9.59 s
+data/1570.json: 3.32 s
+data/1391.json: 2.19 s
+data/429.json: 0.64 s
+data/2002.json: 0.34 s
+data/1889.json: 0.07 s
+data/1495.json: 0.01 s
+data/748.json: 0.01 s
+---
+overall: 2.0 s
+ memory: 804 m
+
+[test_json-cpp]
+data/1020.json: 4.32 s
+data/1570.json: 1.44 s
+data/1391.json: 0.99 s
+data/429.json: 0.28 s
+data/2002.json: 0.15 s
+data/1889.json: 0.03 s
+data/1495.json: 0.01 s
+data/748.json: 0.0 s
+---
+overall: 0.89 s
+ memory: 263 m
+
+[test_jsonxx]
+data/1020.json: 36.85 s
+data/1570.json: 12.86 s
+data/1391.json: 8.36 s
+data/429.json: 2.4 s
+data/2002.json: 1.29 s
+data/1889.json: 0.26 s
+data/1495.json: 0.01 s
+data/748.json: 0.0 s
+---
+overall: 7.66 s
+ memory: 1440 m
+
+[test_libjson]
+data/1020.json: 13.09 s
+data/1570.json: 4.51 s
+data/1391.json: 3.0 s
+data/429.json: 0.86 s
+data/2002.json: 0.46 s
+data/1889.json: 0.09 s
+data/1495.json: 0.01 s
+data/748.json: 0.0 s
+---
+overall: 2.72 s
+ memory: 1649 m
+
+[test_nosjob]
+data/1020.json: 17.64 s
+data/1570.json: 6.18 s
+data/1391.json: 4.09 s
+data/429.json: 1.16 s
+data/2002.json: 0.62 s
+data/1889.json: 0.13 s
+data/1495.json: 0.01 s
+data/748.json: 0.0 s
+---
+overall: 3.68 s
+ memory: 931 m
+
+[test_picojson]
+data/1020.json: 17.35 s
+data/1570.json: 5.51 s
+data/1391.json: 3.97 s
+data/429.json: 1.07 s
+data/2002.json: 0.55 s
+data/1889.json: 0.11 s
+data/1495.json: 0.01 s
+data/748.json: 0.01 s
+---
+overall: 3.53 s
+ memory: 1049 m
+
+[test_rapidjson]
+data/1020.json: 3.27 s
+data/1570.json: 1.08 s
+data/1391.json: 0.75 s
+data/429.json: 0.21 s
+data/2002.json: 0.11 s
+data/1889.json: 0.03 s
+data/1495.json: 0.01 s
+data/748.json: 0.0 s
+---
+overall: 0.67 s
+ memory: 415 m
+
+[test_sajson]
+data/1020.json: 2.94 s
+data/1570.json: 0.97 s
+data/1391.json: 0.66 s
+data/429.json: 0.19 s
+data/2002.json: 0.1 s
+data/1889.json: 0.02 s
+data/1495.json: 0.0 s
+data/748.json: 0.0 s
+---
+overall: 0.6 s
+ memory: 293 m
+
+
+MSGPACK parsing benchmark
+-------------------------
+ REAPEAT=10
+
+[test_msgpack]
+data/1020.pak: 2.2 s
+data/1570.pak: 0.8 s
+data/1391.pak: 0.5 s
+data/429.pak: 0.15 s
+data/2002.pak: 0.09 s
+data/1889.pak: 0.02 s
+data/1495.pak: 0.0 s
+data/748.pak: 0.0 s
+---
+overall: 0.47 s
+ memory: 451 m
+
+[test_msgpack_ruby]
+data/1020.pak: 1.91 s
+data/1570.pak: 0.76 s
+data/1391.pak: 0.52 s
+data/429.pak: 0.23 s
+data/2002.pak: 0.19 s
+data/1889.pak: 0.14 s
+data/1495.pak: 0.13 s
+data/748.pak: 0.13 s
+---
+overall: 0.5 s
+ memory: 216 m
+
diff --git a/benchmark.rb b/benchmark.rb
new file mode 100755
index 0000000..81e11f8
--- /dev/null
+++ b/benchmark.rb
@@ -0,0 +1,9 @@
+#!/usr/bin/env ruby
+
+
+ARGV[0].to_i.times { # ARGV[0]: number of repetitions
+ start = Time.now
+ `#{ARGV[1]} #{ARGV[2]}` # runs "<ARGV[1]> <ARGV[2]>" as a command; the captured stdout is not used
+ puts Time.now-start # prints the elapsed time of this run, one line per repetition
+}
+
diff --git a/cdec_json_parser/JSON_parser.c b/cdec_json_parser/JSON_parser.c
new file mode 100644
index 0000000..5e392bc
--- /dev/null
+++ b/cdec_json_parser/JSON_parser.c
@@ -0,0 +1,1012 @@
+/* JSON_parser.c */
+
+/* 2007-08-24 */
+
+/*
+Copyright (c) 2005 JSON.org
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+The Software shall be used for Good, not Evil.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+/*
+ Callbacks, comments, Unicode handling by Jean Gressmann (jean@0x42.de), 2007-2009.
+
+ For the added features the license above applies also.
+
+ Changelog:
+ 2009-05-17
+ Incorporated benrudiak@googlemail.com fix for UTF16 decoding.
+
+ 2009-05-14
+ Fixed float parsing bug related to a locale being set that didn't
+ use '.' as decimal point character (charles@transmissionbt.com).
+
+ 2008-10-14
+ Renamed states.IN to states.IT to avoid a name clash with the IN macro
+ defined in windef.h (alexey.pelykh@gmail.com)
+
+ 2008-07-19
+ Removed some duplicate code & debugging variable (charles@transmissionbt.com)
+
+ 2008-05-28
+ Made JSON_value structure ansi C compliant. This bug was reported by
+ trisk@acm.jhu.edu
+
+ 2008-05-20
+ Fixed bug reported by charles@transmissionbt.com where the switching
+ from static to dynamic parse buffer did not copy the static parse
+ buffer's content.
+*/
+
+
+
+#include <assert.h>
+#include <ctype.h>
+#include <float.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <locale.h>
+
+#include "JSON_parser.h"
+
+#ifdef _MSC_VER
+# if _MSC_VER >= 1400 /* Visual Studio 2005 and up */
+# pragma warning(disable:4996) // unsecure sscanf
+# endif
+#endif
+
+
+#define true 1
+#define false 0
+#define __ -1 /* the universal error code */
+
+/* values chosen so that the object size is approx equal to one page (4K) */
+#ifndef JSON_PARSER_STACK_SIZE
+# define JSON_PARSER_STACK_SIZE 128
+#endif
+
+#ifndef JSON_PARSER_PARSE_BUFFER_SIZE
+# define JSON_PARSER_PARSE_BUFFER_SIZE 3500
+#endif
+
+typedef unsigned short UTF16;
+
+struct JSON_parser_struct {
+ JSON_parser_callback callback; /* copied from config->callback by new_JSON_parser; may be NULL */
+ void* ctx; /* copied from config->callback_ctx; passed as the first callback argument */
+ signed char state, before_comment_state, type, escaped, comment, allow_comments, handle_floats_manually; /* state holds an enum states value, type a JSON_T_* value; the rest are used as 0/1 flags except before_comment_state, which saves the state to resume after a comment */
+ UTF16 utf16_high_surrogate; /* high surrogate waiting for its low partner; 0 when none is pending */
+ long depth; /* nesting limit; a negative value makes push() grow the stack instead of failing */
+ long top; /* index of the top stack entry; new_JSON_parser starts it at -1 */
+ signed char* stack; /* mode stack; points at static_stack or at a malloc'ed block */
+ long stack_capacity; /* number of entries the current stack can hold */
+ char decimal_point; /* first character of localeconv()->decimal_point */
+ char* parse_buffer; /* NUL-terminated text of the token being collected; static_parse_buffer or a heap block */
+ size_t parse_buffer_capacity; /* size of parse_buffer in chars */
+ size_t parse_buffer_count; /* chars currently stored, not counting the terminating NUL */
+ size_t comment_begin_offset; /* not referenced in the code visible here */
+ signed char static_stack[JSON_PARSER_STACK_SIZE]; /* embedded storage used until a larger stack is needed */
+ char static_parse_buffer[JSON_PARSER_PARSE_BUFFER_SIZE]; /* embedded storage used until grow_parse_buffer() moves to the heap */
+};
+
+#define COUNTOF(x) (sizeof(x)/sizeof(x[0]))
+
+/*
+ Characters are mapped into these character classes. This allows for
+ a significant reduction in the size of the state transition table.
+*/
+
+
+
+enum classes {
+ C_SPACE, /* space */
+ C_WHITE, /* other whitespace */
+ C_LCURB, /* { */
+ C_RCURB, /* } */
+ C_LSQRB, /* [ */
+ C_RSQRB, /* ] */
+ C_COLON, /* : */
+ C_COMMA, /* , */
+ C_QUOTE, /* " */
+ C_BACKS, /* \ */
+ C_SLASH, /* / */
+ C_PLUS, /* + */
+ C_MINUS, /* - */
+ C_POINT, /* . */
+ C_ZERO , /* 0 */
+ C_DIGIT, /* 123456789 */
+ C_LOW_A, /* a */
+ C_LOW_B, /* b */
+ C_LOW_C, /* c */
+ C_LOW_D, /* d */
+ C_LOW_E, /* e */
+ C_LOW_F, /* f */
+ C_LOW_L, /* l */
+ C_LOW_N, /* n */
+ C_LOW_R, /* r */
+ C_LOW_S, /* s */
+ C_LOW_T, /* t */
+ C_LOW_U, /* u */
+ C_ABCDF, /* ABCDF */
+ C_E, /* E */
+ C_ETC, /* everything else */
+ C_STAR, /* * */
+ NR_CLASSES
+};
+
+static int ascii_class[128] = {
+/*
+ This array maps the 128 ASCII characters into character classes.
+ The remaining Unicode characters should be mapped to C_ETC.
+ Non-whitespace control characters are errors.
+*/
+ __, __, __, __, __, __, __, __,
+ __, C_WHITE, C_WHITE, __, __, C_WHITE, __, __,
+ __, __, __, __, __, __, __, __,
+ __, __, __, __, __, __, __, __,
+
+ C_SPACE, C_ETC, C_QUOTE, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
+ C_ETC, C_ETC, C_STAR, C_PLUS, C_COMMA, C_MINUS, C_POINT, C_SLASH,
+ C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
+ C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
+
+ C_ETC, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E, C_ABCDF, C_ETC,
+ C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
+ C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
+ C_ETC, C_ETC, C_ETC, C_LSQRB, C_BACKS, C_RSQRB, C_ETC, C_ETC,
+
+ C_ETC, C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC,
+ C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_L, C_ETC, C_LOW_N, C_ETC,
+ C_ETC, C_ETC, C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC, C_ETC,
+ C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB, C_ETC, C_ETC
+};
+
+
+/*
+ The state codes.
+*/
+enum states {
+ GO, /* start */
+ OK, /* ok */
+ OB, /* object */
+ KE, /* key */
+ CO, /* colon */
+ VA, /* value */
+ AR, /* array */
+ ST, /* string */
+ ES, /* escape */
+ U1, /* u1 */
+ U2, /* u2 */
+ U3, /* u3 */
+ U4, /* u4 */
+ MI, /* minus */
+ ZE, /* zero */
+ IT, /* integer */
+ FR, /* fraction */
+ E1, /* e */
+ E2, /* ex */
+ E3, /* exp */
+ T1, /* tr */
+ T2, /* tru */
+ T3, /* true */
+ F1, /* fa */
+ F2, /* fal */
+ F3, /* fals */
+ F4, /* false */
+ N1, /* nu */
+ N2, /* nul */
+ N3, /* null */
+ C1, /* / */
+ C2, /* / * */
+ C3, /* * */
+ FX, /* *.* *eE* */
+ D1, /* second UTF-16 character decoding started by \ */
+ D2, /* second UTF-16 character proceeded by u */
+ NR_STATES
+};
+
+enum actions /* negative codes stored in state_transition_table; JSON_parser_char switches on them. The table also uses the unnamed literals -2 .. -9. */
+{
+ CB = -10, /* comment begin */
+ CE = -11, /* comment end */
+ FA = -12, /* false */
+ TR = -13, /* true */
+ NU = -14, /* null */
+ DE = -15, /* double detected by exponent e E */
+ DF = -16, /* double detected by fraction . */
+ SB = -17, /* string begin */
+ MX = -18, /* integer detected by minus */
+ ZX = -19, /* integer detected by zero */
+ IX = -20, /* integer detected by 1-9 */
+ EX = -21, /* next char is escaped */
+ UC = -22 /* Unicode character read */
+};
+
+
+static int state_transition_table[NR_STATES][NR_CLASSES] = {
+/*
+ The state transition table takes the current state and the current symbol,
+ and returns either a new state or an action. An action is represented as a
+ negative number. A JSON text is accepted if at the end of the text the
+ state is OK and if the mode is MODE_DONE.
+
+ white 1-9 ABCDF etc
+ space | { } [ ] : , " \ / + - . 0 | a b c d e f l n r s t u | E | * */
+/*start GO*/ {GO,GO,-6,__,-5,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*ok OK*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*object OB*/ {OB,OB,__,-9,__,__,__,__,SB,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*key KE*/ {KE,KE,__,__,__,__,__,__,SB,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*colon CO*/ {CO,CO,__,__,__,__,-2,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*value VA*/ {VA,VA,-6,__,-5,__,__,__,SB,__,CB,__,MX,__,ZX,IX,__,__,__,__,__,FA,__,NU,__,__,TR,__,__,__,__,__},
+/*array AR*/ {AR,AR,-6,__,-5,-7,__,__,SB,__,CB,__,MX,__,ZX,IX,__,__,__,__,__,FA,__,NU,__,__,TR,__,__,__,__,__},
+/*string ST*/ {ST,__,ST,ST,ST,ST,ST,ST,-4,EX,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST},
+/*escape ES*/ {__,__,__,__,__,__,__,__,ST,ST,ST,__,__,__,__,__,__,ST,__,__,__,ST,__,ST,ST,__,ST,U1,__,__,__,__},
+/*u1 U1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U2,U2,U2,U2,U2,U2,U2,U2,__,__,__,__,__,__,U2,U2,__,__},
+/*u2 U2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U3,U3,U3,U3,U3,U3,U3,U3,__,__,__,__,__,__,U3,U3,__,__},
+/*u3 U3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U4,U4,U4,U4,U4,U4,U4,U4,__,__,__,__,__,__,U4,U4,__,__},
+/*u4 U4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,UC,UC,UC,UC,UC,UC,UC,UC,__,__,__,__,__,__,UC,UC,__,__},
+/*minus MI*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ZE,IT,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*zero ZE*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,DF,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*int IT*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,DF,IT,IT,__,__,__,__,DE,__,__,__,__,__,__,__,__,DE,__,__},
+/*frac FR*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__,__},
+/*e E1*/ {__,__,__,__,__,__,__,__,__,__,__,E2,E2,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*ex E2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*exp E3*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*tr T1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T2,__,__,__,__,__,__,__},
+/*tru T2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T3,__,__,__,__},
+/*true T3*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__,__},
+/*fa F1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*fal F2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F3,__,__,__,__,__,__,__,__,__},
+/*fals F3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F4,__,__,__,__,__,__},
+/*false F4*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__,__},
+/*nu N1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N2,__,__,__,__},
+/*nul N2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N3,__,__,__,__,__,__,__,__,__},
+/*null N3*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__},
+/*/ C1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,C2},
+/*/* C2*/ {C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3},
+/** C3*/ {C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,CE,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3},
+/*_. FX*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__,__},
+/*\ D1*/ {__,__,__,__,__,__,__,__,__,D2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*\ D2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,U1,__,__,__,__},
+};
+
+
+/*
+ These modes can be pushed on the stack.
+*/
+enum modes { /* values stored in jc->stack by push() and checked by pop() */
+ MODE_ARRAY = 1,
+ MODE_DONE = 2, /* pushed once by new_JSON_parser as the bottom entry */
+ MODE_KEY = 3, /* the mode expected on the stack when an empty object is closed */
+ MODE_OBJECT = 4
+};
+
+static int
+push(JSON_parser jc, int mode)
+{
+/*
+ Push a mode onto the stack. Return false if there is overflow.
+
+ jc->top is incremented first. When jc->depth is negative the stack is
+ unbounded: once top reaches stack_capacity the capacity is doubled, and
+ the first growth moves the contents of the embedded static_stack into a
+ heap block. When jc->depth is not negative the stack never grows and the
+ push fails as soon as top reaches depth.
+
+ NOTE(review): the results of malloc and realloc are used without a NULL
+ check, and on the overflow path top stays incremented when false is
+ returned.
+*/
+ jc->top += 1;
+ if (jc->depth < 0) {
+ if (jc->top >= jc->stack_capacity) {
+ size_t bytes_to_allocate;
+ jc->stack_capacity *= 2;
+ bytes_to_allocate = jc->stack_capacity * sizeof(jc->static_stack[0]);
+ if (jc->stack == &jc->static_stack[0]) {
+ jc->stack = (signed char*)malloc(bytes_to_allocate);
+ memcpy(jc->stack, jc->static_stack, sizeof(jc->static_stack)); /* carry over everything pushed so far */
+ } else {
+ jc->stack = (signed char*)realloc(jc->stack, bytes_to_allocate);
+ }
+ }
+ } else {
+ if (jc->top >= jc->depth) {
+ return false;
+ }
+ }
+
+ jc->stack[jc->top] = mode;
+ return true;
+}
+
+
+static int
+pop(JSON_parser jc, int mode)
+{
+/*
+ Pop the stack, assuring that the current mode matches the expectation.
+ Return false if there is underflow or if the modes mismatch.
+
+ "mode" is the value the caller expects to find on top of the stack. On
+ failure the stack is left untouched; on success only jc->top is
+ decremented, the stored entry is not cleared.
+*/
+ if (jc->top < 0 || jc->stack[jc->top] != mode) {
+ return false;
+ }
+ jc->top -= 1;
+ return true;
+}
+
+
+#define parse_buffer_clear(jc) \
+ do {\
+ jc->parse_buffer_count = 0;\
+ jc->parse_buffer[0] = 0;\
+ } while (0)
+
+#define parse_buffer_pop_back_char(jc)\
+ do {\
+ assert(jc->parse_buffer_count >= 1);\
+ --jc->parse_buffer_count;\
+ jc->parse_buffer[jc->parse_buffer_count] = 0;\
+ } while (0)
+
+void delete_JSON_parser(JSON_parser jc)
+{ /*
+ Release a parser and everything it allocated. A NULL argument is
+ accepted and ignored. The stack and the parse buffer are freed only when
+ they no longer point at the arrays embedded in the parser object, i.e.
+ when they were moved to the heap.
+*/
+ if (jc) {
+ if (jc->stack != &jc->static_stack[0]) {
+ free((void*)jc->stack);
+ }
+ if (jc->parse_buffer != &jc->static_parse_buffer[0]) {
+ free((void*)jc->parse_buffer);
+ }
+ free((void*)jc);
+ }
+}
+
+
+JSON_parser
+new_JSON_parser(JSON_config* config)
+{
+/*
+ new_JSON_parser starts the checking process by constructing a JSON_parser
+ object. It takes a pointer to a JSON_config; when the pointer is NULL a
+ configuration filled in by init_JSON_config is used instead. The depth
+ field of the configuration restricts the level of maximum nesting:
+ 0 is treated as 1, a positive value is a hard limit, and a negative value
+ selects a stack that grows on demand.
+
+ To continue the process, call JSON_parser_char for each character in the
+ JSON text, and then call JSON_parser_done to obtain the final result.
+ These functions are fully reentrant.
+
+ The returned parser starts in state GO with MODE_DONE on the stack and an
+ empty parse buffer.
+
+ NOTE(review): neither malloc result is checked before use, and the return
+ value of push() is ignored.
+*/
+
+ int depth = 0;
+ JSON_config default_config;
+
+ JSON_parser jc = (JSON_parser)malloc(sizeof(struct JSON_parser_struct));
+
+ memset(jc, 0, sizeof(*jc));
+
+
+ /* initialize configuration */
+ init_JSON_config(&default_config);
+
+ /* set to default configuration if none was provided */
+ if (config == NULL) {
+ config = &default_config;
+ }
+
+ depth = config->depth;
+
+ /* We need to be able to push at least one object */
+ if (depth == 0) {
+ depth = 1;
+ }
+
+ jc->state = GO;
+ jc->top = -1;
+
+ /* Do we want non-bound stack? */
+ if (depth > 0) {
+ jc->stack_capacity = depth;
+ jc->depth = depth;
+ if (depth <= (int)COUNTOF(jc->static_stack)) { /* the embedded array is large enough */
+ jc->stack = &jc->static_stack[0];
+ } else {
+ jc->stack = (signed char*)malloc(jc->stack_capacity * sizeof(jc->static_stack[0]));
+ }
+ } else {
+ jc->stack_capacity = COUNTOF(jc->static_stack);
+ jc->depth = -1; /* negative depth tells push() to grow instead of failing */
+ jc->stack = &jc->static_stack[0];
+ }
+
+ /* set parser to start */
+ push(jc, MODE_DONE);
+
+ /* set up the parse buffer */
+ jc->parse_buffer = &jc->static_parse_buffer[0];
+ jc->parse_buffer_capacity = COUNTOF(jc->static_parse_buffer);
+ parse_buffer_clear(jc);
+
+ /* set up callback, comment & float handling */
+ jc->callback = config->callback;
+ jc->ctx = config->callback_ctx;
+ jc->allow_comments = config->allow_comments != 0;
+ jc->handle_floats_manually = config->handle_floats_manually != 0;
+
+ /* set up decimal point */
+ jc->decimal_point = *localeconv()->decimal_point; /* only the first character of the locale's string is kept */
+
+ return jc;
+}
+
+static void grow_parse_buffer(JSON_parser jc)
+{ /*
+ Double the capacity of the parse buffer. On the first growth the buffer
+ moves from the embedded static_parse_buffer to a heap block and the
+ parse_buffer_count chars collected so far are copied over; later growths
+ use realloc.
+
+ NOTE(review): the malloc and realloc results are not checked for NULL.
+*/
+ size_t bytes_to_allocate;
+ jc->parse_buffer_capacity *= 2;
+ bytes_to_allocate = jc->parse_buffer_capacity * sizeof(jc->parse_buffer[0]);
+ if (jc->parse_buffer == &jc->static_parse_buffer[0]) {
+ jc->parse_buffer = (char*)malloc(bytes_to_allocate);
+ memcpy(jc->parse_buffer, jc->static_parse_buffer, jc->parse_buffer_count);
+ } else {
+ jc->parse_buffer = (char*)realloc(jc->parse_buffer, bytes_to_allocate);
+ }
+}
+
+#define parse_buffer_push_back_char(jc, c)\
+ do {\
+ if (jc->parse_buffer_count + 1 >= jc->parse_buffer_capacity) grow_parse_buffer(jc);\
+ jc->parse_buffer[jc->parse_buffer_count++] = c;\
+ jc->parse_buffer[jc->parse_buffer_count] = 0;\
+ } while (0)
+
+#define assert_is_non_container_type(jc) \
+ assert( \
+ jc->type == JSON_T_NULL || \
+ jc->type == JSON_T_FALSE || \
+ jc->type == JSON_T_TRUE || \
+ jc->type == JSON_T_FLOAT || \
+ jc->type == JSON_T_INTEGER || \
+ jc->type == JSON_T_STRING)
+
+
+static int parse_parse_buffer(JSON_parser jc)
+{ /*
+ Report the token collected in the parse buffer to the callback, then
+ clear the buffer.
+
+ Nothing is reported when no callback is set or when jc->type is
+ JSON_T_NONE. For floats, integers and strings a JSON_value is filled in
+ and passed to the callback; for the remaining types the value argument
+ is NULL. Returns false if the callback returns zero (the buffer is not
+ cleared in that case), true otherwise.
+
+ NOTE(review): the result of sscanf for integers is not checked.
+*/
+ if (jc->callback) {
+ JSON_value value, *arg = NULL;
+
+ if (jc->type != JSON_T_NONE) {
+ assert_is_non_container_type(jc);
+
+ switch(jc->type) {
+ case JSON_T_FLOAT:
+ arg = &value;
+ if (jc->handle_floats_manually) { /* hand the raw text to the callback instead of converting it */
+ value.vu.str.value = jc->parse_buffer;
+ value.vu.str.length = jc->parse_buffer_count;
+ } else {
+ /*sscanf(jc->parse_buffer, "%Lf", &value.vu.float_value);*/
+
+ /* not checking with end pointer b/c there may be trailing ws */
+ value.vu.float_value = strtod(jc->parse_buffer, NULL);
+ }
+ break;
+ case JSON_T_INTEGER:
+ arg = &value;
+ sscanf(jc->parse_buffer, JSON_PARSER_INTEGER_SSCANF_TOKEN, &value.vu.integer_value);
+ break;
+ case JSON_T_STRING:
+ arg = &value; /* the string points into the parse buffer, which is cleared below */
+ value.vu.str.value = jc->parse_buffer;
+ value.vu.str.length = jc->parse_buffer_count;
+ break;
+ }
+
+ if (!(*jc->callback)(jc->ctx, jc->type, arg)) {
+ return false;
+ }
+ }
+ }
+
+ parse_buffer_clear(jc);
+
+ return true;
+}
+
+#define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800)
+#define IS_LOW_SURROGATE(uc) (((uc) & 0xFC00) == 0xDC00)
+#define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000)
+static unsigned char utf8_lead_bits[4] = { 0x00, 0xC0, 0xE0, 0xF0 };
+
+static int decode_unicode_char(JSON_parser jc)
+{ /*
+ Replace the six-character escape at the end of the parse buffer
+ (backslash, 'u' and four hex digits) with the UTF-8 encoding of the
+ character it denotes.
+
+ A high surrogate produces no output; it is stored in
+ jc->utf16_high_surrogate and combined with the low surrogate decoded by
+ the next call. Returns false for a low surrogate without a pending high
+ surrogate, or for a pending high surrogate followed by anything other
+ than a low surrogate; returns true otherwise.
+
+ At most four bytes are written in place of the six removed, so the
+ buffer is written without a capacity check.
+*/
+ int i;
+ unsigned uc = 0;
+ char* p;
+ int trail_bytes;
+
+ assert(jc->parse_buffer_count >= 6);
+
+ p = &jc->parse_buffer[jc->parse_buffer_count - 4]; /* first of the four hex digits */
+
+ for (i = 12; i >= 0; i -= 4, ++p) { /* most significant nibble first */
+ unsigned x = *p;
+
+ if (x >= 'a') {
+ x -= ('a' - 10);
+ } else if (x >= 'A') {
+ x -= ('A' - 10);
+ } else {
+ x &= ~0x30u; /* '0'..'9' are 0x30..0x39, so clearing these bits leaves 0..9 */
+ }
+
+ assert(x < 16);
+
+ uc |= x << i;
+ }
+
+ /* clear UTF-16 char from buffer */
+ jc->parse_buffer_count -= 6;
+ jc->parse_buffer[jc->parse_buffer_count] = 0;
+
+ /* attempt decoding ... */
+ if (jc->utf16_high_surrogate) {
+ if (IS_LOW_SURROGATE(uc)) {
+ uc = DECODE_SURROGATE_PAIR(jc->utf16_high_surrogate, uc);
+ trail_bytes = 3; /* a decoded pair is at least 0x10000 and needs four UTF-8 bytes */
+ jc->utf16_high_surrogate = 0;
+ } else {
+ /* high surrogate without a following low surrogate */
+ return false;
+ }
+ } else {
+ if (uc < 0x80) {
+ trail_bytes = 0;
+ } else if (uc < 0x800) {
+ trail_bytes = 1;
+ } else if (IS_HIGH_SURROGATE(uc)) {
+ /* save the high surrogate and wait for the low surrogate */
+ jc->utf16_high_surrogate = uc;
+ return true;
+ } else if (IS_LOW_SURROGATE(uc)) {
+ /* low surrogate without a preceding high surrogate */
+ return false;
+ } else {
+ trail_bytes = 2;
+ }
+ }
+
+ jc->parse_buffer[jc->parse_buffer_count++] = (char) ((uc >> (trail_bytes * 6)) | utf8_lead_bits[trail_bytes]); /* lead byte */
+
+ for (i = trail_bytes * 6 - 6; i >= 0; i -= 6) { /* continuation bytes, six payload bits each */
+ jc->parse_buffer[jc->parse_buffer_count++] = (char) (((uc >> i) & 0x3F) | 0x80);
+ }
+
+ jc->parse_buffer[jc->parse_buffer_count] = 0;
+
+ return true;
+}
+
+static int add_escaped_char_to_parse_buffer(JSON_parser jc, int next_char)
+{ /*
+ Handle the character that follows a backslash inside a string. The
+ escaped flag is cleared, the backslash already stored in the parse
+ buffer is removed, and the character the escape stands for is appended.
+ Returns false when next_char is not one of the escapes handled below,
+ true otherwise.
+
+ For 'u' the backslash and the 'u' are put back into the buffer, so the
+ four hex digits that follow complete the six characters that
+ decode_unicode_char removes.
+*/
+ jc->escaped = 0;
+ /* remove the backslash */
+ parse_buffer_pop_back_char(jc);
+ switch(next_char) {
+ case 'b':
+ parse_buffer_push_back_char(jc, '\b');
+ break;
+ case 'f':
+ parse_buffer_push_back_char(jc, '\f');
+ break;
+ case 'n':
+ parse_buffer_push_back_char(jc, '\n');
+ break;
+ case 'r':
+ parse_buffer_push_back_char(jc, '\r');
+ break;
+ case 't':
+ parse_buffer_push_back_char(jc, '\t');
+ break;
+ case '"':
+ parse_buffer_push_back_char(jc, '"');
+ break;
+ case '\\':
+ parse_buffer_push_back_char(jc, '\\');
+ break;
+ case '/':
+ parse_buffer_push_back_char(jc, '/');
+ break;
+ case 'u':
+ parse_buffer_push_back_char(jc, '\\');
+ parse_buffer_push_back_char(jc, 'u');
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+#define add_char_to_parse_buffer(jc, next_char, next_class) \
+ do { \
+ if (jc->escaped) { \
+ if (!add_escaped_char_to_parse_buffer(jc, next_char)) \
+ return false; \
+ } else if (!jc->comment) { \
+ if ((jc->type != JSON_T_NONE) | !((next_class == C_SPACE) | (next_class == C_WHITE)) /* non-white-space */) { \
+ parse_buffer_push_back_char(jc, (char)next_char); \
+ } \
+ } \
+ } while (0)
+
+
+#define assert_type_isnt_string_null_or_bool(jc) \
+ assert(jc->type != JSON_T_FALSE); \
+ assert(jc->type != JSON_T_TRUE); \
+ assert(jc->type != JSON_T_NULL); \
+ assert(jc->type != JSON_T_STRING)
+
+
+/*
+    Feed one character to the parser.
+
+    The character is classified, appended to the parse buffer (see
+    add_char_to_parse_buffer) and then looked up in state_transition_table
+    together with the current state. A non-negative table entry is simply the
+    next state; a negative entry selects one of the actions in the switch
+    below, which may invoke the user callback and push/pop the mode stack.
+
+    Returns true while the input is still acceptable. Returns false when the
+    character is negative or an illegal ASCII character, when the grammar is
+    violated, when the callback asks to stop, or when a push/pop fails.
+*/
+int
+JSON_parser_char(JSON_parser jc, int next_char)
+{
+/*
+ After calling new_JSON_parser, call this function for each character (or
+ partial character) in your JSON text. It can accept UTF-8, UTF-16, or
+ UTF-32. It returns true if things are looking ok so far. If it rejects the
+ text, it returns false.
+*/
+ int next_class, next_state;
+
+/*
+ Determine the character's class.
+*/
+ if (next_char < 0) {
+ return false;
+ }
+/* Everything outside 7-bit ASCII is treated as one "other" class. */
+ if (next_char >= 128) {
+ next_class = C_ETC;
+ } else {
+ next_class = ascii_class[next_char];
+/* Classes at or below __ mark characters that are rejected outright. */
+ if (next_class <= __) {
+ return false;
+ }
+ }
+
+ add_char_to_parse_buffer(jc, next_char, next_class);
+
+/*
+ Get the next state from the state transition table.
+*/
+ next_state = state_transition_table[jc->state][next_class];
+ if (next_state >= 0) {
+/*
+ Change the state.
+*/
+ jc->state = next_state;
+ } else {
+/*
+ Or perform one of the actions.
+*/
+ switch (next_state) {
+/* Unicode character */
+ case UC:
+ if(!decode_unicode_char(jc)) {
+ return false;
+ }
+ /* check if we need to read a second UTF-16 char */
+ if (jc->utf16_high_surrogate) {
+ jc->state = D1;
+ } else {
+ jc->state = ST;
+ }
+ break;
+/* escaped char */
+ case EX:
+ jc->escaped = 1;
+ jc->state = ES;
+ break;
+/* integer detected by minus */
+ case MX:
+ jc->type = JSON_T_INTEGER;
+ jc->state = MI;
+ break;
+/* integer detected by zero */
+ case ZX:
+ jc->type = JSON_T_INTEGER;
+ jc->state = ZE;
+ break;
+/* integer detected by 1-9 */
+ case IX:
+ jc->type = JSON_T_INTEGER;
+ jc->state = IT;
+ break;
+
+/* floating point number detected by exponent*/
+ case DE:
+ assert_type_isnt_string_null_or_bool(jc);
+ jc->type = JSON_T_FLOAT;
+ jc->state = E1;
+ break;
+
+/* floating point number detected by fraction */
+ case DF:
+ assert_type_isnt_string_null_or_bool(jc);
+ if (!jc->handle_floats_manually) {
+/*
+ Some versions of strtod (which underlies sscanf) don't support converting
+ C-locale formatted floating point values.
+ The '.' that was just buffered is therefore replaced by jc->decimal_point.
+*/
+ assert(jc->parse_buffer[jc->parse_buffer_count-1] == '.');
+ jc->parse_buffer[jc->parse_buffer_count-1] = jc->decimal_point;
+ }
+ jc->type = JSON_T_FLOAT;
+ jc->state = FX;
+ break;
+/* string begin " */
+ case SB:
+ parse_buffer_clear(jc);
+ assert(jc->type == JSON_T_NONE);
+ jc->type = JSON_T_STRING;
+ jc->state = ST;
+ break;
+
+/* n */
+ case NU:
+ assert(jc->type == JSON_T_NONE);
+ jc->type = JSON_T_NULL;
+ jc->state = N1;
+ break;
+/* f */
+ case FA:
+ assert(jc->type == JSON_T_NONE);
+ jc->type = JSON_T_FALSE;
+ jc->state = F1;
+ break;
+/* t */
+ case TR:
+ assert(jc->type == JSON_T_NONE);
+ jc->type = JSON_T_TRUE;
+ jc->state = T1;
+ break;
+
+/* closing comment */
+ case CE:
+ jc->comment = 0;
+ assert(jc->parse_buffer_count == 0);
+ assert(jc->type == JSON_T_NONE);
+/* Resume in the state that was saved when the comment was opened. */
+ jc->state = jc->before_comment_state;
+ break;
+
+/* opening comment */
+ case CB:
+ if (!jc->allow_comments) {
+ return false;
+ }
+/* Drop the comment opener from the buffer and flush any pending value. */
+ parse_buffer_pop_back_char(jc);
+ if (!parse_parse_buffer(jc)) {
+ return false;
+ }
+ assert(jc->parse_buffer_count == 0);
+ assert(jc->type != JSON_T_STRING);
+/*
+ Choose the state to return to after the comment: inside an array or object
+ only VA and AR are restored as-is, every other state resumes at OK; in all
+ other modes the current state is restored unchanged.
+*/
+ switch (jc->stack[jc->top]) {
+ case MODE_ARRAY:
+ case MODE_OBJECT:
+ switch(jc->state) {
+ case VA:
+ case AR:
+ jc->before_comment_state = jc->state;
+ break;
+ default:
+ jc->before_comment_state = OK;
+ break;
+ }
+ break;
+ default:
+ jc->before_comment_state = jc->state;
+ break;
+ }
+ jc->type = JSON_T_NONE;
+ jc->state = C1;
+ jc->comment = 1;
+ break;
+/*
+ The remaining actions are identified by their raw (negative) table codes
+ rather than by named constants.
+*/
+/* empty } */
+ case -9:
+ parse_buffer_clear(jc);
+ if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_END, NULL)) {
+ return false;
+ }
+ if (!pop(jc, MODE_KEY)) {
+ return false;
+ }
+ jc->state = OK;
+ break;
+
+/* } */ case -8:
+ parse_buffer_pop_back_char(jc);
+ if (!parse_parse_buffer(jc)) {
+ return false;
+ }
+ if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_END, NULL)) {
+ return false;
+ }
+ if (!pop(jc, MODE_OBJECT)) {
+ return false;
+ }
+ jc->type = JSON_T_NONE;
+ jc->state = OK;
+ break;
+
+/* ] */ case -7:
+ parse_buffer_pop_back_char(jc);
+ if (!parse_parse_buffer(jc)) {
+ return false;
+ }
+ if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_ARRAY_END, NULL)) {
+ return false;
+ }
+ if (!pop(jc, MODE_ARRAY)) {
+ return false;
+ }
+
+ jc->type = JSON_T_NONE;
+ jc->state = OK;
+ break;
+
+/* { */ case -6:
+ parse_buffer_pop_back_char(jc);
+ if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_BEGIN, NULL)) {
+ return false;
+ }
+ if (!push(jc, MODE_KEY)) {
+ return false;
+ }
+ assert(jc->type == JSON_T_NONE);
+ jc->state = OB;
+ break;
+
+/* [ */ case -5:
+ parse_buffer_pop_back_char(jc);
+ if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_ARRAY_BEGIN, NULL)) {
+ return false;
+ }
+ if (!push(jc, MODE_ARRAY)) {
+ return false;
+ }
+ assert(jc->type == JSON_T_NONE);
+ jc->state = AR;
+ break;
+
+/* string end " */ case -4:
+ parse_buffer_pop_back_char(jc);
+ switch (jc->stack[jc->top]) {
+ case MODE_KEY:
+ assert(jc->type == JSON_T_STRING);
+ jc->type = JSON_T_NONE;
+ jc->state = CO;
+
+/* In key mode the string is an object key and is reported as JSON_T_KEY. */
+ if (jc->callback) {
+ JSON_value value;
+ value.vu.str.value = jc->parse_buffer;
+ value.vu.str.length = jc->parse_buffer_count;
+ if (!(*jc->callback)(jc->ctx, JSON_T_KEY, &value)) {
+ return false;
+ }
+ }
+ parse_buffer_clear(jc);
+ break;
+ case MODE_ARRAY:
+ case MODE_OBJECT:
+ assert(jc->type == JSON_T_STRING);
+ if (!parse_parse_buffer(jc)) {
+ return false;
+ }
+ jc->type = JSON_T_NONE;
+ jc->state = OK;
+ break;
+ default:
+ return false;
+ }
+ break;
+
+/* , */ case -3:
+ parse_buffer_pop_back_char(jc);
+ if (!parse_parse_buffer(jc)) {
+ return false;
+ }
+ switch (jc->stack[jc->top]) {
+ case MODE_OBJECT:
+/*
+ A comma causes a flip from object mode to key mode.
+*/
+ if (!pop(jc, MODE_OBJECT) || !push(jc, MODE_KEY)) {
+ return false;
+ }
+ assert(jc->type != JSON_T_STRING);
+ jc->type = JSON_T_NONE;
+ jc->state = KE;
+ break;
+ case MODE_ARRAY:
+ assert(jc->type != JSON_T_STRING);
+ jc->type = JSON_T_NONE;
+ jc->state = VA;
+ break;
+ default:
+ return false;
+ }
+ break;
+
+/* : */ case -2:
+/*
+ A colon causes a flip from key mode to object mode.
+*/
+ parse_buffer_pop_back_char(jc);
+ if (!pop(jc, MODE_KEY) || !push(jc, MODE_OBJECT)) {
+ return false;
+ }
+ assert(jc->type == JSON_T_NONE);
+ jc->state = VA;
+ break;
+/*
+ Bad action.
+*/
+ default:
+ return false;
+ }
+ }
+ return true;
+}
+
+
+/*
+    Finish a parse. Succeeds (returns 1) only when the parser is in the OK
+    state and MODE_DONE can be popped from the mode stack; the pop is not
+    attempted when the state check already fails. Returns 0 otherwise.
+*/
+int
+JSON_parser_done(JSON_parser jc)
+{
+    if (jc->state != OK) {
+        return 0;
+    }
+
+    return pop(jc, MODE_DONE) ? 1 : 0;
+}
+
+
+/*
+    Returns true when every character of the zero-terminated string s belongs
+    to the C_SPACE or C_WHITE class of ascii_class. Returns false for a NULL
+    pointer and for any character outside the 7-bit ASCII range. An empty
+    string is accepted.
+*/
+int JSON_parser_is_legal_white_space_string(const char* s)
+{
+    const char* cursor;
+
+    if (s == NULL) {
+        return false;
+    }
+
+    for (cursor = s; *cursor != '\0'; ++cursor) {
+        const int ch = *cursor;
+        int cls;
+
+        /* ch is negative for high bytes when plain char is signed. */
+        if (ch < 0 || ch >= 128) {
+            return false;
+        }
+
+        cls = ascii_class[ch];
+        if (!(cls == C_SPACE || cls == C_WHITE)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+
+
+/*
+    Reset *config to the defaults: every field zeroed, then the nesting depth
+    set to JSON_PARSER_STACK_SIZE - 1. A NULL config is ignored.
+*/
+void init_JSON_config(JSON_config* config)
+{
+    if (!config) {
+        return;
+    }
+
+    memset(config, 0, sizeof(JSON_config));
+    config->depth = JSON_PARSER_STACK_SIZE - 1;
+}
diff --git a/cdec_json_parser/JSON_parser.h b/cdec_json_parser/JSON_parser.h
new file mode 100644
index 0000000..de98007
--- /dev/null
+++ b/cdec_json_parser/JSON_parser.h
@@ -0,0 +1,152 @@
+#ifndef JSON_PARSER_H
+#define JSON_PARSER_H
+
+/* JSON_parser.h */
+
+
+#include <stddef.h>
+
+/*
+   Windows DLL support: JSON_PARSER_DLL_API marks the public functions as
+   exported when JSON_PARSER_DLL_EXPORTS is defined and as imported otherwise.
+   On every other platform the macro expands to nothing.
+*/
+#ifdef _WIN32
+# ifdef JSON_PARSER_DLL_EXPORTS
+# define JSON_PARSER_DLL_API __declspec(dllexport)
+# else
+# define JSON_PARSER_DLL_API __declspec(dllimport)
+# endif
+#else
+# define JSON_PARSER_DLL_API
+#endif
+
+/*
+   Determine the integer type used to parse non-floating point numbers.
+
+   long long is chosen for C99 and later, for C++11 and later (both standards
+   guarantee the type), or when the build defines HAVE_LONG_LONG to 1. The
+   C++ test keeps a C++ translation unit that includes this header in
+   agreement with the C99 build of the parser itself. Every macro is checked
+   with defined() first, so the header is clean under -Wundef.
+*/
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) \
+    || (defined(__cplusplus) && __cplusplus >= 201103L) \
+    || (defined(HAVE_LONG_LONG) && HAVE_LONG_LONG == 1)
+typedef long long JSON_int_t;
+#define JSON_PARSER_INTEGER_SSCANF_TOKEN "%lld"
+#define JSON_PARSER_INTEGER_SPRINTF_TOKEN "%lld"
+#else
+typedef long JSON_int_t;
+#define JSON_PARSER_INTEGER_SSCANF_TOKEN "%ld"
+#define JSON_PARSER_INTEGER_SPRINTF_TOKEN "%ld"
+#endif
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+   Kinds of events/values passed as the `type` argument of the parser
+   callback. JSON_T_NONE is never passed to the callback (see
+   JSON_parser_callback); the trailing comments give the numeric values.
+*/
+typedef enum
+{
+ JSON_T_NONE = 0,
+ JSON_T_ARRAY_BEGIN, /* 1 */
+ JSON_T_ARRAY_END, /* 2 */
+ JSON_T_OBJECT_BEGIN, /* 3 */
+ JSON_T_OBJECT_END, /* 4 */
+ JSON_T_INTEGER, /* 5 */
+ JSON_T_FLOAT, /* 6 */
+ JSON_T_NULL, /* 7 */
+ JSON_T_TRUE, /* 8 */
+ JSON_T_FALSE, /* 9 */
+ JSON_T_STRING, /* 10 */
+ JSON_T_KEY, /* 11 */
+ JSON_T_MAX /* 12 */
+} JSON_type;
+
+/*
+   Payload handed to the parser callback. Only one member of the union is
+   meaningful for a given event; presumably integer_value goes with
+   JSON_T_INTEGER, float_value with JSON_T_FLOAT and str with JSON_T_STRING
+   and JSON_T_KEY (TODO confirm against the parser implementation).
+*/
+typedef struct JSON_value_struct {
+ union {
+ JSON_int_t integer_value;
+
+ double float_value;
+
+ /* Text of the value together with its length. */
+ struct {
+ const char* value;
+ size_t length;
+ } str;
+ } vu;
+} JSON_value;
+
+/* Opaque parser handle: created by new_JSON_parser, released by delete_JSON_parser. */
+typedef struct JSON_parser_struct* JSON_parser;
+
+/*! \brief JSON parser callback
+
+ \param ctx The pointer passed to new_JSON_parser.
+ \param type An element of JSON_type but not JSON_T_NONE.
+ \param value A representation of the parsed value. This parameter is NULL for
+ JSON_T_ARRAY_BEGIN, JSON_T_ARRAY_END, JSON_T_OBJECT_BEGIN, JSON_T_OBJECT_END,
+ JSON_T_NULL, JSON_T_TRUE, and JSON_T_FALSE. String values are always returned
+ as zero-terminated C strings.
+
+ \return Non-zero if parsing should continue, else zero.
+*/
+typedef int (*JSON_parser_callback)(void* ctx, int type, const struct JSON_value_struct* value);
+
+
+/*! \brief The structure used to configure a JSON parser object
+
+ \param callback Pointer to a callback. This parameter may be NULL. In this case the input is merely checked for validity.
+ \param callback_ctx Callback context. This parameter may be NULL.
+ \param depth Specifies the levels of nested JSON to allow. Negative numbers yield unlimited nesting.
+ \param allow_comments To allow C style comments in JSON, set to non-zero.
+ \param handle_floats_manually To decode floating point numbers manually set this parameter to non-zero.
+*/
+typedef struct {
+ JSON_parser_callback callback;
+ void* callback_ctx;
+ int depth;
+ int allow_comments;
+ int handle_floats_manually;
+} JSON_config;
+
+
+/*! \brief Initializes the JSON parser configuration structure to default values.
+
+ The default configuration is
+ - 127 levels of nested JSON (depends on JSON_PARSER_STACK_SIZE, see JSON_parser.c)
+ - no parsing, just checking for JSON syntax
+ - no comments
+
+ \param config. Used to configure the parser.
+*/
+JSON_PARSER_DLL_API void init_JSON_config(JSON_config* config);
+
+/*! \brief Create a JSON parser object
+
+ \param config. Used to configure the parser. Set to NULL to use the default configuration.
+ See init_JSON_config
+
+ \return The parser object.
+*/
+JSON_PARSER_DLL_API extern JSON_parser new_JSON_parser(JSON_config* config);
+
+/*! \brief Destroy a previously created JSON parser object. */
+JSON_PARSER_DLL_API extern void delete_JSON_parser(JSON_parser jc);
+
+/*! \brief Parse a character.
+
+ \return Non-zero if all characters passed to this function so far are part of valid JSON.
+*/
+JSON_PARSER_DLL_API extern int JSON_parser_char(JSON_parser jc, int next_char);
+
+/*! \brief Finalize parsing.
+
+ Call this method once after all input characters have been consumed.
+
+ \return Non-zero, if all parsed characters are valid JSON, zero otherwise.
+*/
+JSON_PARSER_DLL_API extern int JSON_parser_done(JSON_parser jc);
+
+/*! \brief Determine if a given string is valid JSON white space
+
+ \return Non-zero if the string is valid, zero otherwise.
+*/
+JSON_PARSER_DLL_API extern int JSON_parser_is_legal_white_space_string(const char* s);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* JSON_PARSER_H */
diff --git a/cdec_json_parser/LICENSE b/cdec_json_parser/LICENSE
new file mode 100644
index 0000000..a390938
--- /dev/null
+++ b/cdec_json_parser/LICENSE
@@ -0,0 +1,213 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+----------------------------------------------
+
+L-BFGS CODE FROM COMPUTATIONAL CRYSTALLOGRAPHY TOOLBOX (CCTBX)
+
+This package includes source code (training/lbfgs.h) based on source
+code distributed as part of the Computational Crystallography Toolbox
+(CCTBX), which has separate copyright notices and license terms. Use of
+this source code is subject to the terms and conditions of the license
+contained in the file LICENSE.cctbx .
+
diff --git a/cdec_json_parser/Makefile b/cdec_json_parser/Makefile
new file mode 100644
index 0000000..c976aba
--- /dev/null
+++ b/cdec_json_parser/Makefile
@@ -0,0 +1,7 @@
+# Compile the C parser core and its C++ wrapper to object files.
+# Neither target produces a file of its own name, so both are phony.
+.PHONY: all clean
+
+all:
+	gcc JSON_parser.c -c
+	g++ json_parse.cc -c
+
+# -f keeps `make clean` from failing when there is nothing to remove.
+clean:
+	rm -f *.o
+
diff --git a/cdec_json_parser/json_parse.cc b/cdec_json_parser/json_parse.cc
new file mode 100644
index 0000000..4580fc8
--- /dev/null
+++ b/cdec_json_parser/json_parse.cc
@@ -0,0 +1,31 @@
+#include "json_parse.h"
+
+#include <string>
+#include <iostream>
+
+using namespace std;
+
+
+// Event hook invoked for every parser event. All events except string values
+// are ignored. For a string value, "Goal" is written to stderr when the
+// characters at positions 1..4 spell "Goal".
+//
+// Always returns true so that parsing continues.
+bool JSONParser::HandleJSONEvent(int type, const JSON_value* value) {
+  if (type != JSON_T_STRING) {
+    return true;
+  }
+  if (!value || !value->vu.str.value) {
+    return true;
+  }
+
+  const string s = value->vu.str.value;
+  // substr(1, 4) throws std::out_of_range for an empty string, and that
+  // exception would escape through the C parser. Check the length instead
+  // and compare in place, which also avoids a temporary string.
+  if (s.size() >= 5 && s.compare(1, 4, "Goal") == 0) {
+    cerr << "Goal" << endl;
+  }
+  return true;
+}
+
diff --git a/cdec_json_parser/json_parse.h b/cdec_json_parser/json_parse.h
new file mode 100644
index 0000000..80c037b
--- /dev/null
+++ b/cdec_json_parser/json_parse.h
@@ -0,0 +1,62 @@
+#ifndef _JSON_WRAPPER_H_
+#define _JSON_WRAPPER_H_
+
+#include <iostream>
+#include <cassert>
+#include "JSON_parser.h"
+
+// C++ wrapper around the C JSON_parser: owns one parser handle, feeds it the
+// characters of a stream and forwards every parser event to HandleJSONEvent.
+//
+// The parser is configured with a nesting depth of 10, with comments allowed
+// and with manual float handling. Instances own a raw handle and are
+// therefore not copyable.
+class JSONParser {
+ public:
+  JSONParser() {
+    state = -1;
+    init_JSON_config(&config);
+    config.depth = 10;
+    config.callback_ctx = this;
+    // A static member function is used as the C callback; the previous
+    // union-based cast of a member-function pointer was undefined behaviour.
+    config.callback = &JSONParser::Trampoline;
+    config.allow_comments = 1;
+    config.handle_floats_manually = 1;
+    jc = new_JSON_parser(&config);
+  }
+  virtual ~JSONParser() {
+    delete_JSON_parser(jc);
+  }
+  // Parses the whole stream. Returns true when the input is valid JSON;
+  // otherwise writes a diagnostic to std::cerr and returns false.
+  bool Parse(std::istream* in) {
+    int count = 0;  // bytes consumed so far
+    int lc = 1;     // current line number, 1-based
+    // `in` is tested so that a null stream falls through to the final check.
+    for (; in ; ++count) {
+      const int next_char = in->get();
+      if (!in->good()) break;
+      if (!JSON_parser_char(jc, next_char)) {
+        std::cerr << "JSON_parser_char: syntax error, line " << lc << " (byte " << count << ")" << std::endl;
+        return false;
+      }
+      // Count the line only after the newline itself was accepted, so an
+      // error is reported on the line it occurred on. (The old code compared
+      // lc, not next_char, with '\n' and so never counted lines correctly.)
+      if (next_char == '\n') { ++lc; }
+    }
+    if (!JSON_parser_done(jc)) {
+      std::cerr << "JSON_parser_done: syntax error\n";
+      return false;
+    }
+    return true;
+  }
+  static void WriteEscapedString(const std::string& in, std::ostream* out);
+ protected:
+  bool HandleJSONEvent(int type, const JSON_value* value);
+ private:
+  // Not copyable: two copies would release the same parser handle.
+  // Declared but intentionally left undefined.
+  JSONParser(const JSONParser&);
+  JSONParser& operator=(const JSONParser&);
+
+  int state;
+  std::string cur_key;
+  std::string cat;
+  // Adapts the bool result of HandleJSONEvent to the parser's int protocol
+  // (non-zero means "continue parsing").
+  int Callback(int type, const JSON_value* value) {
+    if (HandleJSONEvent(type, value)) return 1;
+    return 0;
+  }
+  // C-compatible entry point: ctx is the JSONParser registered as
+  // callback_ctx in the constructor.
+  static int Trampoline(void* ctx, int type, const struct JSON_value_struct* value) {
+    return static_cast<JSONParser*>(ctx)->Callback(type, value);
+  }
+  JSON_parser_struct* jc;
+  JSON_config config;
+};
+
+#endif
diff --git a/data/Makefile b/data/Makefile
new file mode 100644
index 0000000..e56b229
--- /dev/null
+++ b/data/Makefile
@@ -0,0 +1,6 @@
+# Builds the JSON -> msgpack converter against the bundled msgpack-c checkout.
+# `clean` creates no file of that name, so it is declared phony.
+.PHONY: clean
+
+make_paks: make_paks.cc
+	g++ -std=c++11 make_paks.cc -I../msgpack-c/include/ ../msgpack-c/lib/libmsgpack.a -o make_paks
+
+clean:
+	rm -f make_paks
+
diff --git a/data/cdec.ini b/data/cdec.ini
new file mode 100644
index 0000000..ddbe54c
--- /dev/null
+++ b/data/cdec.ini
@@ -0,0 +1,4 @@
+formalism=scfg
+intersection_strategy=full
+add_pass_through_rules=true
+
diff --git a/data/make.sh b/data/make.sh
new file mode 100755
index 0000000..5e0c31b
--- /dev/null
+++ b/data/make.sh
@@ -0,0 +1,8 @@
+#!/bin/zsh
+
+# Generates the JSON benchmark inputs: for every listed id, cdec2json.py reads
+# $i.in with grammar.$i.gz, its output is piped through ./to_ascii.rb and
+# written to $i.json.
+# NOTE(review): cdec2json.py is referenced by a path under the author's home
+# directory; adjust it for other machines.
+
+# wmt/14/newstest2008 data
+for i in 1020 1391 1495 1570 1889 2002 429 748; do
+ ~/src/weaver/util/cdec2json.py -c cdec.ini -w weights.init -g grammar.$i.gz < $i.in | ./to_ascii.rb > $i.json
+done
+
diff --git a/data/make_paks.cc b/data/make_paks.cc
new file mode 100644
index 0000000..3477294
--- /dev/null
+++ b/data/make_paks.cc
@@ -0,0 +1,126 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <msgpack.hpp>
+#include <msgpack/fbuffer.h>
+#include <msgpack/fbuffer.hpp>
+
+
+/*
+ * https://github.com/ascheglov/json-cpp
+ *
+ */
+#include "../json-cpp.hpp"
+
+using namespace std;
+
+
+// One entry of Hg::nodes. The member names match the JSON keys used by the
+// serialize() overload for Node; MSGPACK_DEFINE lists the packed members.
+struct Node {
+  int id = 0;  // zero-initialized so a missing JSON field never packs garbage
+  string cat;
+  vector<int> span;
+
+  MSGPACK_DEFINE(id, cat, span);
+};
+
+// Fixed set of named feature values, used both for the hypergraph weights and
+// for the per-edge feature vector.
+//
+// Every member defaults to 0.0, so a feature that is absent from the JSON
+// input is packed as zero instead of an indeterminate value.
+//
+// NOTE: MSGPACK_DEFINE lists MaxLexEgivenF before MaxLexFgivenE, unlike the
+// declaration order. The list is left untouched because it presumably fixes
+// the packed field order, which any reader of the .pak files must match.
+struct Vector {
+  double CountEF = 0.0;
+  double EgivenFCoherent = 0.0;
+  double Glue = 0.0;
+  double IsSingletonF = 0.0;
+  double IsSingletonFE = 0.0;
+  double LanguageModel = 0.0;
+  double LanguageModel_OOV = 0.0;
+  double MaxLexFgivenE = 0.0;
+  double MaxLexEgivenF = 0.0;
+  double PassThrough = 0.0;
+  double PassThrough_1 = 0.0;
+  double PassThrough_2 = 0.0;
+  double PassThrough_3 = 0.0;
+  double PassThrough_4 = 0.0;
+  double PassThrough_5 = 0.0;
+  double PassThrough_6 = 0.0;
+  double SampleCountF = 0.0;
+  double WordPenalty = 0.0;
+
+  MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty);
+};
+
+// One entry of Hg::edges. The member names match the JSON keys used by the
+// serialize() overload for Edge; MSGPACK_DEFINE lists the packed members.
+struct Edge {
+  int head = 0;  // scalars are zero-initialized so missing fields pack as 0
+  string rule;
+  vector<int> tails;
+  Vector f;
+  double weight = 0.0;
+
+  MSGPACK_DEFINE(head, rule, tails, f, weight);
+};
+
+// Top-level document: the object that main() fills from the JSON input via
+// jsoncpp::parse and writes out with msgpack::pack. The member names match
+// the JSON keys used by the serialize() overload for Hg.
+struct Hg {
+ Vector weights;
+ vector<Node> nodes;
+ vector<Edge> edges;
+ vector<string> rules;
+
+ MSGPACK_DEFINE(weights, nodes, edges, rules);
+};
+
+// json-cpp field mapping for Hg: binds each JSON key to its member.
+template<typename StreamImpl>
+inline void serialize(jsoncpp::Stream<StreamImpl>& stream, Hg& o)
+{
+  fields(o, stream,
+         "weights", o.weights,
+         "nodes", o.nodes,
+         "edges", o.edges,
+         "rules", o.rules);
+}
+
+// json-cpp field mapping for Edge: binds each JSON key to its member.
+template<typename StreamImpl>
+inline void serialize(jsoncpp::Stream<StreamImpl>& stream, Edge& o)
+{
+  fields(o, stream,
+         "head", o.head,
+         "rule", o.rule,
+         "tails", o.tails,
+         "f", o.f,
+         "weight", o.weight);
+}
+
+// json-cpp field mapping for Vector: binds each JSON key to its member.
+// The key/member pairs are passed in the same order as before.
+template<typename StreamImpl>
+inline void serialize(jsoncpp::Stream<StreamImpl>& stream, Vector& o)
+{
+  fields(o, stream,
+         "EgivenFCoherent", o.EgivenFCoherent,
+         "SampleCountF", o.SampleCountF,
+         "CountEF", o.CountEF,
+         "MaxLexFgivenE", o.MaxLexFgivenE,
+         "MaxLexEgivenF", o.MaxLexEgivenF,
+         "IsSingletonF", o.IsSingletonF,
+         "IsSingletonFE", o.IsSingletonFE,
+         "LanguageModel", o.LanguageModel,
+         "LanguageModel_OOV", o.LanguageModel_OOV,
+         "PassThrough", o.PassThrough,
+         "PassThrough_1", o.PassThrough_1,
+         "PassThrough_2", o.PassThrough_2,
+         "PassThrough_3", o.PassThrough_3,
+         "PassThrough_4", o.PassThrough_4,
+         "PassThrough_5", o.PassThrough_5,
+         "PassThrough_6", o.PassThrough_6,
+         "WordPenalty", o.WordPenalty,
+         "Glue", o.Glue);
+}
+
+// json-cpp field mapping for Node: binds each JSON key to its member.
+template<typename StreamImpl>
+inline void serialize(jsoncpp::Stream<StreamImpl>& stream, Node& o)
+{
+  fields(o, stream,
+         "id", o.id,
+         "cat", o.cat,
+         "span", o.span);
+}
+
+// Converts a JSON hypergraph file into a msgpack file.
+//
+// Usage: make_paks <in.json> <out.pak>
+//
+// Returns 0 on success and 1 when the arguments are missing or a file cannot
+// be opened or written.
+int
+main(int argc, char** argv)
+{
+  if (argc < 3) {
+    cerr << "usage: " << (argc > 0 ? argv[0] : "make_paks")
+         << " <in.json> <out.pak>" << endl;
+    return 1;
+  }
+
+  ifstream ifs(argv[1]);
+  if (!ifs) {
+    cerr << "cannot open input file " << argv[1] << endl;
+    return 1;
+  }
+  string json_str((istreambuf_iterator<char>(ifs)),
+                  (istreambuf_iterator<char>()));
+
+  // Value-initialize so that every scalar starts at zero; the previous code
+  // copied an uninitialized Vector into hg.weights.
+  Hg hg = Hg();
+  jsoncpp::parse(hg, json_str);
+
+  FILE* file = fopen(argv[2], "wb");
+  if (!file) {
+    cerr << "cannot open output file " << argv[2] << endl;
+    return 1;
+  }
+  msgpack::fbuffer fbuf(file);
+  msgpack::pack(fbuf, hg);
+  // fclose flushes the buffered data, so its result decides success.
+  if (fclose(file) != 0) {
+    cerr << "error writing " << argv[2] << endl;
+    return 1;
+  }
+
+  return 0;
+}
+
diff --git a/data/to_ascii.rb b/data/to_ascii.rb
new file mode 100755
index 0000000..6c1d23e
--- /dev/null
+++ b/data/to_ascii.rb
@@ -0,0 +1,13 @@
+#!/usr/bin/env ruby
+
+# Filter: copies STDIN to STDOUT transcoded to ASCII. Invalid byte sequences
+# and characters without an ASCII equivalent are replaced by '?'.
+
+# Built once instead of once per input line.
+ENCODING_OPTIONS = {
+  :invalid => :replace,
+  :undef => :replace,
+  :replace => '?',
+  :universal_newline => true
+}.freeze
+
+while line = STDIN.gets
+  # The options are passed as keyword arguments (**), which works regardless
+  # of how the Ruby version treats a trailing positional hash.
+  puts line.encode('ASCII', **ENCODING_OPTIONS)
+end
+
diff --git a/data/weights.init b/data/weights.init
new file mode 100644
index 0000000..0d09f9f
--- /dev/null
+++ b/data/weights.init
@@ -0,0 +1,12 @@
+CountEF 0.1
+EgivenFCoherent -0.1
+Glue 0.01
+IsSingletonF -0.01
+IsSingletonFE -0.01
+LanguageModel 0.1
+LanguageModel_OOV -1
+MaxLexFgivenE -0.1
+MaxLexEgivenF -0.1
+PassThrough -0.1
+SampleCountF -0.1
+WordPenalty -0.1
diff --git a/memusg.sh b/memusg.sh
new file mode 100755
index 0000000..e3b6f90
--- /dev/null
+++ b/memusg.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# Usage: memusg.sh COMMAND [ARGS...]
+#
+# Runs COMMAND in the background, samples its resident set size with ps once
+# per second until the process is gone, and prints the peak as "<N> m"
+# (the ps value divided by 1024).
+
+"$@" &
+pid=$! peak=0
+# Sample first and sleep afterwards, so commands that finish in under a
+# second are still measured. The loop ends when ps no longer finds the pid.
+while sample="$(ps -o rss= -p "$pid" 2> /dev/null)"; do
+  # Arithmetic assignment also strips the padding ps puts around the number.
+  peak=$(( sample > peak ? sample : peak ))
+  sleep 1
+done
+echo "$(( peak / 1024 )) m"
+
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..83144b3
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,43 @@
+#!/bin/zsh
+
+# Runs every JSON benchmark binary over data/*.json (largest file first) and
+# prints the average time per file, the overall average and the peak memory
+# for data/1020.json.
+# The `avg` and `round` helpers are expected on PATH (see the export below).
+# NOTE(review): writing to /proc/sys/vm/drop_caches needs root privileges.
+
+export PATH=$PATH:/home/pks/src/scripts/
+export GEM_PATH=$GEM_PATH:/home/pks/lib/ruby
+REPEAT=10
+
+rm -f .overall
+
+echo
+echo "JSON parsing benchmark"
+echo "----------------------"
+echo " REPEAT=$REPEAT"
+echo
+
+# fails: test_MicroJSON.sh \
+for prg in \
+  test_cdec_json_parser \
+  test_gason \
+  test_JsonBox \
+  test_jsoncpp \
+  test_json-cpp \
+  test_jsonxx \
+  test_libjson \
+  test_nosjob \
+  test_picojson \
+  test_rapidjson \
+  test_sajson
+do
+  echo "[$prg]"
+  sync; echo 3 > /proc/sys/vm/drop_caches
+  echo > .overall
+  for file in `ls -S data/*.json`; do
+    echo "$file:\t$(./benchmark.rb $REPEAT ./$prg $file 2>/dev/null | tee -a .overall | avg | round 2) s"
+  done
+  echo "---"
+  echo "overall:\t$(avg < .overall | round 2)"
+  echo " memory:\t$(./memusg.sh ./$prg data/1020.json 2>/dev/null)"
+  echo
+done
+
+# -f: the scratch file may be missing if the loop was interrupted.
+rm -f .overall
+
diff --git a/run_msgpack.sh b/run_msgpack.sh
new file mode 100755
index 0000000..46c8127
--- /dev/null
+++ b/run_msgpack.sh
@@ -0,0 +1,33 @@
+#!/bin/zsh
+
+# Runs the msgpack benchmark programs over data/*.pak (largest file first) and
+# prints the average time per file, the overall average and the peak memory
+# for data/1020.pak.
+# The `avg` and `round` helpers are expected on PATH (see the export below).
+# NOTE(review): writing to /proc/sys/vm/drop_caches needs root privileges.
+
+export PATH=$PATH:/home/pks/src/scripts/
+export GEM_PATH=$GEM_PATH:/home/pks/lib/ruby
+REPEAT=10
+
+rm -f .overall_msgpack
+
+echo
+echo "MSGPACK parsing benchmark"
+echo "-------------------------"
+echo " REPEAT=$REPEAT"
+echo
+
+for prg in \
+  test_msgpack \
+  test_msgpack_ruby
+do
+  echo "[$prg]"
+  sync; echo 3 > /proc/sys/vm/drop_caches
+  echo > .overall_msgpack
+  for file in `ls -S data/*.pak`; do
+    echo "$file:\t$(./benchmark.rb $REPEAT ./$prg $file 2>/dev/null | tee -a .overall_msgpack | avg | round 2) s"
+  done
+  echo "---"
+  echo "overall:\t$(avg < .overall_msgpack | round 2)"
+  echo " memory:\t$(./memusg.sh ./$prg data/1020.pak 2>/dev/null)"
+  echo
+done
+
+# -f: the scratch file may be missing if the loop was interrupted.
+rm -f .overall_msgpack
+
diff --git a/test_JsonBox.cc b/test_JsonBox.cc
new file mode 100644
index 0000000..e1b22c4
--- /dev/null
+++ b/test_JsonBox.cc
@@ -0,0 +1,24 @@
+#include <iostream>
+#include <string>
+
+/*
+ * https://github.com/anhero/JsonBox
+ *
+ */
+#include "JsonBox/include/JsonBox.h"
+
+using namespace std;
+
+
+// Benchmark driver for JsonBox: loads the JSON file named by argv[1] and
+// writes characters 1..4 of the "rule" string of the last "edges" entry to
+// stderr.
+//
+// Returns 1 when the file argument is missing or "edges" is empty.
+int
+main(int argc, char** argv)
+{
+  if (argc < 2) {
+    cerr << "usage: " << (argc > 0 ? argv[0] : "test_JsonBox") << " <file.json>" << endl;
+    return 1;
+  }
+
+  JsonBox::Value v;
+  v.loadFromFile(argv[1]);
+  // back() on an empty container is undefined behaviour.
+  if (v["edges"].getArray().empty()) {
+    cerr << "no edges in " << argv[1] << endl;
+    return 1;
+  }
+  JsonBox::Value w = v["edges"].getArray().back();
+  string s = w["rule"].getString();
+  // substr(1, 4) throws std::out_of_range when s is empty.
+  cerr << (s.empty() ? string() : s.substr(1, 4)) << endl;
+
+  return 0;
+}
+
diff --git a/test_MicroJSON.cc b/test_MicroJSON.cc
new file mode 100644
index 0000000..d8d4969
--- /dev/null
+++ b/test_MicroJSON.cc
@@ -0,0 +1,28 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+
+/*
+ * http://grigory.info/MicroJSON.About.html
+ *
+ */
+#include "MicroJSON-0.3.2/Node.h"
+
+using namespace std;
+
+
+// Benchmark driver for MicroJSON: reads the file named by argv[1], parses it
+// and writes the "rule" sub-node of the last child of "edges" to stderr.
+//
+// Returns 1 when the file argument is missing or there is no "edges" node.
+int
+main(int argc, char** argv)
+{
+  if (argc < 2) {
+    cerr << "usage: " << (argc > 0 ? argv[0] : "test_MicroJSON") << " <file.json>" << endl;
+    return 1;
+  }
+
+  ifstream ifs(argv[1]);
+  string json_str((istreambuf_iterator<char>(ifs)),
+                  (istreambuf_iterator<char>()));
+
+  MicroJSON::Node Root;
+  Root.Parse(json_str);
+  MicroJSON::Node* edges = Root.GetSubNode("edges");
+  // Guard against a missing "edges" node before dereferencing.
+  if (!edges) {
+    cerr << "no edges in " << argv[1] << endl;
+    return 1;
+  }
+  // NOTE(review): this streams whatever GetSubNode returns; if that is a
+  // pointer, the address is printed rather than the rule text - confirm.
+  cerr << edges->GetChildren().back()->GetSubNode("rule") << endl;
+
+  return 0;
+}
+
diff --git a/test_MicroJSON.sh b/test_MicroJSON.sh
new file mode 100755
index 0000000..89d1d3d
--- /dev/null
+++ b/test_MicroJSON.sh
@@ -0,0 +1,5 @@
+#!/bin/sh -x
+
+
+LD_LIBRARY_PATH="/home/pks/z/test/json_test/MicroJSON-0.3.2" ./test_MicroJSON $1
+
diff --git a/test_cdec_json_parser.cc b/test_cdec_json_parser.cc
new file mode 100644
index 0000000..e805318
--- /dev/null
+++ b/test_cdec_json_parser.cc
@@ -0,0 +1,25 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+
+/*
+ * https://github.com/redpony/cdec/tree/master/decoder
+ *
+ */
+#include "cdec_json_parser/json_parse.h"
+
+using namespace std;
+
+
+/*
+ * Feeds the file named by argv[1] to the cdec JSONParser.
+ *
+ * Returns 0 after parsing and 1 on a usage error or an unreadable file.
+ */
+int
+main(int argc, char** argv)
+{
+  if (argc < 2) {
+    cerr << "usage: test_cdec_json_parser <file.json>" << endl;
+    return 1;
+  }
+
+  ifstream ifs(argv[1]);
+  if (!ifs) {
+    cerr << "cannot open " << argv[1] << endl;
+    return 1;
+  }
+
+  // Parse() is handed a plain istream*, hence the base-class reference.
+  istream& in = ifs;
+  JSONParser p;
+  p.Parse(&in);
+
+  return 0;
+}
+
diff --git a/test_gason.cc b/test_gason.cc
new file mode 100644
index 0000000..d78c385
--- /dev/null
+++ b/test_gason.cc
@@ -0,0 +1,71 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <string.h>
+
+/*
+ * https://github.com/vivkin/gason
+ *
+ */
+#include "gason/gason.h"
+
+using namespace std;
+
+
+/*
+ * Writes "Goal" to stderr when characters 1..4 of s spell "Goal".
+ *
+ * s: NUL-terminated string. NULL and the empty string are ignored, because
+ *    an empty string has no offset 1 to start comparing from.
+ */
+void
+print(const char *s)
+{
+  if (s == NULL || s[0] == '\0')
+    return;
+  // strncmp stops at the terminator, so short strings simply compare unequal.
+  if (strncmp(s + 1, "Goal", 4) == 0)
+    cerr << "Goal" << endl;
+}
+
+/*
+ * Recursively visits every value below o, handing each string value and
+ * each object key to print(). Numbers, booleans and nulls are skipped.
+ */
+void
+walk(JsonValue o)
+{
+  switch (o.getTag()) {
+  case JSON_TAG_STRING:
+    print(o.toString());
+    break;
+  case JSON_TAG_ARRAY:
+    // Only iterate when the container has a node to start from.
+    if (o.toNode()) {
+      for (auto elem : o)
+        walk(elem->value);
+    }
+    break;
+  case JSON_TAG_OBJECT:
+    if (o.toNode()) {
+      for (auto member : o) {
+        print(member->key);
+        walk(member->value);
+      }
+    }
+    break;
+  case JSON_TAG_NUMBER:
+  case JSON_TAG_BOOL:
+  case JSON_TAG_NULL:
+    break;
+  }
+}
+
+/*
+ * Reads the file named by argv[1] into memory, parses it with gason and
+ * walks the resulting tree.
+ *
+ * Returns 0 on success and 1 on a usage error or a parse failure.
+ */
+int
+main(int argc, char** argv)
+{
+  if (argc < 2) {
+    cerr << "usage: test_gason <file.json>" << endl;
+    return 1;
+  }
+
+  ifstream ifs(argv[1]);
+  string json_str((istreambuf_iterator<char>(ifs)),
+                  (istreambuf_iterator<char>()));
+
+  // jsonParse() takes a writable char buffer. Handing it the string's own
+  // storage avoids a strdup() copy that would have to be freed again.
+  // json_str outlives walk(), so pointers into the buffer stay valid.
+  char *endptr;
+  JsonValue v;
+  JsonAllocator a;
+  JsonParseStatus status = jsonParse(&json_str[0], &endptr, &v, a);
+  if (status != JSON_PARSE_OK) {
+    cerr << "parse error in " << argv[1] << endl;
+    return 1;
+  }
+  walk(v);
+
+  return 0;
+}
+
diff --git a/test_json-cpp.cc b/test_json-cpp.cc
new file mode 100644
index 0000000..0791704
--- /dev/null
+++ b/test_json-cpp.cc
@@ -0,0 +1,100 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+
+/*
+ * https://github.com/ascheglov/json-cpp
+ *
+ */
+#include "json-cpp.hpp"
+
+using namespace std;
+
+
+/* Element type of Hg::nodes; filled from the keys "id", "cat" and "span". */
+struct Node {
+  int id = 0;          // zero until the parser assigns it
+  string cat;
+  vector<int> span;
+};
+
+/*
+ * Named double-valued entries, used for Hg::weights and Edge::f.
+ * Every member starts at 0.0 so that entries missing from the input never
+ * hold indeterminate values.
+ */
+struct Vector {
+  double CountEF = 0.0;
+  double EgivenFCoherent = 0.0;
+  double Glue = 0.0;
+  double IsSingletonF = 0.0;
+  double IsSingletonFE = 0.0;
+  double LanguageModel = 0.0;
+  double LanguageModel_OOV = 0.0;
+  double MaxLexFgivenE = 0.0;
+  double MaxLexEgivenF = 0.0;
+  double PassThrough = 0.0;
+  double PassThrough_1 = 0.0;
+  double PassThrough_2 = 0.0;
+  double PassThrough_3 = 0.0;
+  double PassThrough_4 = 0.0;
+  double PassThrough_5 = 0.0;
+  double PassThrough_6 = 0.0;
+  double SampleCountF = 0.0;
+  double WordPenalty = 0.0;
+};
+
+/*
+ * Element type of Hg::edges; filled from the keys "head", "rule", "tails",
+ * "f" and "weight". Scalars start at zero instead of being indeterminate.
+ */
+struct Edge {
+  int head = 0;
+  string rule;
+  vector<int> tails;
+  Vector f;
+  double weight = 0.0;
+};
+
+/*
+ * Top-level document: one weight Vector plus the node, edge and rule lists.
+ */
+struct Hg {
+  Vector                   weights;
+  std::vector<Node>        nodes;
+  std::vector<Edge>        edges;
+  std::vector<std::string> rules;
+};
+
+/* Lists the key/member pairs of Hg for json-cpp's fields(). */
+template<typename X>
+inline void
+serialize(jsoncpp::Stream<X>& stream, Hg& o)
+{
+  fields(o, stream,
+         "weights", o.weights,
+         "nodes", o.nodes,
+         "edges", o.edges,
+         "rules", o.rules);
+}
+
+/* Lists the key/member pairs of Edge for json-cpp's fields(). */
+template<typename X>
+inline void
+serialize(jsoncpp::Stream<X>& stream, Edge& o)
+{
+  fields(o, stream,
+         "head", o.head,
+         "rule", o.rule,
+         "tails", o.tails,
+         "f", o.f,
+         "weight", o.weight);
+}
+
+/*
+ * Lists the key/member pairs of Vector for json-cpp's fields().
+ * The pairs are passed in the same order as before.
+ */
+template<typename X>
+inline void
+serialize(jsoncpp::Stream<X>& stream, Vector& o)
+{
+  fields(o, stream,
+         "EgivenFCoherent", o.EgivenFCoherent,
+         "SampleCountF", o.SampleCountF,
+         "CountEF", o.CountEF,
+         "MaxLexFgivenE", o.MaxLexFgivenE,
+         "MaxLexEgivenF", o.MaxLexEgivenF,
+         "IsSingletonF", o.IsSingletonF,
+         "IsSingletonFE", o.IsSingletonFE,
+         "LanguageModel", o.LanguageModel,
+         "LanguageModel_OOV", o.LanguageModel_OOV,
+         "PassThrough", o.PassThrough,
+         "PassThrough_1", o.PassThrough_1,
+         "PassThrough_2", o.PassThrough_2,
+         "PassThrough_3", o.PassThrough_3,
+         "PassThrough_4", o.PassThrough_4,
+         "PassThrough_5", o.PassThrough_5,
+         "PassThrough_6", o.PassThrough_6,
+         "WordPenalty", o.WordPenalty,
+         "Glue", o.Glue);
+}
+
+/* Lists the key/member pairs of Node for json-cpp's fields(). */
+template<typename X>
+inline void
+serialize(jsoncpp::Stream<X>& stream, Node& o)
+{
+  fields(o, stream,
+         "id", o.id,
+         "cat", o.cat,
+         "span", o.span);
+}
+
+/*
+ * Reads the file named by argv[1] into memory, parses it into an Hg with
+ * json-cpp and writes characters 1..4 of the last edge's rule to stderr.
+ *
+ * Returns 0 on success and 1 on a usage error or when there is no usable
+ * last edge.
+ */
+int
+main(int argc, char** argv)
+{
+  if (argc < 2) {
+    cerr << "usage: test_json-cpp <file.json>" << endl;
+    return 1;
+  }
+
+  ifstream ifs(argv[1]);
+  string json_str((istreambuf_iterator<char>(ifs)),
+                  (istreambuf_iterator<char>()));
+
+  // The members of hg are already default-constructed; assigning fresh
+  // temporaries to them would only add copies.
+  Hg hg;
+  jsoncpp::parse(hg, json_str);
+
+  // back() on an empty vector is undefined behaviour.
+  if (hg.edges.empty()) {
+    cerr << "no edges found in " << argv[1] << endl;
+    return 1;
+  }
+  const Edge& last_edge = hg.edges.back();
+  // substr(1, 4) throws std::out_of_range when the rule is empty.
+  if (last_edge.rule.empty()) {
+    cerr << "last edge has no rule string" << endl;
+    return 1;
+  }
+  cerr << last_edge.rule.substr(1, 4) << endl;
+
+  return 0;
+}
+
diff --git a/test_jsoncpp.cc b/test_jsoncpp.cc
new file mode 100644
index 0000000..ab3bd0c
--- /dev/null
+++ b/test_jsoncpp.cc
@@ -0,0 +1,29 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+
+/*
+ * https://github.com/open-source-parsers/jsoncpp
+ *
+ */
+#include "jsoncpp/include/json/json.h"
+
+using namespace std;
+
+
+/*
+ * Reads the file named by argv[1] into memory, parses it with jsoncpp and
+ * writes characters 1..4 of the last edge's "rule" to stderr.
+ *
+ * Returns 0 on success and 1 on a usage error, a parse failure or when there
+ * is no usable last edge.
+ */
+int
+main(int argc, char** argv)
+{
+  if (argc < 2) {
+    cerr << "usage: test_jsoncpp <file.json>" << endl;
+    return 1;
+  }
+
+  ifstream ifs(argv[1]);
+  string json_str((istreambuf_iterator<char>(ifs)),
+                  (istreambuf_iterator<char>()));
+
+  Json::Value v;
+  Json::Reader reader;
+  if (!reader.parse(json_str, v)) {
+    cerr << "parse error in " << argv[1] << endl;
+    return 1;
+  }
+
+  // size() is unsigned, so size() - 1 would wrap around on an empty array.
+  if (v["edges"].empty()) {
+    cerr << "no edges found in " << argv[1] << endl;
+    return 1;
+  }
+  Json::Value last_edge = v["edges"][v["edges"].size() - 1];
+  const string rule = last_edge["rule"].asString();
+  // substr(1, 4) throws std::out_of_range when the rule is empty.
+  if (rule.empty()) {
+    cerr << "last edge has no rule string" << endl;
+    return 1;
+  }
+  cerr << rule.substr(1, 4) << endl;
+
+  return 0;
+}
+
diff --git a/test_jsonxx.cc b/test_jsonxx.cc
new file mode 100644
index 0000000..d06640e
--- /dev/null
+++ b/test_jsonxx.cc
@@ -0,0 +1,35 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+
+/*
+ * https://github.com/hjiang/jsonxx
+ *
+ */
+#include "jsonxx/jsonxx.h"
+
+using namespace std;
+
+
+/*
+ * Reads the file named by argv[1] into memory, parses it with jsonxx and
+ * writes "Goal" to stderr for every edge whose rule has "Goal" at
+ * characters 1..4.
+ *
+ * Returns 0 on success and 1 on a usage error, a parse failure or a
+ * document without an "edges" array.
+ */
+int
+main(int argc, char** argv)
+{
+  if (argc < 2) {
+    cerr << "usage: test_jsonxx <file.json>" << endl;
+    return 1;
+  }
+
+  ifstream ifs(argv[1]);
+  string json_str((istreambuf_iterator<char>(ifs)),
+                  (istreambuf_iterator<char>()));
+
+  jsonxx::Object o;
+  if (!o.parse(json_str)) {
+    cerr << "parse error in " << argv[1] << endl;
+    return 1;
+  }
+  if (!o.has<jsonxx::Array>("edges")) {
+    cerr << "no edges found in " << argv[1] << endl;
+    return 1;
+  }
+
+  // References avoid deep-copying the array and every edge object.
+  const jsonxx::Array& edges = o.get<jsonxx::Array>("edges");
+  jsonxx::Array::container::const_iterator it = edges.values().begin(),
+                                           end = edges.values().end();
+  for (; it != end; ++it) {
+    if (!(*it)->is<jsonxx::Object>())
+      continue;
+    const jsonxx::Object& e = (*it)->get<jsonxx::Object>();
+    if (!e.has<string>("rule"))
+      continue;
+    const string& rule = e.get<string>("rule");
+    // substr(1, 4) throws std::out_of_range when the rule is empty.
+    if (rule.empty())
+      continue;
+    const string s = rule.substr(1, 4);
+    if (s == "Goal")
+      cerr << s << endl;
+  }
+
+  return 0;
+}
+
diff --git a/test_libjson.cc b/test_libjson.cc
new file mode 100644
index 0000000..6b3e2a9
--- /dev/null
+++ b/test_libjson.cc
@@ -0,0 +1,44 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+
+/*
+ * http://sourceforge.net/projects/libjson/
+ *
+ */
+#include "libjson/libjson.h"
+
+using namespace std;
+
+
+/*
+ * Visits every child of n, descending into arrays and nodes, and writes
+ * "Goal" to stderr for each child whose string form is at least five
+ * characters long and has "Goal" at characters 1..4.
+ */
+void
+walk(const JSONNode & n)
+{
+  for (JSONNode::const_iterator child = n.begin(); child != n.end(); ++child) {
+    if (child->type() == JSON_ARRAY || child->type() == JSON_NODE)
+      walk(*child);
+
+    string text = child->as_string();
+    if (text.size() < 5)
+      continue;
+
+    string head = text.substr(1, 4);
+    if (head == "Goal")
+      cerr << head << endl;
+  }
+}
+
+/*
+ * Reads the file named by argv[1] into memory, parses it with libjson and
+ * walks the resulting tree.
+ *
+ * Returns 0 on success and 1 on a usage error or an unreadable file.
+ */
+int
+main(int argc, char** argv)
+{
+  if (argc < 2) {
+    cerr << "usage: test_libjson <file.json>" << endl;
+    return 1;
+  }
+
+  ifstream ifs(argv[1]);
+  if (!ifs) {
+    cerr << "cannot open " << argv[1] << endl;
+    return 1;
+  }
+  string json_str((istreambuf_iterator<char>(ifs)),
+                  (istreambuf_iterator<char>()));
+
+  JSONNode n = libjson::parse(json_str);
+  walk(n);
+
+  return 0;
+}
+
diff --git a/test_msgpack.cc b/test_msgpack.cc
new file mode 100644
index 0000000..1204b05
--- /dev/null
+++ b/test_msgpack.cc
@@ -0,0 +1,83 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+
+/*
+ * http://msgpack.org/
+ *
+ */
+#include <msgpack.hpp>
+#include <msgpack/fbuffer.h>
+#include <msgpack/fbuffer.hpp>
+
+using namespace std;
+
+
+/* Element type of Hg::nodes; packed as (id, cat, span). */
+struct Node {
+  int              id;
+  std::string      cat;
+  std::vector<int> span;
+
+  MSGPACK_DEFINE(id, cat, span);
+};
+
+/*
+ * Named double-valued entries, used for Hg::weights and Edge::f.
+ * NOTE(review): MSGPACK_DEFINE lists MaxLexEgivenF before MaxLexFgivenE,
+ * the reverse of the declaration order -- confirm against the packer.
+ */
+struct Vector {
+  double CountEF, EgivenFCoherent, Glue;
+  double IsSingletonF, IsSingletonFE;
+  double LanguageModel, LanguageModel_OOV;
+  double MaxLexFgivenE, MaxLexEgivenF;
+  double PassThrough, PassThrough_1, PassThrough_2, PassThrough_3,
+         PassThrough_4, PassThrough_5, PassThrough_6;
+  double SampleCountF, WordPenalty;
+
+  MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue,
+                 IsSingletonF, IsSingletonFE,
+                 LanguageModel, LanguageModel_OOV,
+                 MaxLexEgivenF, MaxLexFgivenE,
+                 PassThrough, PassThrough_1, PassThrough_2, PassThrough_3,
+                 PassThrough_4, PassThrough_5, PassThrough_6,
+                 SampleCountF, WordPenalty);
+};
+
+/* Element type of Hg::edges; packed as (head, rule, tails, f, weight). */
+struct Edge {
+  int              head;
+  std::string      rule;
+  std::vector<int> tails;
+  Vector           f;
+  double           weight;
+
+  MSGPACK_DEFINE(head, rule, tails, f, weight);
+};
+
+/* Top-level document; packed as (weights, nodes, edges, rules). */
+struct Hg {
+  Vector                   weights;
+  std::vector<Node>        nodes;
+  std::vector<Edge>        edges;
+  std::vector<std::string> rules;
+
+  MSGPACK_DEFINE(weights, nodes, edges, rules);
+};
+
+/*
+ * Reads the MessagePack file named by argv[1] into memory, unpacks it into
+ * an Hg and writes characters 1..4 of the last edge's rule to stderr.
+ *
+ * Returns 0 on success and 1 on a usage error or when there is no usable
+ * last edge.
+ */
+int
+main(int argc, char** argv)
+{
+  if (argc < 2) {
+    cerr << "usage: test_msgpack <file.pak>" << endl;
+    return 1;
+  }
+
+  // The input is binary data, so keep the stream from translating bytes.
+  ifstream ifs(argv[1], ios::in | ios::binary);
+  string str((istreambuf_iterator<char>(ifs)),
+             (istreambuf_iterator<char>()));
+
+  msgpack::zone zone;
+  msgpack::object obj;
+  msgpack::unpack(str.data(), str.size(), NULL, &zone, &obj);
+  // Value-initialise so that the scalar members start at zero.
+  Hg hg = Hg();
+  obj.convert(&hg);
+
+  // back() on an empty vector is undefined behaviour.
+  if (hg.edges.empty()) {
+    cerr << "no edges found in " << argv[1] << endl;
+    return 1;
+  }
+  const Edge& last_edge = hg.edges.back();
+  // substr(1, 4) throws std::out_of_range when the rule is empty.
+  if (last_edge.rule.empty()) {
+    cerr << "last edge has no rule string" << endl;
+    return 1;
+  }
+  cerr << last_edge.rule.substr(1, 4) << endl;
+
+  return 0;
+}
+
diff --git a/test_msgpack_ruby b/test_msgpack_ruby
new file mode 100755
index 0000000..0f2d387
--- /dev/null
+++ b/test_msgpack_ruby
@@ -0,0 +1,9 @@
+#!/usr/bin/env ruby
+
+require 'msgpack'
+
+
+msg = MessagePack.unpack(File.new(ARGV[0]).read)
+
+STDERR.write "#{msg["edges"].last()["rule"][1..4]}\n"
+
diff --git a/test_nosjob.cc b/test_nosjob.cc
new file mode 100644
index 0000000..cf8891f
--- /dev/null
+++ b/test_nosjob.cc
@@ -0,0 +1,32 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+
+/*
+ * http://fossil.wanderinghorse.net/repos/nosjob/index.cgi/index
+ *
+ */
+#include "nosjob-e1d67401fcda6e05/include/wh/nosjob/nosjob.hpp"
+
+using namespace std;
+
+
+/*
+ * Reads the file named by argv[1] into memory, parses it with nosjob and
+ * writes characters 1..4 of the last edge's "rule" to stderr.
+ *
+ * Returns 0 on success and 1 on a usage error or when there is no usable
+ * last edge.
+ */
+int
+main(int argc, char** argv)
+{
+  if (argc < 2) {
+    cerr << "usage: test_nosjob <file.json>" << endl;
+    return 1;
+  }
+
+  ifstream ifs(argv[1]);
+  string json_str((istreambuf_iterator<char>(ifs)),
+                  (istreambuf_iterator<char>()));
+
+  nosjob::Atom root = nosjob::JsonParser().parse(json_str);
+  nosjob::Object o = nosjob::Object::cast(root);
+  nosjob::Atom edges = o.get(nosjob::Utf8String("edges"));
+  nosjob::Array a = nosjob::Array::cast(edges);
+  // size() - 1 must not be evaluated for an empty array.
+  if (a.size() == 0) {
+    cerr << "no edges found in " << argv[1] << endl;
+    return 1;
+  }
+  nosjob::Object last_edge = nosjob::Object::cast(a.get(a.size()-1));
+  nosjob::Utf8String s = nosjob::Utf8String::cast(last_edge.get(nosjob::Utf8String("rule")));
+  string t((char*)s.c_str());
+  // substr(1, 4) throws std::out_of_range when the rule is empty.
+  if (t.empty()) {
+    cerr << "last edge has no rule string" << endl;
+    return 1;
+  }
+  cerr << t.substr(1, 4) << endl;
+
+  return 0;
+}
+
diff --git a/test_picojson.cc b/test_picojson.cc
new file mode 100644
index 0000000..cf3b621
--- /dev/null
+++ b/test_picojson.cc
@@ -0,0 +1,32 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <sstream>
+
+/*
+ * https://github.com/kazuho/picojson
+ *
+ */
+#include "picojson/picojson.h"
+
+using namespace std;
+
+
+/*
+ * Reads the file named by argv[1] into memory, parses it with picojson and
+ * writes characters 1..4 of the last edge's "rule" to stderr.
+ *
+ * Returns 0 on success and 1 on a usage error, a parse failure or when the
+ * document does not have the expected shape.
+ */
+int
+main(int argc, char** argv)
+{
+  if (argc < 2) {
+    cerr << "usage: test_picojson <file.json>" << endl;
+    return 1;
+  }
+
+  ifstream ifs(argv[1]);
+  string json_str((istreambuf_iterator<char>(ifs)),
+                  (istreambuf_iterator<char>()));
+
+  picojson::value v;
+  istringstream iss(json_str);
+  // parse() reports problems through the returned message.
+  const string err = picojson::parse(v, iss);
+  if (!err.empty()) {
+    cerr << "parse error: " << err << endl;
+    return 1;
+  }
+
+  // get<T>() must only be called after is<T>() has confirmed the type.
+  if (!v.is<picojson::object>()) {
+    cerr << "top-level value is not an object" << endl;
+    return 1;
+  }
+  picojson::value::object& obj = v.get<picojson::object>();
+  picojson::value& edges = obj["edges"];
+  if (!edges.is<picojson::array>() || edges.get<picojson::array>().empty()) {
+    cerr << "no edges found in " << argv[1] << endl;
+    return 1;
+  }
+  picojson::value& last = edges.get<picojson::array>().back();
+  if (!last.is<picojson::object>()) {
+    cerr << "last edge is not an object" << endl;
+    return 1;
+  }
+  picojson::value::object& last_edge = last.get<picojson::object>();
+  picojson::value& rule = last_edge["rule"];
+  // substr(1, 4) throws std::out_of_range when the rule is empty.
+  if (!rule.is<string>() || rule.get<string>().empty()) {
+    cerr << "last edge has no rule string" << endl;
+    return 1;
+  }
+  string s(rule.get<string>());
+  cerr << s.substr(1, 4) << endl;
+
+  return 0;
+}
+
diff --git a/test_rapidjson.cc b/test_rapidjson.cc
new file mode 100644
index 0000000..b344ed0
--- /dev/null
+++ b/test_rapidjson.cc
@@ -0,0 +1,31 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <string.h>
+
+/*
+ * https://github.com/miloyip/rapidjson
+ *
+ */
+#include "rapidjson/include/rapidjson/rapidjson.h"
+#include "rapidjson/include/rapidjson/document.h"
+#include "rapidjson/include/rapidjson/stringbuffer.h"
+
+using namespace std;
+
+
+/*
+ * Reads the file named by argv[1] into memory, parses it with RapidJSON and
+ * writes characters 1..4 of the last edge's "rule" to stderr.
+ *
+ * Returns 0 on success and 1 on a usage error, a parse failure or when the
+ * document does not have the expected shape.
+ */
+int
+main(int argc, char** argv)
+{
+  if (argc < 2) {
+    cerr << "usage: test_rapidjson <file.json>" << endl;
+    return 1;
+  }
+
+  ifstream ifs(argv[1]);
+  string json_str((istreambuf_iterator<char>(ifs)),
+                  (istreambuf_iterator<char>()));
+
+  rapidjson::Document d;
+  d.Parse(json_str.c_str());
+  if (d.HasParseError() || !d.IsObject() || !d.HasMember("edges")) {
+    cerr << "parse error or no edges in " << argv[1] << endl;
+    return 1;
+  }
+
+  const rapidjson::Value& edges = d["edges"];
+  // Size() is unsigned, so Size() - 1 would wrap around on an empty array.
+  if (!edges.IsArray() || edges.Size() == 0) {
+    cerr << "no edges found in " << argv[1] << endl;
+    return 1;
+  }
+  const rapidjson::Value& last_edge = edges[edges.Size() - 1];
+  if (!last_edge.IsObject() || !last_edge.HasMember("rule") ||
+      !last_edge["rule"].IsString()) {
+    cerr << "last edge has no rule string" << endl;
+    return 1;
+  }
+  string s(last_edge["rule"].GetString());
+  // substr(1, 4) throws std::out_of_range when the rule is empty.
+  if (s.empty()) {
+    cerr << "last edge has an empty rule string" << endl;
+    return 1;
+  }
+  cerr << s.substr(1, 4) << endl;
+
+  return 0;
+}
+
diff --git a/test_sajson.cc b/test_sajson.cc
new file mode 100644
index 0000000..4081d43
--- /dev/null
+++ b/test_sajson.cc
@@ -0,0 +1,32 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <string.h>
+
+/*
+ * https://github.com/chadaustin/sajson
+ *
+ */
+#include "sajson/include/sajson.h"
+
+using namespace std;
+
+
+/*
+ * Reads the file named by argv[1] into memory, parses it with sajson and
+ * writes characters 1..4 of the last edge's "rule" to stderr.
+ *
+ * Returns 0 on success and 1 on a usage error, a parse failure or when there
+ * is no usable last edge.
+ */
+int
+main(int argc, char** argv)
+{
+  if (argc < 2) {
+    cerr << "usage: test_sajson <file.json>" << endl;
+    return 1;
+  }
+
+  ifstream ifs(argv[1]);
+  string json_str((istreambuf_iterator<char>(ifs)),
+                  (istreambuf_iterator<char>()));
+
+  const sajson::document& document = sajson::parse(sajson::literal(json_str.c_str()));
+  if (!document.is_valid()) {
+    cerr << "parse error in " << argv[1] << endl;
+    return 1;
+  }
+
+  const sajson::value& root = document.get_root();
+  size_t index_a = root.find_object_key(sajson::literal("edges"));
+  // NOTE(review): find_object_key() presumably returns get_length() when the
+  // key is absent -- confirm against the bundled sajson version.
+  if (index_a >= root.get_length()) {
+    cerr << "no edges found in " << argv[1] << endl;
+    return 1;
+  }
+  const sajson::value& edges = root.get_object_value(index_a);
+  // get_length() - 1 must not be evaluated for an empty array.
+  if (edges.get_length() == 0) {
+    cerr << "no edges found in " << argv[1] << endl;
+    return 1;
+  }
+  const sajson::value& last_edge = edges.get_array_element(edges.get_length()-1);
+  size_t index_r = last_edge.find_object_key(sajson::literal("rule"));
+  if (index_r >= last_edge.get_length()) {
+    cerr << "last edge has no rule string" << endl;
+    return 1;
+  }
+  const sajson::value& r = last_edge.get_object_value(index_r);
+  const string rule = r.as_string();
+  // substr(1, 4) throws std::out_of_range when the rule is empty.
+  if (rule.empty()) {
+    cerr << "last edge has an empty rule string" << endl;
+    return 1;
+  }
+  cerr << rule.substr(1, 4) << endl;
+
+  return 0;
+}
+