From 9a0859212de4d1304f9392fe910921227421c8c3 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sat, 16 Aug 2014 21:25:52 +0100 Subject: cleanup --- .gitignore | 20 +- .gitmodules | 27 + Makefile | 54 -- README.md | 4 +- benchmark.rb | 9 - benchmark/benchmark.rb | 9 + benchmark/run.sh | 41 ++ benchmark/run_msgpack.sh | 38 ++ cdec_json_parser/JSON_parser.c | 1012 ------------------------------------ cdec_json_parser/JSON_parser.h | 152 ------ cdec_json_parser/LICENSE | 213 -------- cdec_json_parser/Makefile | 7 - cdec_json_parser/json_parse.cc | 31 -- cdec_json_parser/json_parse.h | 62 --- data/Makefile | 9 - data/cdec.ini | 4 - data/make.sh | 8 - data/make_paks.cc | 125 ----- data/make_paks2.cc | 121 ----- data/to_ascii.rb | 13 - data/weights.init | 12 - lib/JsonBox | 1 + lib/cdec_json_parser/JSON_parser.c | 1012 ++++++++++++++++++++++++++++++++++++ lib/cdec_json_parser/JSON_parser.h | 152 ++++++ lib/cdec_json_parser/LICENSE | 213 ++++++++ lib/cdec_json_parser/Makefile | 7 + lib/cdec_json_parser/json_parse.cc | 31 ++ lib/cdec_json_parser/json_parse.h | 62 +++ lib/gason | 1 + lib/json-cpp | 1 + lib/jsoncpp | 1 + lib/jsonxx | 1 + lib/msgpack-c | 1 + lib/picojson | 1 + lib/rapidjson | 1 + lib/sajson | 1 + memusg.sh | 13 - run.sh | 43 -- run_msgpack.sh | 39 -- src/Makefile | 61 +++ src/make_pak.cc | 125 +++++ src/make_pak_s.cc | 121 +++++ src/test_JsonBox.cc | 24 + src/test_MicroJSON.cc | 28 + src/test_MicroJSON.sh | 5 + src/test_cdec_json_parser.cc | 25 + src/test_gason.cc | 71 +++ src/test_json-cpp.cc | 100 ++++ src/test_jsoncpp.cc | 29 ++ src/test_jsonxx.cc | 35 ++ src/test_libjson.cc | 44 ++ src/test_msgpack.cc | 81 +++ src/test_msgpack_ruby | 9 + src/test_msgpack_streaming.cc | 99 ++++ src/test_nosjob.cc | 32 ++ src/test_picojson.cc | 32 ++ src/test_rapidjson.cc | 31 ++ src/test_sajson.cc | 32 ++ test_JsonBox.cc | 24 - test_MicroJSON.cc | 28 - test_MicroJSON.sh | 5 - test_cdec_json_parser.cc | 25 - test_gason.cc | 71 --- test_json-cpp.cc | 100 ---- test_jsoncpp.cc | 29 -- test_jsonxx.cc | 35 -- test_libjson.cc | 44 -- test_msgpack.cc | 81 --- test_msgpack_ruby | 9 - test_msgpack_streaming.cc | 99 ---- test_nosjob.cc | 32 -- test_picojson.cc | 32 -- test_rapidjson.cc | 31 -- test_sajson.cc | 32 -- 74 files changed, 2593 insertions(+), 2620 deletions(-) create mode 100644 .gitmodules delete mode 100644 Makefile delete mode 100755 benchmark.rb create mode 100755 benchmark/benchmark.rb create mode 100755 benchmark/run.sh create mode 100755 benchmark/run_msgpack.sh delete mode 100644 cdec_json_parser/JSON_parser.c delete mode 100644 cdec_json_parser/JSON_parser.h delete mode 100644 cdec_json_parser/LICENSE delete mode 100644 cdec_json_parser/Makefile delete mode 100644 cdec_json_parser/json_parse.cc delete mode 100644 cdec_json_parser/json_parse.h delete mode 100644 data/Makefile delete mode 100644 data/cdec.ini delete mode 100755 data/make.sh delete mode 100644 data/make_paks.cc delete mode 100644 data/make_paks2.cc delete mode 100755 data/to_ascii.rb delete mode 100644 data/weights.init create mode 160000 lib/JsonBox create mode 100644 lib/cdec_json_parser/JSON_parser.c create mode 100644 lib/cdec_json_parser/JSON_parser.h create mode 100644 lib/cdec_json_parser/LICENSE create mode 100644 lib/cdec_json_parser/Makefile create mode 100644 lib/cdec_json_parser/json_parse.cc create mode 100644 lib/cdec_json_parser/json_parse.h create mode 160000 lib/gason create mode 160000 lib/json-cpp create mode 160000 lib/jsoncpp create mode 160000 lib/jsonxx create mode 160000 lib/msgpack-c create mode 160000 lib/picojson create mode 160000 lib/rapidjson create mode 160000 lib/sajson delete mode 100755 memusg.sh delete mode 100755 run.sh delete mode 100755 run_msgpack.sh create mode 100644 src/Makefile create mode 100644 src/make_pak.cc create mode 100644 src/make_pak_s.cc create mode 100644 src/test_JsonBox.cc create mode 100644 src/test_MicroJSON.cc create mode 100755 src/test_MicroJSON.sh create mode 100644 src/test_cdec_json_parser.cc create mode 100644 src/test_gason.cc create mode 100644 src/test_json-cpp.cc create mode 100644 src/test_jsoncpp.cc create mode 100644 src/test_jsonxx.cc create mode 100644 src/test_libjson.cc create mode 100644 src/test_msgpack.cc create mode 100755 src/test_msgpack_ruby create mode 100644 src/test_msgpack_streaming.cc create mode 100644 src/test_nosjob.cc create mode 100644 src/test_picojson.cc create mode 100644 src/test_rapidjson.cc create mode 100644 src/test_sajson.cc delete mode 100644 test_JsonBox.cc delete mode 100644 test_MicroJSON.cc delete mode 100755 test_MicroJSON.sh delete mode 100644 test_cdec_json_parser.cc delete mode 100644 test_gason.cc delete mode 100644 test_json-cpp.cc delete mode 100644 test_jsoncpp.cc delete mode 100644 test_jsonxx.cc delete mode 100644 test_libjson.cc delete mode 100644 test_msgpack.cc delete mode 100755 test_msgpack_ruby delete mode 100644 test_msgpack_streaming.cc delete mode 100644 test_nosjob.cc delete mode 100644 test_picojson.cc delete mode 100644 test_rapidjson.cc delete mode 100644 test_sajson.cc diff --git a/.gitignore b/.gitignore index dbdb389..382dbee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,16 +1,6 @@ -JsonBox/ -MicroJSON*/ -gason/ -json-cpp.hpp -jsoncpp/ -jsonxx/ -libjson/ -nosjob*/ -picojson/ -proto_map/ -rapidjson/ -sajson/ -msgpack-c/ +MicroJSON-*/ +libjson-*/ +nosjob-*/ *.o test_JsonBox test_MicroJSON @@ -26,8 +16,8 @@ test_rapidjson test_sajson test_msgpack test_msgpack_streaming -data/make_paks -data/make_paks2 +make_pak +make_pak_s data/*.pak* data/*.json data/*.in diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..d7259ef --- /dev/null +++ b/.gitmodules @@ -0,0 +1,27 @@ +[submodule "sajson"] + path = lib/sajson + url = https://github.com/chadaustin/sajson +[submodule "rapidjson"] + path = lib/rapidjson + url = https://github.com/miloyip/rapidjson +[submodule "picojson"] + path = lib/picojson + url = https://github.com/kazuho/picojson.git +[submodule "msgpack-c"] + path = lib/msgpack-c + url = https://github.com/msgpack/msgpack-c.git +[submodule "gason"] + path = lib/gason + url = https://github.com/vivkin/gason/ +[submodule "JsonBox"] + path = lib/JsonBox + url = https://github.com/anhero/JsonBox.git +[submodule "jsoncpp"] + path = lib/jsoncpp + url = https://github.com/open-source-parsers/jsoncpp +[submodule "jsonxx"] + path = lib/jsonxx + url = https://github.com/hjiang/jsonxx.git +[submodule "json-cpp"] + path = lib/json-cpp + url = https://github.com/ascheglov/json-cpp.git diff --git a/Makefile b/Makefile deleted file mode 100644 index 3013d32..0000000 --- a/Makefile +++ /dev/null @@ -1,54 +0,0 @@ -COMPILER := g++ -CXXFLAGS := -O3 -march=native -mtune=native - - -all: test_gason test_json-cpp test_jsoncpp test_libjson test_picojson test_rapidjson test_sajson test_JsonBox test_jsonxx test_MicroJSON test_nosjob test_cdec_json_parser - -test_gason: test_gason.cc - $(COMPILER) $(CXXFLAGS) -std=c++11 test_gason.cc -o test_gason gason/gason.o - -test_json-cpp: test_json-cpp.cc - $(COMPILER) $(CXXFLAGS) -std=c++11 test_json-cpp.cc -o test_json-cpp - -test_jsoncpp: test_jsoncpp.cc - $(COMPILER) $(CXXFLAGS) test_jsoncpp.cc jsoncpp/lib/libjsoncpp.a -o test_jsoncpp - -test_libjson: test_libjson.cc - $(COMPILER) $(CXXFLAGS) test_libjson.cc libjson/libjson.a -o test_libjson - -test_picojson: test_picojson.cc - $(COMPILER) $(CXXFLAGS) test_picojson.cc -o test_picojson - -test_rapidjson: test_rapidjson.cc - $(COMPILER) $(CXXFLAGS) test_rapidjson.cc -o test_rapidjson - -test_sajson: test_sajson.cc - $(COMPILER) $(CXXFLAGS) test_sajson.cc -o test_sajson - -test_JsonBox: test_JsonBox.cc - $(COMPILER) $(CXXFLAGS) test_JsonBox.cc -I./JsonBox/include/ JsonBox/libJsonBox.a -o test_JsonBox - -test_jsonxx: test_jsonxx.cc - $(COMPILER) $(CXXFLAGS) test_jsonxx.cc jsonxx/jsonxx.o -o test_jsonxx - -test_MicroJSON: test_MicroJSON.cc - #$(COMPILER) $(CXXFLAGS) test_MicroJSON.cc MicroJSON-0.3.2/libMicroJSON-0.so -lUTF8Strings-1 -o test_MicroJSON - -test_nosjob: test_nosjob.cc - $(COMPILER) $(CXXFLAGS) test_nosjob.cc nosjob-e1d67401fcda6e05/libnosjob.a -o test_nosjob - -test_cdec_json_parser: test_cdec_json_parser.cc - $(COMPILER) $(CXXFLAGS) test_cdec_json_parser.cc cdec_json_parser/json_parse.o cdec_json_parser/JSON_parser.o -o test_cdec_json_parser - -test_msgpack: test_msgpack.cc - $(COMPILER) $(CXXFLAGS) test_msgpack.cc -I./msgpack-c/include/ ./msgpack-c/lib/libmsgpack.a -o test_msgpack - -test_msgpack_streaming: test_msgpack_streaming.cc - $(COMPILER) $(CXXFLAGS) test_msgpack_streaming.cc -I./msgpack-c/include/ ./msgpack-c/lib/libmsgpack.a -o test_msgpack_streaming - -clean: - rm -f test_gason test_json-cpp test_jsoncpp test_libjson - rm -f test_picojson test_rapidjson test_sajson test_JsonBox - rm -f test_jsonxx test_MicroJSON test_nosjob test_cdec_json_parser - rm -f test_msgpack - diff --git a/README.md b/README.md index 78d60d6..79b18f6 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ with data (e.g. the cdec json parser). * gason: git clone https://github.com/vivkin/gason.git * JsonBox: git clone https://github.com/anhero/JsonBox.git * jsoncpp: git clone https://github.com/open-source-parsers/jsoncpp.git -* json-cpp: wget "https://raw.githubusercontent.com/ascheglov/json-cpp/master/single_include/json-cpp.hpp" +* json-cpp: git clone "https://github.com/ascheglov/json-cpp.git" * jsonxx: git clone https://github.com/hjiang/jsonxx.git * libjson: wget "http://downloads.sourceforge.net/project/libjson/libjson_7.6.1.zip?r=&ts=1405248411&use_mirror=heanet" * MicroJSON: wget http://grigory.info/distfiles/MicroJSON-0.3.2.tar.bz2 @@ -23,6 +23,8 @@ with data (e.g. the cdec json parser). * rapidjson: git clone https://github.com/miloyip/rapidjson.git * sajson: git clone https://github.com/chadaustin/sajson.git +Put all libraries in lib/ and build as described by the authors of the respective +library. To run the benchmark You'll need root privileges to clear the disk caches. The run scripts assume that these scripts [3] are in the PATH. diff --git a/benchmark.rb b/benchmark.rb deleted file mode 100755 index 81e11f8..0000000 --- a/benchmark.rb +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env ruby - - -ARGV[0].to_i.times { - start = Time.now - `#{ARGV[1]} #{ARGV[2]}` - puts Time.now-start -} - diff --git a/benchmark/benchmark.rb b/benchmark/benchmark.rb new file mode 100755 index 0000000..81e11f8 --- /dev/null +++ b/benchmark/benchmark.rb @@ -0,0 +1,9 @@ +#!/usr/bin/env ruby + + +ARGV[0].to_i.times { + start = Time.now + `#{ARGV[1]} #{ARGV[2]}` + puts Time.now-start +} + diff --git a/benchmark/run.sh b/benchmark/run.sh new file mode 100755 index 0000000..c4bf31f --- /dev/null +++ b/benchmark/run.sh @@ -0,0 +1,41 @@ +#!/bin/zsh + + +REPEAT=10 +TMP_OVERALL=/tmp/.overall + + +echo +echo "JSON parsing benchmark" +echo "----------------------" +echo " REAPEAT=$REPEAT" +echo + +for prg in \ + test_cdec_json_parser \ + test_gason \ + test_JsonBox \ + test_jsoncpp \ + test_json-cpp \ + test_jsonxx \ + test_libjson \ + test_nosjob \ + test_picojson \ + test_rapidjson \ + test_sajson +do + echo "[$prg]" + sudo sync + sudo sh -c "echo 3 > /proc/sys/vm/drop_caches" + echo > $TMP_OVERALL + for file in `ls -S ../data/*.json`; do + echo "$file:\t$(./benchmark.rb $REPEAT ../src/$prg $file 2>/dev/null | tee -a $TMP_OVERALL | avg | round 2) s" + done + echo "---" + echo "overall:\t$(avg < $TMP_OVERALL | round 2)" + echo " memory:\t$(./memusg.sh ../src/$prg data/1020.json 2>/dev/null)" + echo +done + +rm $TMP_OVERALL + diff --git a/benchmark/run_msgpack.sh b/benchmark/run_msgpack.sh new file mode 100755 index 0000000..7a45df0 --- /dev/null +++ b/benchmark/run_msgpack.sh @@ -0,0 +1,38 @@ +#!/bin/bash + + +REPEAT=10 +TMP_OVERALL=/tmp/.overall_msgpack + + +echo +echo "MSGPACK parsing benchmark" +echo "-------------------------" +echo " REAPEAT=$REPEAT" +echo + +for prg in \ + test_msgpack \ + test_msgpack_streaming \ + test_msgpack_ruby +do + echo "[$prg]" + sudo sync + sudo sh -c "echo 3 > /proc/sys/vm/drop_caches" + echo > $TMP_OVERALL + if [[ $prg == test_msgpack_streaming ]]; then + A="_s" # special format for streaming + else + A="" + fi + for file in `ls -S ../data/*.pak$A`; do + echo "$file:\t$(./benchmark.rb $REPEAT ../src/$prg $file 2>/dev/null | tee -a $TMP_OVERALL | avg | round 2) s" + done + echo "---" + echo "overall:\t$(avg < $TMP_OVERALL | round 2)" + echo " memory:\t$(./memusg.sh ../src/$prg ../data/1020.pak 2>/dev/null)" + echo +done + +rm $TMP_OVERALL + diff --git a/cdec_json_parser/JSON_parser.c b/cdec_json_parser/JSON_parser.c deleted file mode 100644 index 5e392bc..0000000 --- a/cdec_json_parser/JSON_parser.c +++ /dev/null @@ -1,1012 +0,0 @@ -/* JSON_parser.c */ - -/* 2007-08-24 */ - -/* -Copyright (c) 2005 JSON.org - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -The Software shall be used for Good, not Evil. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -*/ - -/* - Callbacks, comments, Unicode handling by Jean Gressmann (jean@0x42.de), 2007-2009. - - For the added features the license above applies also. - - Changelog: - 2009-05-17 - Incorporated benrudiak@googlemail.com fix for UTF16 decoding. - - 2009-05-14 - Fixed float parsing bug related to a locale being set that didn't - use '.' as decimal point character (charles@transmissionbt.com). - - 2008-10-14 - Renamed states.IN to states.IT to avoid name clash which IN macro - defined in windef.h (alexey.pelykh@gmail.com) - - 2008-07-19 - Removed some duplicate code & debugging variable (charles@transmissionbt.com) - - 2008-05-28 - Made JSON_value structure ansi C compliant. This bug was report by - trisk@acm.jhu.edu - - 2008-05-20 - Fixed bug reported by charles@transmissionbt.com where the switching - from static to dynamic parse buffer did not copy the static parse - buffer's content. -*/ - - - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "JSON_parser.h" - -#ifdef _MSC_VER -# if _MSC_VER >= 1400 /* Visual Studio 2005 and up */ -# pragma warning(disable:4996) // unsecure sscanf -# endif -#endif - - -#define true 1 -#define false 0 -#define __ -1 /* the universal error code */ - -/* values chosen so that the object size is approx equal to one page (4K) */ -#ifndef JSON_PARSER_STACK_SIZE -# define JSON_PARSER_STACK_SIZE 128 -#endif - -#ifndef JSON_PARSER_PARSE_BUFFER_SIZE -# define JSON_PARSER_PARSE_BUFFER_SIZE 3500 -#endif - -typedef unsigned short UTF16; - -struct JSON_parser_struct { - JSON_parser_callback callback; - void* ctx; - signed char state, before_comment_state, type, escaped, comment, allow_comments, handle_floats_manually; - UTF16 utf16_high_surrogate; - long depth; - long top; - signed char* stack; - long stack_capacity; - char decimal_point; - char* parse_buffer; - size_t parse_buffer_capacity; - size_t parse_buffer_count; - size_t comment_begin_offset; - signed char static_stack[JSON_PARSER_STACK_SIZE]; - char static_parse_buffer[JSON_PARSER_PARSE_BUFFER_SIZE]; -}; - -#define COUNTOF(x) (sizeof(x)/sizeof(x[0])) - -/* - Characters are mapped into these character classes. This allows for - a significant reduction in the size of the state transition table. -*/ - - - -enum classes { - C_SPACE, /* space */ - C_WHITE, /* other whitespace */ - C_LCURB, /* { */ - C_RCURB, /* } */ - C_LSQRB, /* [ */ - C_RSQRB, /* ] */ - C_COLON, /* : */ - C_COMMA, /* , */ - C_QUOTE, /* " */ - C_BACKS, /* \ */ - C_SLASH, /* / */ - C_PLUS, /* + */ - C_MINUS, /* - */ - C_POINT, /* . */ - C_ZERO , /* 0 */ - C_DIGIT, /* 123456789 */ - C_LOW_A, /* a */ - C_LOW_B, /* b */ - C_LOW_C, /* c */ - C_LOW_D, /* d */ - C_LOW_E, /* e */ - C_LOW_F, /* f */ - C_LOW_L, /* l */ - C_LOW_N, /* n */ - C_LOW_R, /* r */ - C_LOW_S, /* s */ - C_LOW_T, /* t */ - C_LOW_U, /* u */ - C_ABCDF, /* ABCDF */ - C_E, /* E */ - C_ETC, /* everything else */ - C_STAR, /* * */ - NR_CLASSES -}; - -static int ascii_class[128] = { -/* - This array maps the 128 ASCII characters into character classes. - The remaining Unicode characters should be mapped to C_ETC. - Non-whitespace control characters are errors. -*/ - __, __, __, __, __, __, __, __, - __, C_WHITE, C_WHITE, __, __, C_WHITE, __, __, - __, __, __, __, __, __, __, __, - __, __, __, __, __, __, __, __, - - C_SPACE, C_ETC, C_QUOTE, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, - C_ETC, C_ETC, C_STAR, C_PLUS, C_COMMA, C_MINUS, C_POINT, C_SLASH, - C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, - C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, - - C_ETC, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E, C_ABCDF, C_ETC, - C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, - C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, - C_ETC, C_ETC, C_ETC, C_LSQRB, C_BACKS, C_RSQRB, C_ETC, C_ETC, - - C_ETC, C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC, - C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_L, C_ETC, C_LOW_N, C_ETC, - C_ETC, C_ETC, C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC, C_ETC, - C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB, C_ETC, C_ETC -}; - - -/* - The state codes. -*/ -enum states { - GO, /* start */ - OK, /* ok */ - OB, /* object */ - KE, /* key */ - CO, /* colon */ - VA, /* value */ - AR, /* array */ - ST, /* string */ - ES, /* escape */ - U1, /* u1 */ - U2, /* u2 */ - U3, /* u3 */ - U4, /* u4 */ - MI, /* minus */ - ZE, /* zero */ - IT, /* integer */ - FR, /* fraction */ - E1, /* e */ - E2, /* ex */ - E3, /* exp */ - T1, /* tr */ - T2, /* tru */ - T3, /* true */ - F1, /* fa */ - F2, /* fal */ - F3, /* fals */ - F4, /* false */ - N1, /* nu */ - N2, /* nul */ - N3, /* null */ - C1, /* / */ - C2, /* / * */ - C3, /* * */ - FX, /* *.* *eE* */ - D1, /* second UTF-16 character decoding started by \ */ - D2, /* second UTF-16 character proceeded by u */ - NR_STATES -}; - -enum actions -{ - CB = -10, /* comment begin */ - CE = -11, /* comment end */ - FA = -12, /* false */ - TR = -13, /* false */ - NU = -14, /* null */ - DE = -15, /* double detected by exponent e E */ - DF = -16, /* double detected by fraction . */ - SB = -17, /* string begin */ - MX = -18, /* integer detected by minus */ - ZX = -19, /* integer detected by zero */ - IX = -20, /* integer detected by 1-9 */ - EX = -21, /* next char is escaped */ - UC = -22 /* Unicode character read */ -}; - - -static int state_transition_table[NR_STATES][NR_CLASSES] = { -/* - The state transition table takes the current state and the current symbol, - and returns either a new state or an action. An action is represented as a - negative number. A JSON text is accepted if at the end of the text the - state is OK and if the mode is MODE_DONE. - - white 1-9 ABCDF etc - space | { } [ ] : , " \ / + - . 0 | a b c d e f l n r s t u | E | * */ -/*start GO*/ {GO,GO,-6,__,-5,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*ok OK*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*object OB*/ {OB,OB,__,-9,__,__,__,__,SB,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*key KE*/ {KE,KE,__,__,__,__,__,__,SB,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*colon CO*/ {CO,CO,__,__,__,__,-2,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*value VA*/ {VA,VA,-6,__,-5,__,__,__,SB,__,CB,__,MX,__,ZX,IX,__,__,__,__,__,FA,__,NU,__,__,TR,__,__,__,__,__}, -/*array AR*/ {AR,AR,-6,__,-5,-7,__,__,SB,__,CB,__,MX,__,ZX,IX,__,__,__,__,__,FA,__,NU,__,__,TR,__,__,__,__,__}, -/*string ST*/ {ST,__,ST,ST,ST,ST,ST,ST,-4,EX,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST}, -/*escape ES*/ {__,__,__,__,__,__,__,__,ST,ST,ST,__,__,__,__,__,__,ST,__,__,__,ST,__,ST,ST,__,ST,U1,__,__,__,__}, -/*u1 U1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U2,U2,U2,U2,U2,U2,U2,U2,__,__,__,__,__,__,U2,U2,__,__}, -/*u2 U2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U3,U3,U3,U3,U3,U3,U3,U3,__,__,__,__,__,__,U3,U3,__,__}, -/*u3 U3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U4,U4,U4,U4,U4,U4,U4,U4,__,__,__,__,__,__,U4,U4,__,__}, -/*u4 U4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,UC,UC,UC,UC,UC,UC,UC,UC,__,__,__,__,__,__,UC,UC,__,__}, -/*minus MI*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ZE,IT,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*zero ZE*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,DF,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*int IT*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,DF,IT,IT,__,__,__,__,DE,__,__,__,__,__,__,__,__,DE,__,__}, -/*frac FR*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__,__}, -/*e E1*/ {__,__,__,__,__,__,__,__,__,__,__,E2,E2,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*ex E2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*exp E3*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*tr T1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T2,__,__,__,__,__,__,__}, -/*tru T2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T3,__,__,__,__}, -/*true T3*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__,__}, -/*fa F1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*fal F2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F3,__,__,__,__,__,__,__,__,__}, -/*fals F3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F4,__,__,__,__,__,__}, -/*false F4*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__,__}, -/*nu N1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N2,__,__,__,__}, -/*nul N2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N3,__,__,__,__,__,__,__,__,__}, -/*null N3*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__}, -/*/ C1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,C2}, -/*/* C2*/ {C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3}, -/** C3*/ {C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,CE,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3}, -/*_. FX*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__,__}, -/*\ D1*/ {__,__,__,__,__,__,__,__,__,D2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*\ D2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,U1,__,__,__,__}, -}; - - -/* - These modes can be pushed on the stack. -*/ -enum modes { - MODE_ARRAY = 1, - MODE_DONE = 2, - MODE_KEY = 3, - MODE_OBJECT = 4 -}; - -static int -push(JSON_parser jc, int mode) -{ -/* - Push a mode onto the stack. Return false if there is overflow. -*/ - jc->top += 1; - if (jc->depth < 0) { - if (jc->top >= jc->stack_capacity) { - size_t bytes_to_allocate; - jc->stack_capacity *= 2; - bytes_to_allocate = jc->stack_capacity * sizeof(jc->static_stack[0]); - if (jc->stack == &jc->static_stack[0]) { - jc->stack = (signed char*)malloc(bytes_to_allocate); - memcpy(jc->stack, jc->static_stack, sizeof(jc->static_stack)); - } else { - jc->stack = (signed char*)realloc(jc->stack, bytes_to_allocate); - } - } - } else { - if (jc->top >= jc->depth) { - return false; - } - } - - jc->stack[jc->top] = mode; - return true; -} - - -static int -pop(JSON_parser jc, int mode) -{ -/* - Pop the stack, assuring that the current mode matches the expectation. - Return false if there is underflow or if the modes mismatch. -*/ - if (jc->top < 0 || jc->stack[jc->top] != mode) { - return false; - } - jc->top -= 1; - return true; -} - - -#define parse_buffer_clear(jc) \ - do {\ - jc->parse_buffer_count = 0;\ - jc->parse_buffer[0] = 0;\ - } while (0) - -#define parse_buffer_pop_back_char(jc)\ - do {\ - assert(jc->parse_buffer_count >= 1);\ - --jc->parse_buffer_count;\ - jc->parse_buffer[jc->parse_buffer_count] = 0;\ - } while (0) - -void delete_JSON_parser(JSON_parser jc) -{ - if (jc) { - if (jc->stack != &jc->static_stack[0]) { - free((void*)jc->stack); - } - if (jc->parse_buffer != &jc->static_parse_buffer[0]) { - free((void*)jc->parse_buffer); - } - free((void*)jc); - } -} - - -JSON_parser -new_JSON_parser(JSON_config* config) -{ -/* - new_JSON_parser starts the checking process by constructing a JSON_parser - object. It takes a depth parameter that restricts the level of maximum - nesting. - - To continue the process, call JSON_parser_char for each character in the - JSON text, and then call JSON_parser_done to obtain the final result. - These functions are fully reentrant. -*/ - - int depth = 0; - JSON_config default_config; - - JSON_parser jc = (JSON_parser)malloc(sizeof(struct JSON_parser_struct)); - - memset(jc, 0, sizeof(*jc)); - - - /* initialize configuration */ - init_JSON_config(&default_config); - - /* set to default configuration if none was provided */ - if (config == NULL) { - config = &default_config; - } - - depth = config->depth; - - /* We need to be able to push at least one object */ - if (depth == 0) { - depth = 1; - } - - jc->state = GO; - jc->top = -1; - - /* Do we want non-bound stack? */ - if (depth > 0) { - jc->stack_capacity = depth; - jc->depth = depth; - if (depth <= (int)COUNTOF(jc->static_stack)) { - jc->stack = &jc->static_stack[0]; - } else { - jc->stack = (signed char*)malloc(jc->stack_capacity * sizeof(jc->static_stack[0])); - } - } else { - jc->stack_capacity = COUNTOF(jc->static_stack); - jc->depth = -1; - jc->stack = &jc->static_stack[0]; - } - - /* set parser to start */ - push(jc, MODE_DONE); - - /* set up the parse buffer */ - jc->parse_buffer = &jc->static_parse_buffer[0]; - jc->parse_buffer_capacity = COUNTOF(jc->static_parse_buffer); - parse_buffer_clear(jc); - - /* set up callback, comment & float handling */ - jc->callback = config->callback; - jc->ctx = config->callback_ctx; - jc->allow_comments = config->allow_comments != 0; - jc->handle_floats_manually = config->handle_floats_manually != 0; - - /* set up decimal point */ - jc->decimal_point = *localeconv()->decimal_point; - - return jc; -} - -static void grow_parse_buffer(JSON_parser jc) -{ - size_t bytes_to_allocate; - jc->parse_buffer_capacity *= 2; - bytes_to_allocate = jc->parse_buffer_capacity * sizeof(jc->parse_buffer[0]); - if (jc->parse_buffer == &jc->static_parse_buffer[0]) { - jc->parse_buffer = (char*)malloc(bytes_to_allocate); - memcpy(jc->parse_buffer, jc->static_parse_buffer, jc->parse_buffer_count); - } else { - jc->parse_buffer = (char*)realloc(jc->parse_buffer, bytes_to_allocate); - } -} - -#define parse_buffer_push_back_char(jc, c)\ - do {\ - if (jc->parse_buffer_count + 1 >= jc->parse_buffer_capacity) grow_parse_buffer(jc);\ - jc->parse_buffer[jc->parse_buffer_count++] = c;\ - jc->parse_buffer[jc->parse_buffer_count] = 0;\ - } while (0) - -#define assert_is_non_container_type(jc) \ - assert( \ - jc->type == JSON_T_NULL || \ - jc->type == JSON_T_FALSE || \ - jc->type == JSON_T_TRUE || \ - jc->type == JSON_T_FLOAT || \ - jc->type == JSON_T_INTEGER || \ - jc->type == JSON_T_STRING) - - -static int parse_parse_buffer(JSON_parser jc) -{ - if (jc->callback) { - JSON_value value, *arg = NULL; - - if (jc->type != JSON_T_NONE) { - assert_is_non_container_type(jc); - - switch(jc->type) { - case JSON_T_FLOAT: - arg = &value; - if (jc->handle_floats_manually) { - value.vu.str.value = jc->parse_buffer; - value.vu.str.length = jc->parse_buffer_count; - } else { - /*sscanf(jc->parse_buffer, "%Lf", &value.vu.float_value);*/ - - /* not checking with end pointer b/c there may be trailing ws */ - value.vu.float_value = strtod(jc->parse_buffer, NULL); - } - break; - case JSON_T_INTEGER: - arg = &value; - sscanf(jc->parse_buffer, JSON_PARSER_INTEGER_SSCANF_TOKEN, &value.vu.integer_value); - break; - case JSON_T_STRING: - arg = &value; - value.vu.str.value = jc->parse_buffer; - value.vu.str.length = jc->parse_buffer_count; - break; - } - - if (!(*jc->callback)(jc->ctx, jc->type, arg)) { - return false; - } - } - } - - parse_buffer_clear(jc); - - return true; -} - -#define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800) -#define IS_LOW_SURROGATE(uc) (((uc) & 0xFC00) == 0xDC00) -#define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000) -static unsigned char utf8_lead_bits[4] = { 0x00, 0xC0, 0xE0, 0xF0 }; - -static int decode_unicode_char(JSON_parser jc) -{ - int i; - unsigned uc = 0; - char* p; - int trail_bytes; - - assert(jc->parse_buffer_count >= 6); - - p = &jc->parse_buffer[jc->parse_buffer_count - 4]; - - for (i = 12; i >= 0; i -= 4, ++p) { - unsigned x = *p; - - if (x >= 'a') { - x -= ('a' - 10); - } else if (x >= 'A') { - x -= ('A' - 10); - } else { - x &= ~0x30u; - } - - assert(x < 16); - - uc |= x << i; - } - - /* clear UTF-16 char from buffer */ - jc->parse_buffer_count -= 6; - jc->parse_buffer[jc->parse_buffer_count] = 0; - - /* attempt decoding ... */ - if (jc->utf16_high_surrogate) { - if (IS_LOW_SURROGATE(uc)) { - uc = DECODE_SURROGATE_PAIR(jc->utf16_high_surrogate, uc); - trail_bytes = 3; - jc->utf16_high_surrogate = 0; - } else { - /* high surrogate without a following low surrogate */ - return false; - } - } else { - if (uc < 0x80) { - trail_bytes = 0; - } else if (uc < 0x800) { - trail_bytes = 1; - } else if (IS_HIGH_SURROGATE(uc)) { - /* save the high surrogate and wait for the low surrogate */ - jc->utf16_high_surrogate = uc; - return true; - } else if (IS_LOW_SURROGATE(uc)) { - /* low surrogate without a preceding high surrogate */ - return false; - } else { - trail_bytes = 2; - } - } - - jc->parse_buffer[jc->parse_buffer_count++] = (char) ((uc >> (trail_bytes * 6)) | utf8_lead_bits[trail_bytes]); - - for (i = trail_bytes * 6 - 6; i >= 0; i -= 6) { - jc->parse_buffer[jc->parse_buffer_count++] = (char) (((uc >> i) & 0x3F) | 0x80); - } - - jc->parse_buffer[jc->parse_buffer_count] = 0; - - return true; -} - -static int add_escaped_char_to_parse_buffer(JSON_parser jc, int next_char) -{ - jc->escaped = 0; - /* remove the backslash */ - parse_buffer_pop_back_char(jc); - switch(next_char) { - case 'b': - parse_buffer_push_back_char(jc, '\b'); - break; - case 'f': - parse_buffer_push_back_char(jc, '\f'); - break; - case 'n': - parse_buffer_push_back_char(jc, '\n'); - break; - case 'r': - parse_buffer_push_back_char(jc, '\r'); - break; - case 't': - parse_buffer_push_back_char(jc, '\t'); - break; - case '"': - parse_buffer_push_back_char(jc, '"'); - break; - case '\\': - parse_buffer_push_back_char(jc, '\\'); - break; - case '/': - parse_buffer_push_back_char(jc, '/'); - break; - case 'u': - parse_buffer_push_back_char(jc, '\\'); - parse_buffer_push_back_char(jc, 'u'); - break; - default: - return false; - } - - return true; -} - -#define add_char_to_parse_buffer(jc, next_char, next_class) \ - do { \ - if (jc->escaped) { \ - if (!add_escaped_char_to_parse_buffer(jc, next_char)) \ - return false; \ - } else if (!jc->comment) { \ - if ((jc->type != JSON_T_NONE) | !((next_class == C_SPACE) | (next_class == C_WHITE)) /* non-white-space */) { \ - parse_buffer_push_back_char(jc, (char)next_char); \ - } \ - } \ - } while (0) - - -#define assert_type_isnt_string_null_or_bool(jc) \ - assert(jc->type != JSON_T_FALSE); \ - assert(jc->type != JSON_T_TRUE); \ - assert(jc->type != JSON_T_NULL); \ - assert(jc->type != JSON_T_STRING) - - -int -JSON_parser_char(JSON_parser jc, int next_char) -{ -/* - After calling new_JSON_parser, call this function for each character (or - partial character) in your JSON text. It can accept UTF-8, UTF-16, or - UTF-32. It returns true if things are looking ok so far. If it rejects the - text, it returns false. -*/ - int next_class, next_state; - -/* - Determine the character's class. -*/ - if (next_char < 0) { - return false; - } - if (next_char >= 128) { - next_class = C_ETC; - } else { - next_class = ascii_class[next_char]; - if (next_class <= __) { - return false; - } - } - - add_char_to_parse_buffer(jc, next_char, next_class); - -/* - Get the next state from the state transition table. -*/ - next_state = state_transition_table[jc->state][next_class]; - if (next_state >= 0) { -/* - Change the state. -*/ - jc->state = next_state; - } else { -/* - Or perform one of the actions. -*/ - switch (next_state) { -/* Unicode character */ - case UC: - if(!decode_unicode_char(jc)) { - return false; - } - /* check if we need to read a second UTF-16 char */ - if (jc->utf16_high_surrogate) { - jc->state = D1; - } else { - jc->state = ST; - } - break; -/* escaped char */ - case EX: - jc->escaped = 1; - jc->state = ES; - break; -/* integer detected by minus */ - case MX: - jc->type = JSON_T_INTEGER; - jc->state = MI; - break; -/* integer detected by zero */ - case ZX: - jc->type = JSON_T_INTEGER; - jc->state = ZE; - break; -/* integer detected by 1-9 */ - case IX: - jc->type = JSON_T_INTEGER; - jc->state = IT; - break; - -/* floating point number detected by exponent*/ - case DE: - assert_type_isnt_string_null_or_bool(jc); - jc->type = JSON_T_FLOAT; - jc->state = E1; - break; - -/* floating point number detected by fraction */ - case DF: - assert_type_isnt_string_null_or_bool(jc); - if (!jc->handle_floats_manually) { -/* - Some versions of strtod (which underlies sscanf) don't support converting - C-locale formated floating point values. -*/ - assert(jc->parse_buffer[jc->parse_buffer_count-1] == '.'); - jc->parse_buffer[jc->parse_buffer_count-1] = jc->decimal_point; - } - jc->type = JSON_T_FLOAT; - jc->state = FX; - break; -/* string begin " */ - case SB: - parse_buffer_clear(jc); - assert(jc->type == JSON_T_NONE); - jc->type = JSON_T_STRING; - jc->state = ST; - break; - -/* n */ - case NU: - assert(jc->type == JSON_T_NONE); - jc->type = JSON_T_NULL; - jc->state = N1; - break; -/* f */ - case FA: - assert(jc->type == JSON_T_NONE); - jc->type = JSON_T_FALSE; - jc->state = F1; - break; -/* t */ - case TR: - assert(jc->type == JSON_T_NONE); - jc->type = JSON_T_TRUE; - jc->state = T1; - break; - -/* closing comment */ - case CE: - jc->comment = 0; - assert(jc->parse_buffer_count == 0); - assert(jc->type == JSON_T_NONE); - jc->state = jc->before_comment_state; - break; - -/* opening comment */ - case CB: - if (!jc->allow_comments) { - return false; - } - parse_buffer_pop_back_char(jc); - if (!parse_parse_buffer(jc)) { - return false; - } - assert(jc->parse_buffer_count == 0); - assert(jc->type != JSON_T_STRING); - switch (jc->stack[jc->top]) { - case MODE_ARRAY: - case MODE_OBJECT: - switch(jc->state) { - case VA: - case AR: - jc->before_comment_state = jc->state; - break; - default: - jc->before_comment_state = OK; - break; - } - break; - default: - jc->before_comment_state = jc->state; - break; - } - jc->type = JSON_T_NONE; - jc->state = C1; - jc->comment = 1; - break; -/* empty } */ - case -9: - parse_buffer_clear(jc); - if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_END, NULL)) { - return false; - } - if (!pop(jc, MODE_KEY)) { - return false; - } - jc->state = OK; - break; - -/* } */ case -8: - parse_buffer_pop_back_char(jc); - if (!parse_parse_buffer(jc)) { - return false; - } - if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_END, NULL)) { - return false; - } - if (!pop(jc, MODE_OBJECT)) { - return false; - } - jc->type = JSON_T_NONE; - jc->state = OK; - break; - -/* ] */ case -7: - parse_buffer_pop_back_char(jc); - if (!parse_parse_buffer(jc)) { - return false; - } - if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_ARRAY_END, NULL)) { - return false; - } - if (!pop(jc, MODE_ARRAY)) { - return false; - } - - jc->type = JSON_T_NONE; - jc->state = OK; - break; - -/* { */ case -6: - parse_buffer_pop_back_char(jc); - if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_BEGIN, NULL)) { - return false; - } - if (!push(jc, MODE_KEY)) { - return false; - } - assert(jc->type == JSON_T_NONE); - jc->state = OB; - break; - -/* [ */ case -5: - parse_buffer_pop_back_char(jc); - if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_ARRAY_BEGIN, NULL)) { - return false; - } - if (!push(jc, MODE_ARRAY)) { - return false; - } - assert(jc->type == JSON_T_NONE); - jc->state = AR; - break; - -/* string end " */ case -4: - parse_buffer_pop_back_char(jc); - switch (jc->stack[jc->top]) { - case MODE_KEY: - assert(jc->type == JSON_T_STRING); - jc->type = JSON_T_NONE; - jc->state = CO; - - if (jc->callback) { - JSON_value value; - value.vu.str.value = jc->parse_buffer; - value.vu.str.length = jc->parse_buffer_count; - if (!(*jc->callback)(jc->ctx, JSON_T_KEY, &value)) { - return false; - } - } - parse_buffer_clear(jc); - break; - case MODE_ARRAY: - case MODE_OBJECT: - assert(jc->type == JSON_T_STRING); - if (!parse_parse_buffer(jc)) { - return false; - } - jc->type = JSON_T_NONE; - jc->state = OK; - break; - default: - return false; - } - break; - -/* , */ case -3: - parse_buffer_pop_back_char(jc); - if (!parse_parse_buffer(jc)) { - return false; - } - switch (jc->stack[jc->top]) { - case MODE_OBJECT: -/* - A comma causes a flip from object mode to key mode. -*/ - if (!pop(jc, MODE_OBJECT) || !push(jc, MODE_KEY)) { - return false; - } - assert(jc->type != JSON_T_STRING); - jc->type = JSON_T_NONE; - jc->state = KE; - break; - case MODE_ARRAY: - assert(jc->type != JSON_T_STRING); - jc->type = JSON_T_NONE; - jc->state = VA; - break; - default: - return false; - } - break; - -/* : */ case -2: -/* - A colon causes a flip from key mode to object mode. -*/ - parse_buffer_pop_back_char(jc); - if (!pop(jc, MODE_KEY) || !push(jc, MODE_OBJECT)) { - return false; - } - assert(jc->type == JSON_T_NONE); - jc->state = VA; - break; -/* - Bad action. -*/ - default: - return false; - } - } - return true; -} - - -int -JSON_parser_done(JSON_parser jc) -{ - const int result = jc->state == OK && pop(jc, MODE_DONE); - - return result; -} - - -int JSON_parser_is_legal_white_space_string(const char* s) -{ - int c, char_class; - - if (s == NULL) { - return false; - } - - for (; *s; ++s) { - c = *s; - - if (c < 0 || c >= 128) { - return false; - } - - char_class = ascii_class[c]; - - if (char_class != C_SPACE && char_class != C_WHITE) { - return false; - } - } - - return true; -} - - - -void init_JSON_config(JSON_config* config) -{ - if (config) { - memset(config, 0, sizeof(*config)); - - config->depth = JSON_PARSER_STACK_SIZE - 1; - } -} diff --git a/cdec_json_parser/JSON_parser.h b/cdec_json_parser/JSON_parser.h deleted file mode 100644 index de98007..0000000 --- a/cdec_json_parser/JSON_parser.h +++ /dev/null @@ -1,152 +0,0 @@ -#ifndef JSON_PARSER_H -#define JSON_PARSER_H - -/* JSON_parser.h */ - - -#include - -/* Windows DLL stuff */ -#ifdef _WIN32 -# ifdef JSON_PARSER_DLL_EXPORTS -# define JSON_PARSER_DLL_API __declspec(dllexport) -# else -# define JSON_PARSER_DLL_API __declspec(dllimport) -# endif -#else -# define JSON_PARSER_DLL_API -#endif - -/* Determine the integer type use to parse non-floating point numbers */ -#if __STDC_VERSION__ >= 199901L || HAVE_LONG_LONG == 1 -typedef long long JSON_int_t; -#define JSON_PARSER_INTEGER_SSCANF_TOKEN "%lld" -#define JSON_PARSER_INTEGER_SPRINTF_TOKEN "%lld" -#else -typedef long JSON_int_t; -#define JSON_PARSER_INTEGER_SSCANF_TOKEN "%ld" -#define JSON_PARSER_INTEGER_SPRINTF_TOKEN "%ld" -#endif - - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum -{ - JSON_T_NONE = 0, - JSON_T_ARRAY_BEGIN, // 1 - JSON_T_ARRAY_END, // 2 - JSON_T_OBJECT_BEGIN, // 3 - JSON_T_OBJECT_END, // 4 - JSON_T_INTEGER, // 5 - JSON_T_FLOAT, // 6 - JSON_T_NULL, // 7 - JSON_T_TRUE, // 8 - JSON_T_FALSE, // 9 - JSON_T_STRING, // 10 - JSON_T_KEY, // 11 - JSON_T_MAX // 12 -} JSON_type; - -typedef struct JSON_value_struct { - union { - JSON_int_t integer_value; - - double float_value; - - struct { - const char* value; - size_t length; - } str; - } vu; -} JSON_value; - -typedef struct JSON_parser_struct* JSON_parser; - -/*! \brief JSON parser callback - - \param ctx The pointer passed to new_JSON_parser. - \param type An element of JSON_type but not JSON_T_NONE. - \param value A representation of the parsed value. This parameter is NULL for - JSON_T_ARRAY_BEGIN, JSON_T_ARRAY_END, JSON_T_OBJECT_BEGIN, JSON_T_OBJECT_END, - JSON_T_NULL, JSON_T_TRUE, and SON_T_FALSE. String values are always returned - as zero-terminated C strings. - - \return Non-zero if parsing should continue, else zero. -*/ -typedef int (*JSON_parser_callback)(void* ctx, int type, const struct JSON_value_struct* value); - - -/*! \brief The structure used to configure a JSON parser object - - \param depth If negative, the parser can parse arbitrary levels of JSON, otherwise - the depth is the limit - \param Pointer to a callback. This parameter may be NULL. In this case the input is merely checked for validity. - \param Callback context. This parameter may be NULL. - \param depth. Specifies the levels of nested JSON to allow. Negative numbers yield unlimited nesting. - \param allowComments. To allow C style comments in JSON, set to non-zero. - \param handleFloatsManually. To decode floating point numbers manually set this parameter to non-zero. - - \return The parser object. -*/ -typedef struct { - JSON_parser_callback callback; - void* callback_ctx; - int depth; - int allow_comments; - int handle_floats_manually; -} JSON_config; - - -/*! \brief Initializes the JSON parser configuration structure to default values. - - The default configuration is - - 127 levels of nested JSON (depends on JSON_PARSER_STACK_SIZE, see json_parser.c) - - no parsing, just checking for JSON syntax - - no comments - - \param config. Used to configure the parser. -*/ -JSON_PARSER_DLL_API void init_JSON_config(JSON_config* config); - -/*! \brief Create a JSON parser object - - \param config. Used to configure the parser. Set to NULL to use the default configuration. - See init_JSON_config - - \return The parser object. -*/ -JSON_PARSER_DLL_API extern JSON_parser new_JSON_parser(JSON_config* config); - -/*! \brief Destroy a previously created JSON parser object. */ -JSON_PARSER_DLL_API extern void delete_JSON_parser(JSON_parser jc); - -/*! \brief Parse a character. - - \return Non-zero, if all characters passed to this function are part of are valid JSON. -*/ -JSON_PARSER_DLL_API extern int JSON_parser_char(JSON_parser jc, int next_char); - -/*! \brief Finalize parsing. - - Call this method once after all input characters have been consumed. - - \return Non-zero, if all parsed characters are valid JSON, zero otherwise. -*/ -JSON_PARSER_DLL_API extern int JSON_parser_done(JSON_parser jc); - -/*! \brief Determine if a given string is valid JSON white space - - \return Non-zero if the string is valid, zero otherwise. -*/ -JSON_PARSER_DLL_API extern int JSON_parser_is_legal_white_space_string(const char* s); - - -#ifdef __cplusplus -} -#endif - - -#endif /* JSON_PARSER_H */ diff --git a/cdec_json_parser/LICENSE b/cdec_json_parser/LICENSE deleted file mode 100644 index a390938..0000000 --- a/cdec_json_parser/LICENSE +++ /dev/null @@ -1,213 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - ----------------------------------------------- - -L-BFGS CODE FROM COMPUTATIONAL CRYSTALLOGRAPHY TOOLBOX (CCTBX) - -This package includes source code (training/lbfgs.h) based on source -code distributed as part of the Compational Crystallography Toolbox -(CCTBX), which has separate copyright notices and license terms. Use of -this source code is subject to the terms and conditions of the license -contained in the file LICENSE.cctbx . - diff --git a/cdec_json_parser/Makefile b/cdec_json_parser/Makefile deleted file mode 100644 index c976aba..0000000 --- a/cdec_json_parser/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -all: - gcc JSON_parser.c -c - g++ json_parse.cc -c - -clean: - rm *.o - diff --git a/cdec_json_parser/json_parse.cc b/cdec_json_parser/json_parse.cc deleted file mode 100644 index 4580fc8..0000000 --- a/cdec_json_parser/json_parse.cc +++ /dev/null @@ -1,31 +0,0 @@ -#include "json_parse.h" - -#include -#include - -using namespace std; - - -bool JSONParser::HandleJSONEvent(int type, const JSON_value* value) { - switch(type) { - case JSON_T_OBJECT_BEGIN: - case JSON_T_OBJECT_END: - case JSON_T_ARRAY_BEGIN: - case JSON_T_ARRAY_END: - case JSON_T_NULL: - case JSON_T_TRUE: - case JSON_T_FALSE: - case JSON_T_KEY: - case JSON_T_INTEGER: - case JSON_T_FLOAT: - break; - case JSON_T_STRING: - string s = value->vu.str.value; - string t = s.substr(1, 4); - if (t == "Goal") - cerr << t << endl; - break; - } - return true; -} - diff --git a/cdec_json_parser/json_parse.h b/cdec_json_parser/json_parse.h deleted file mode 100644 index 80c037b..0000000 --- a/cdec_json_parser/json_parse.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef _JSON_WRAPPER_H_ -#define _JSON_WRAPPER_H_ - -#include -#include -#include "JSON_parser.h" - -class JSONParser { - public: - JSONParser() { - state = -1; - init_JSON_config(&config); - hack.mf = &JSONParser::Callback; - config.depth = 10; - config.callback_ctx = reinterpret_cast(this); - config.callback = hack.cb; - config.allow_comments = 1; - config.handle_floats_manually = 1; - jc = new_JSON_parser(&config); - } - virtual ~JSONParser() { - delete_JSON_parser(jc); - } - bool Parse(std::istream* in) { - int count = 0; - int lc = 1; - for (; in ; ++count) { - int next_char = in->get(); - if (!in->good()) break; - if (lc == '\n') { ++lc; } - if (!JSON_parser_char(jc, next_char)) { - std::cerr << "JSON_parser_char: syntax error, line " << lc << " (byte " << count << ")" << std::endl; - return false; - } - } - if (!JSON_parser_done(jc)) { - std::cerr << "JSON_parser_done: syntax error\n"; - return false; - } - return true; - } - static void WriteEscapedString(const std::string& in, std::ostream* out); - protected: - bool HandleJSONEvent(int type, const JSON_value* value); - private: - int state; - std::string cur_key; - std::string cat; - int Callback(int type, const JSON_value* value) { - if (HandleJSONEvent(type, value)) return 1; - return 0; - } - JSON_parser_struct* jc; - JSON_config config; - typedef int (JSONParser::* MF)(int type, const struct JSON_value_struct* value); - union CBHack { - JSON_parser_callback cb; - MF mf; - } hack; -}; - -#endif diff --git a/data/Makefile b/data/Makefile deleted file mode 100644 index 24d85a3..0000000 --- a/data/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -make_paks: make_paks.cc - g++ -std=c++11 make_paks.cc -I../msgpack-c/include/ ../msgpack-c/lib/libmsgpack.a -o make_paks - -make_paks2: make_paks2.cc - g++ -std=c++11 make_paks2.cc -I../msgpack-c/include/ ../msgpack-c/lib/libmsgpack.a -o make_paks2 - -clean: - rm -f make_paks - diff --git a/data/cdec.ini b/data/cdec.ini deleted file mode 100644 index ddbe54c..0000000 --- a/data/cdec.ini +++ /dev/null @@ -1,4 +0,0 @@ -formalism=scfg -intersection_strategy=full -add_pass_through_rules=true - diff --git a/data/make.sh b/data/make.sh deleted file mode 100755 index 5e0c31b..0000000 --- a/data/make.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/zsh - - -# wmt/14/newstest2008 data -for i in 1020 1391 1495 1570 1889 2002 429 748; do - ~/src/weaver/util/cdec2json.py -c cdec.ini -w weights.init -g grammar.$i.gz < $i.in | ./to_ascii.rb > $i.json -done - diff --git a/data/make_paks.cc b/data/make_paks.cc deleted file mode 100644 index ca6c9b2..0000000 --- a/data/make_paks.cc +++ /dev/null @@ -1,125 +0,0 @@ -#include -#include -#include -#include -#include -#include - - -/* - * https://github.com/ascheglov/json-cpp - * - */ -#include "../json-cpp.hpp" - -using namespace std; - - -struct Node { - int id; - string cat; - vector span; - - MSGPACK_DEFINE(id, cat, span); -}; - -struct Vector { - double CountEF; - double EgivenFCoherent; - double Glue; - double IsSingletonF; - double IsSingletonFE; - double LanguageModel; - double LanguageModel_OOV; - double MaxLexFgivenE; - double MaxLexEgivenF; - double PassThrough; - double PassThrough_1; - double PassThrough_2; - double PassThrough_3; - double PassThrough_4; - double PassThrough_5; - double PassThrough_6; - double SampleCountF; - double WordPenalty; - - MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty); -}; - -struct Edge { - int head; - string rule; - vector tails; - Vector f; - double weight; - - MSGPACK_DEFINE(head, rule, tails, f, weight); -}; - -struct Hg { - Vector weights; - vector nodes; - vector edges; - - MSGPACK_DEFINE(weights, nodes, edges); -}; - -template inline void -serialize(jsoncpp::Stream& stream, Hg& o) -{ - fields(o, stream, "weights", o.weights, "nodes", o.nodes, "edges", o.edges); -} - -template inline void -serialize(jsoncpp::Stream& stream, Edge& o) -{ - fields(o, stream, "head", o.head, "rule", o.rule, "tails", o.tails, "f", o.f, "weight", o.weight); -} - -template inline void -serialize(jsoncpp::Stream& stream, Vector& o) -{ - fields(o, stream, "EgivenFCoherent", o.EgivenFCoherent, "SampleCountF", o.SampleCountF, "CountEF", o.CountEF, "MaxLexFgivenE", o.MaxLexFgivenE, "MaxLexEgivenF", o.MaxLexEgivenF, "IsSingletonF", o.IsSingletonF, "IsSingletonFE", o.IsSingletonFE, "LanguageModel", o.LanguageModel, "LanguageModel_OOV", o.LanguageModel_OOV, "PassThrough", o.PassThrough, "PassThrough_1", o.PassThrough_1, "PassThrough_2", o.PassThrough_2, "PassThrough_3", o.PassThrough_3, "PassThrough_4", o.PassThrough_4, "PassThrough_5", o.PassThrough_5, "PassThrough_6", o.PassThrough_6, "WordPenalty", o.WordPenalty, "Glue", o.Glue); -} - -template inline void -serialize(jsoncpp::Stream& stream, Node& o) -{ - fields(o, stream, "id", o.id, "cat", o.cat, "span", o.span); -} - -int -main(int argc, char** argv) -{ - ifstream ifs(argv[1]); - string json_str((istreambuf_iterator(ifs) ), - (istreambuf_iterator())); - - Hg hg; - Vector w; - hg.weights = w; - vector nodes; - hg.nodes = nodes; - vector edges; - hg.edges = edges; - jsoncpp::parse(hg, json_str); - - FILE* file = fopen(argv[2], "wb"); - msgpack::fbuffer fbuf(file); - msgpack::pack(fbuf, hg); - fclose(file); - - /*ifstream ifs1(argv[2]); - string str1((istreambuf_iterator(jfs1)), - (istreambuf_iterator())); - - msgpack::zone zone; - msgpack::object obj; - msgpack::unpack(str1.data(), str1.size(), NULL, &zone, &obj); - - Hg hg; - obj.convert(&hg);*/ - - return 0; -} - diff --git a/data/make_paks2.cc b/data/make_paks2.cc deleted file mode 100644 index 1b5895b..0000000 --- a/data/make_paks2.cc +++ /dev/null @@ -1,121 +0,0 @@ -#include -#include -#include -#include -#include -#include - - -/* - * https://github.com/ascheglov/json-cpp - * - */ -#include "../json-cpp.hpp" - -using namespace std; - - -struct Node { - int id; - string cat; - vector span; - - MSGPACK_DEFINE(id, cat, span); -}; - -struct Vector { - double CountEF; - double EgivenFCoherent; - double Glue; - double IsSingletonF; - double IsSingletonFE; - double LanguageModel; - double LanguageModel_OOV; - double MaxLexFgivenE; - double MaxLexEgivenF; - double PassThrough; - double PassThrough_1; - double PassThrough_2; - double PassThrough_3; - double PassThrough_4; - double PassThrough_5; - double PassThrough_6; - double SampleCountF; - double WordPenalty; - - MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty); -}; - -struct Edge { - int head; - string rule; - vector tails; - Vector f; - double weight; - - MSGPACK_DEFINE(head, rule, tails, f, weight); -}; - -struct Hg { - Vector weights; - vector nodes; - vector edges; - - MSGPACK_DEFINE(weights, nodes, edges); -}; - -template inline void -serialize(jsoncpp::Stream& stream, Hg& o) -{ - fields(o, stream, "weights", o.weights, "nodes", o.nodes, "edges", o.edges); -} - -template inline void -serialize(jsoncpp::Stream& stream, Edge& o) -{ - fields(o, stream, "head", o.head, "rule", o.rule, "tails", o.tails, "f", o.f, "weight", o.weight); -} - -template inline void -serialize(jsoncpp::Stream& stream, Vector& o) -{ - fields(o, stream, "EgivenFCoherent", o.EgivenFCoherent, "SampleCountF", o.SampleCountF, "CountEF", o.CountEF, "MaxLexFgivenE", o.MaxLexFgivenE, "MaxLexEgivenF", o.MaxLexEgivenF, "IsSingletonF", o.IsSingletonF, "IsSingletonFE", o.IsSingletonFE, "LanguageModel", o.LanguageModel, "LanguageModel_OOV", o.LanguageModel_OOV, "PassThrough", o.PassThrough, "PassThrough_1", o.PassThrough_1, "PassThrough_2", o.PassThrough_2, "PassThrough_3", o.PassThrough_3, "PassThrough_4", o.PassThrough_4, "PassThrough_5", o.PassThrough_5, "PassThrough_6", o.PassThrough_6, "WordPenalty", o.WordPenalty, "Glue", o.Glue); -} - -template inline void -serialize(jsoncpp::Stream& stream, Node& o) -{ - fields(o, stream, "id", o.id, "cat", o.cat, "span", o.span); -} - -int -main(int argc, char** argv) -{ - ifstream ifs(argv[1]); - string json_str((istreambuf_iterator(ifs) ), - (istreambuf_iterator())); - - Hg hg; - Vector w; - hg.weights = w; - vector nodes; - hg.nodes = nodes; - vector edges; - hg.edges = edges; - jsoncpp::parse(hg, json_str); - - FILE* file = fopen(argv[2], "wb"); - msgpack::fbuffer fbuf(file); - msgpack::pack(fbuf, hg.nodes.size()); - msgpack::pack(fbuf, hg.edges.size()); - msgpack::pack(fbuf, hg.weights); - for (auto it = hg.nodes.begin(); it != hg.nodes.end(); it++) - msgpack::pack(fbuf, *it); - for (auto it = hg.edges.begin(); it != hg.edges.end(); it++) - msgpack::pack(fbuf, *it); - - fclose(file); - - return 0; -} - diff --git a/data/to_ascii.rb b/data/to_ascii.rb deleted file mode 100755 index 6c1d23e..0000000 --- a/data/to_ascii.rb +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env ruby - - -while line = STDIN.gets - encoding_options = { - :invalid => :replace, - :undef => :replace, - :replace => '?', - :universal_newline => true - } - puts line.encode 'ASCII', encoding_options -end - diff --git a/data/weights.init b/data/weights.init deleted file mode 100644 index 0d09f9f..0000000 --- a/data/weights.init +++ /dev/null @@ -1,12 +0,0 @@ -CountEF 0.1 -EgivenFCoherent -0.1 -Glue 0.01 -IsSingletonF -0.01 -IsSingletonFE -0.01 -LanguageModel 0.1 -LanguageModel_OOV -1 -MaxLexFgivenE -0.1 -MaxLexEgivenF -0.1 -PassThrough -0.1 -SampleCountF -0.1 -WordPenalty -0.1 diff --git a/lib/JsonBox b/lib/JsonBox new file mode 160000 index 0000000..fcb82eb --- /dev/null +++ b/lib/JsonBox @@ -0,0 +1 @@ +Subproject commit fcb82ebae41dffb90d32a49ac236d1608d9a67ee diff --git a/lib/cdec_json_parser/JSON_parser.c b/lib/cdec_json_parser/JSON_parser.c new file mode 100644 index 0000000..5e392bc --- /dev/null +++ b/lib/cdec_json_parser/JSON_parser.c @@ -0,0 +1,1012 @@ +/* JSON_parser.c */ + +/* 2007-08-24 */ + +/* +Copyright (c) 2005 JSON.org + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +The Software shall be used for Good, not Evil. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +/* + Callbacks, comments, Unicode handling by Jean Gressmann (jean@0x42.de), 2007-2009. + + For the added features the license above applies also. + + Changelog: + 2009-05-17 + Incorporated benrudiak@googlemail.com fix for UTF16 decoding. + + 2009-05-14 + Fixed float parsing bug related to a locale being set that didn't + use '.' as decimal point character (charles@transmissionbt.com). + + 2008-10-14 + Renamed states.IN to states.IT to avoid name clash which IN macro + defined in windef.h (alexey.pelykh@gmail.com) + + 2008-07-19 + Removed some duplicate code & debugging variable (charles@transmissionbt.com) + + 2008-05-28 + Made JSON_value structure ansi C compliant. This bug was report by + trisk@acm.jhu.edu + + 2008-05-20 + Fixed bug reported by charles@transmissionbt.com where the switching + from static to dynamic parse buffer did not copy the static parse + buffer's content. +*/ + + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "JSON_parser.h" + +#ifdef _MSC_VER +# if _MSC_VER >= 1400 /* Visual Studio 2005 and up */ +# pragma warning(disable:4996) // unsecure sscanf +# endif +#endif + + +#define true 1 +#define false 0 +#define __ -1 /* the universal error code */ + +/* values chosen so that the object size is approx equal to one page (4K) */ +#ifndef JSON_PARSER_STACK_SIZE +# define JSON_PARSER_STACK_SIZE 128 +#endif + +#ifndef JSON_PARSER_PARSE_BUFFER_SIZE +# define JSON_PARSER_PARSE_BUFFER_SIZE 3500 +#endif + +typedef unsigned short UTF16; + +struct JSON_parser_struct { + JSON_parser_callback callback; + void* ctx; + signed char state, before_comment_state, type, escaped, comment, allow_comments, handle_floats_manually; + UTF16 utf16_high_surrogate; + long depth; + long top; + signed char* stack; + long stack_capacity; + char decimal_point; + char* parse_buffer; + size_t parse_buffer_capacity; + size_t parse_buffer_count; + size_t comment_begin_offset; + signed char static_stack[JSON_PARSER_STACK_SIZE]; + char static_parse_buffer[JSON_PARSER_PARSE_BUFFER_SIZE]; +}; + +#define COUNTOF(x) (sizeof(x)/sizeof(x[0])) + +/* + Characters are mapped into these character classes. This allows for + a significant reduction in the size of the state transition table. +*/ + + + +enum classes { + C_SPACE, /* space */ + C_WHITE, /* other whitespace */ + C_LCURB, /* { */ + C_RCURB, /* } */ + C_LSQRB, /* [ */ + C_RSQRB, /* ] */ + C_COLON, /* : */ + C_COMMA, /* , */ + C_QUOTE, /* " */ + C_BACKS, /* \ */ + C_SLASH, /* / */ + C_PLUS, /* + */ + C_MINUS, /* - */ + C_POINT, /* . */ + C_ZERO , /* 0 */ + C_DIGIT, /* 123456789 */ + C_LOW_A, /* a */ + C_LOW_B, /* b */ + C_LOW_C, /* c */ + C_LOW_D, /* d */ + C_LOW_E, /* e */ + C_LOW_F, /* f */ + C_LOW_L, /* l */ + C_LOW_N, /* n */ + C_LOW_R, /* r */ + C_LOW_S, /* s */ + C_LOW_T, /* t */ + C_LOW_U, /* u */ + C_ABCDF, /* ABCDF */ + C_E, /* E */ + C_ETC, /* everything else */ + C_STAR, /* * */ + NR_CLASSES +}; + +static int ascii_class[128] = { +/* + This array maps the 128 ASCII characters into character classes. + The remaining Unicode characters should be mapped to C_ETC. + Non-whitespace control characters are errors. +*/ + __, __, __, __, __, __, __, __, + __, C_WHITE, C_WHITE, __, __, C_WHITE, __, __, + __, __, __, __, __, __, __, __, + __, __, __, __, __, __, __, __, + + C_SPACE, C_ETC, C_QUOTE, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, + C_ETC, C_ETC, C_STAR, C_PLUS, C_COMMA, C_MINUS, C_POINT, C_SLASH, + C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, + C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, + + C_ETC, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E, C_ABCDF, C_ETC, + C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, + C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, + C_ETC, C_ETC, C_ETC, C_LSQRB, C_BACKS, C_RSQRB, C_ETC, C_ETC, + + C_ETC, C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC, + C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_L, C_ETC, C_LOW_N, C_ETC, + C_ETC, C_ETC, C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC, C_ETC, + C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB, C_ETC, C_ETC +}; + + +/* + The state codes. +*/ +enum states { + GO, /* start */ + OK, /* ok */ + OB, /* object */ + KE, /* key */ + CO, /* colon */ + VA, /* value */ + AR, /* array */ + ST, /* string */ + ES, /* escape */ + U1, /* u1 */ + U2, /* u2 */ + U3, /* u3 */ + U4, /* u4 */ + MI, /* minus */ + ZE, /* zero */ + IT, /* integer */ + FR, /* fraction */ + E1, /* e */ + E2, /* ex */ + E3, /* exp */ + T1, /* tr */ + T2, /* tru */ + T3, /* true */ + F1, /* fa */ + F2, /* fal */ + F3, /* fals */ + F4, /* false */ + N1, /* nu */ + N2, /* nul */ + N3, /* null */ + C1, /* / */ + C2, /* / * */ + C3, /* * */ + FX, /* *.* *eE* */ + D1, /* second UTF-16 character decoding started by \ */ + D2, /* second UTF-16 character proceeded by u */ + NR_STATES +}; + +enum actions +{ + CB = -10, /* comment begin */ + CE = -11, /* comment end */ + FA = -12, /* false */ + TR = -13, /* false */ + NU = -14, /* null */ + DE = -15, /* double detected by exponent e E */ + DF = -16, /* double detected by fraction . */ + SB = -17, /* string begin */ + MX = -18, /* integer detected by minus */ + ZX = -19, /* integer detected by zero */ + IX = -20, /* integer detected by 1-9 */ + EX = -21, /* next char is escaped */ + UC = -22 /* Unicode character read */ +}; + + +static int state_transition_table[NR_STATES][NR_CLASSES] = { +/* + The state transition table takes the current state and the current symbol, + and returns either a new state or an action. An action is represented as a + negative number. A JSON text is accepted if at the end of the text the + state is OK and if the mode is MODE_DONE. + + white 1-9 ABCDF etc + space | { } [ ] : , " \ / + - . 0 | a b c d e f l n r s t u | E | * */ +/*start GO*/ {GO,GO,-6,__,-5,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*ok OK*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*object OB*/ {OB,OB,__,-9,__,__,__,__,SB,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*key KE*/ {KE,KE,__,__,__,__,__,__,SB,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*colon CO*/ {CO,CO,__,__,__,__,-2,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*value VA*/ {VA,VA,-6,__,-5,__,__,__,SB,__,CB,__,MX,__,ZX,IX,__,__,__,__,__,FA,__,NU,__,__,TR,__,__,__,__,__}, +/*array AR*/ {AR,AR,-6,__,-5,-7,__,__,SB,__,CB,__,MX,__,ZX,IX,__,__,__,__,__,FA,__,NU,__,__,TR,__,__,__,__,__}, +/*string ST*/ {ST,__,ST,ST,ST,ST,ST,ST,-4,EX,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST}, +/*escape ES*/ {__,__,__,__,__,__,__,__,ST,ST,ST,__,__,__,__,__,__,ST,__,__,__,ST,__,ST,ST,__,ST,U1,__,__,__,__}, +/*u1 U1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U2,U2,U2,U2,U2,U2,U2,U2,__,__,__,__,__,__,U2,U2,__,__}, +/*u2 U2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U3,U3,U3,U3,U3,U3,U3,U3,__,__,__,__,__,__,U3,U3,__,__}, +/*u3 U3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U4,U4,U4,U4,U4,U4,U4,U4,__,__,__,__,__,__,U4,U4,__,__}, +/*u4 U4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,UC,UC,UC,UC,UC,UC,UC,UC,__,__,__,__,__,__,UC,UC,__,__}, +/*minus MI*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ZE,IT,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*zero ZE*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,DF,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*int IT*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,DF,IT,IT,__,__,__,__,DE,__,__,__,__,__,__,__,__,DE,__,__}, +/*frac FR*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__,__}, +/*e E1*/ {__,__,__,__,__,__,__,__,__,__,__,E2,E2,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*ex E2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*exp E3*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*tr T1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T2,__,__,__,__,__,__,__}, +/*tru T2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T3,__,__,__,__}, +/*true T3*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__,__}, +/*fa F1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*fal F2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F3,__,__,__,__,__,__,__,__,__}, +/*fals F3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F4,__,__,__,__,__,__}, +/*false F4*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__,__}, +/*nu N1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N2,__,__,__,__}, +/*nul N2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N3,__,__,__,__,__,__,__,__,__}, +/*null N3*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__}, +/*/ C1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,C2}, +/*/* C2*/ {C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3}, +/** C3*/ {C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,CE,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3}, +/*_. FX*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__,__}, +/*\ D1*/ {__,__,__,__,__,__,__,__,__,D2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*\ D2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,U1,__,__,__,__}, +}; + + +/* + These modes can be pushed on the stack. +*/ +enum modes { + MODE_ARRAY = 1, + MODE_DONE = 2, + MODE_KEY = 3, + MODE_OBJECT = 4 +}; + +static int +push(JSON_parser jc, int mode) +{ +/* + Push a mode onto the stack. Return false if there is overflow. +*/ + jc->top += 1; + if (jc->depth < 0) { + if (jc->top >= jc->stack_capacity) { + size_t bytes_to_allocate; + jc->stack_capacity *= 2; + bytes_to_allocate = jc->stack_capacity * sizeof(jc->static_stack[0]); + if (jc->stack == &jc->static_stack[0]) { + jc->stack = (signed char*)malloc(bytes_to_allocate); + memcpy(jc->stack, jc->static_stack, sizeof(jc->static_stack)); + } else { + jc->stack = (signed char*)realloc(jc->stack, bytes_to_allocate); + } + } + } else { + if (jc->top >= jc->depth) { + return false; + } + } + + jc->stack[jc->top] = mode; + return true; +} + + +static int +pop(JSON_parser jc, int mode) +{ +/* + Pop the stack, assuring that the current mode matches the expectation. + Return false if there is underflow or if the modes mismatch. +*/ + if (jc->top < 0 || jc->stack[jc->top] != mode) { + return false; + } + jc->top -= 1; + return true; +} + + +#define parse_buffer_clear(jc) \ + do {\ + jc->parse_buffer_count = 0;\ + jc->parse_buffer[0] = 0;\ + } while (0) + +#define parse_buffer_pop_back_char(jc)\ + do {\ + assert(jc->parse_buffer_count >= 1);\ + --jc->parse_buffer_count;\ + jc->parse_buffer[jc->parse_buffer_count] = 0;\ + } while (0) + +void delete_JSON_parser(JSON_parser jc) +{ + if (jc) { + if (jc->stack != &jc->static_stack[0]) { + free((void*)jc->stack); + } + if (jc->parse_buffer != &jc->static_parse_buffer[0]) { + free((void*)jc->parse_buffer); + } + free((void*)jc); + } +} + + +JSON_parser +new_JSON_parser(JSON_config* config) +{ +/* + new_JSON_parser starts the checking process by constructing a JSON_parser + object. It takes a depth parameter that restricts the level of maximum + nesting. + + To continue the process, call JSON_parser_char for each character in the + JSON text, and then call JSON_parser_done to obtain the final result. + These functions are fully reentrant. +*/ + + int depth = 0; + JSON_config default_config; + + JSON_parser jc = (JSON_parser)malloc(sizeof(struct JSON_parser_struct)); + + memset(jc, 0, sizeof(*jc)); + + + /* initialize configuration */ + init_JSON_config(&default_config); + + /* set to default configuration if none was provided */ + if (config == NULL) { + config = &default_config; + } + + depth = config->depth; + + /* We need to be able to push at least one object */ + if (depth == 0) { + depth = 1; + } + + jc->state = GO; + jc->top = -1; + + /* Do we want non-bound stack? */ + if (depth > 0) { + jc->stack_capacity = depth; + jc->depth = depth; + if (depth <= (int)COUNTOF(jc->static_stack)) { + jc->stack = &jc->static_stack[0]; + } else { + jc->stack = (signed char*)malloc(jc->stack_capacity * sizeof(jc->static_stack[0])); + } + } else { + jc->stack_capacity = COUNTOF(jc->static_stack); + jc->depth = -1; + jc->stack = &jc->static_stack[0]; + } + + /* set parser to start */ + push(jc, MODE_DONE); + + /* set up the parse buffer */ + jc->parse_buffer = &jc->static_parse_buffer[0]; + jc->parse_buffer_capacity = COUNTOF(jc->static_parse_buffer); + parse_buffer_clear(jc); + + /* set up callback, comment & float handling */ + jc->callback = config->callback; + jc->ctx = config->callback_ctx; + jc->allow_comments = config->allow_comments != 0; + jc->handle_floats_manually = config->handle_floats_manually != 0; + + /* set up decimal point */ + jc->decimal_point = *localeconv()->decimal_point; + + return jc; +} + +static void grow_parse_buffer(JSON_parser jc) +{ + size_t bytes_to_allocate; + jc->parse_buffer_capacity *= 2; + bytes_to_allocate = jc->parse_buffer_capacity * sizeof(jc->parse_buffer[0]); + if (jc->parse_buffer == &jc->static_parse_buffer[0]) { + jc->parse_buffer = (char*)malloc(bytes_to_allocate); + memcpy(jc->parse_buffer, jc->static_parse_buffer, jc->parse_buffer_count); + } else { + jc->parse_buffer = (char*)realloc(jc->parse_buffer, bytes_to_allocate); + } +} + +#define parse_buffer_push_back_char(jc, c)\ + do {\ + if (jc->parse_buffer_count + 1 >= jc->parse_buffer_capacity) grow_parse_buffer(jc);\ + jc->parse_buffer[jc->parse_buffer_count++] = c;\ + jc->parse_buffer[jc->parse_buffer_count] = 0;\ + } while (0) + +#define assert_is_non_container_type(jc) \ + assert( \ + jc->type == JSON_T_NULL || \ + jc->type == JSON_T_FALSE || \ + jc->type == JSON_T_TRUE || \ + jc->type == JSON_T_FLOAT || \ + jc->type == JSON_T_INTEGER || \ + jc->type == JSON_T_STRING) + + +static int parse_parse_buffer(JSON_parser jc) +{ + if (jc->callback) { + JSON_value value, *arg = NULL; + + if (jc->type != JSON_T_NONE) { + assert_is_non_container_type(jc); + + switch(jc->type) { + case JSON_T_FLOAT: + arg = &value; + if (jc->handle_floats_manually) { + value.vu.str.value = jc->parse_buffer; + value.vu.str.length = jc->parse_buffer_count; + } else { + /*sscanf(jc->parse_buffer, "%Lf", &value.vu.float_value);*/ + + /* not checking with end pointer b/c there may be trailing ws */ + value.vu.float_value = strtod(jc->parse_buffer, NULL); + } + break; + case JSON_T_INTEGER: + arg = &value; + sscanf(jc->parse_buffer, JSON_PARSER_INTEGER_SSCANF_TOKEN, &value.vu.integer_value); + break; + case JSON_T_STRING: + arg = &value; + value.vu.str.value = jc->parse_buffer; + value.vu.str.length = jc->parse_buffer_count; + break; + } + + if (!(*jc->callback)(jc->ctx, jc->type, arg)) { + return false; + } + } + } + + parse_buffer_clear(jc); + + return true; +} + +#define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800) +#define IS_LOW_SURROGATE(uc) (((uc) & 0xFC00) == 0xDC00) +#define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000) +static unsigned char utf8_lead_bits[4] = { 0x00, 0xC0, 0xE0, 0xF0 }; + +static int decode_unicode_char(JSON_parser jc) +{ + int i; + unsigned uc = 0; + char* p; + int trail_bytes; + + assert(jc->parse_buffer_count >= 6); + + p = &jc->parse_buffer[jc->parse_buffer_count - 4]; + + for (i = 12; i >= 0; i -= 4, ++p) { + unsigned x = *p; + + if (x >= 'a') { + x -= ('a' - 10); + } else if (x >= 'A') { + x -= ('A' - 10); + } else { + x &= ~0x30u; + } + + assert(x < 16); + + uc |= x << i; + } + + /* clear UTF-16 char from buffer */ + jc->parse_buffer_count -= 6; + jc->parse_buffer[jc->parse_buffer_count] = 0; + + /* attempt decoding ... */ + if (jc->utf16_high_surrogate) { + if (IS_LOW_SURROGATE(uc)) { + uc = DECODE_SURROGATE_PAIR(jc->utf16_high_surrogate, uc); + trail_bytes = 3; + jc->utf16_high_surrogate = 0; + } else { + /* high surrogate without a following low surrogate */ + return false; + } + } else { + if (uc < 0x80) { + trail_bytes = 0; + } else if (uc < 0x800) { + trail_bytes = 1; + } else if (IS_HIGH_SURROGATE(uc)) { + /* save the high surrogate and wait for the low surrogate */ + jc->utf16_high_surrogate = uc; + return true; + } else if (IS_LOW_SURROGATE(uc)) { + /* low surrogate without a preceding high surrogate */ + return false; + } else { + trail_bytes = 2; + } + } + + jc->parse_buffer[jc->parse_buffer_count++] = (char) ((uc >> (trail_bytes * 6)) | utf8_lead_bits[trail_bytes]); + + for (i = trail_bytes * 6 - 6; i >= 0; i -= 6) { + jc->parse_buffer[jc->parse_buffer_count++] = (char) (((uc >> i) & 0x3F) | 0x80); + } + + jc->parse_buffer[jc->parse_buffer_count] = 0; + + return true; +} + +static int add_escaped_char_to_parse_buffer(JSON_parser jc, int next_char) +{ + jc->escaped = 0; + /* remove the backslash */ + parse_buffer_pop_back_char(jc); + switch(next_char) { + case 'b': + parse_buffer_push_back_char(jc, '\b'); + break; + case 'f': + parse_buffer_push_back_char(jc, '\f'); + break; + case 'n': + parse_buffer_push_back_char(jc, '\n'); + break; + case 'r': + parse_buffer_push_back_char(jc, '\r'); + break; + case 't': + parse_buffer_push_back_char(jc, '\t'); + break; + case '"': + parse_buffer_push_back_char(jc, '"'); + break; + case '\\': + parse_buffer_push_back_char(jc, '\\'); + break; + case '/': + parse_buffer_push_back_char(jc, '/'); + break; + case 'u': + parse_buffer_push_back_char(jc, '\\'); + parse_buffer_push_back_char(jc, 'u'); + break; + default: + return false; + } + + return true; +} + +#define add_char_to_parse_buffer(jc, next_char, next_class) \ + do { \ + if (jc->escaped) { \ + if (!add_escaped_char_to_parse_buffer(jc, next_char)) \ + return false; \ + } else if (!jc->comment) { \ + if ((jc->type != JSON_T_NONE) | !((next_class == C_SPACE) | (next_class == C_WHITE)) /* non-white-space */) { \ + parse_buffer_push_back_char(jc, (char)next_char); \ + } \ + } \ + } while (0) + + +#define assert_type_isnt_string_null_or_bool(jc) \ + assert(jc->type != JSON_T_FALSE); \ + assert(jc->type != JSON_T_TRUE); \ + assert(jc->type != JSON_T_NULL); \ + assert(jc->type != JSON_T_STRING) + + +int +JSON_parser_char(JSON_parser jc, int next_char) +{ +/* + After calling new_JSON_parser, call this function for each character (or + partial character) in your JSON text. It can accept UTF-8, UTF-16, or + UTF-32. It returns true if things are looking ok so far. If it rejects the + text, it returns false. +*/ + int next_class, next_state; + +/* + Determine the character's class. +*/ + if (next_char < 0) { + return false; + } + if (next_char >= 128) { + next_class = C_ETC; + } else { + next_class = ascii_class[next_char]; + if (next_class <= __) { + return false; + } + } + + add_char_to_parse_buffer(jc, next_char, next_class); + +/* + Get the next state from the state transition table. +*/ + next_state = state_transition_table[jc->state][next_class]; + if (next_state >= 0) { +/* + Change the state. +*/ + jc->state = next_state; + } else { +/* + Or perform one of the actions. +*/ + switch (next_state) { +/* Unicode character */ + case UC: + if(!decode_unicode_char(jc)) { + return false; + } + /* check if we need to read a second UTF-16 char */ + if (jc->utf16_high_surrogate) { + jc->state = D1; + } else { + jc->state = ST; + } + break; +/* escaped char */ + case EX: + jc->escaped = 1; + jc->state = ES; + break; +/* integer detected by minus */ + case MX: + jc->type = JSON_T_INTEGER; + jc->state = MI; + break; +/* integer detected by zero */ + case ZX: + jc->type = JSON_T_INTEGER; + jc->state = ZE; + break; +/* integer detected by 1-9 */ + case IX: + jc->type = JSON_T_INTEGER; + jc->state = IT; + break; + +/* floating point number detected by exponent*/ + case DE: + assert_type_isnt_string_null_or_bool(jc); + jc->type = JSON_T_FLOAT; + jc->state = E1; + break; + +/* floating point number detected by fraction */ + case DF: + assert_type_isnt_string_null_or_bool(jc); + if (!jc->handle_floats_manually) { +/* + Some versions of strtod (which underlies sscanf) don't support converting + C-locale formated floating point values. +*/ + assert(jc->parse_buffer[jc->parse_buffer_count-1] == '.'); + jc->parse_buffer[jc->parse_buffer_count-1] = jc->decimal_point; + } + jc->type = JSON_T_FLOAT; + jc->state = FX; + break; +/* string begin " */ + case SB: + parse_buffer_clear(jc); + assert(jc->type == JSON_T_NONE); + jc->type = JSON_T_STRING; + jc->state = ST; + break; + +/* n */ + case NU: + assert(jc->type == JSON_T_NONE); + jc->type = JSON_T_NULL; + jc->state = N1; + break; +/* f */ + case FA: + assert(jc->type == JSON_T_NONE); + jc->type = JSON_T_FALSE; + jc->state = F1; + break; +/* t */ + case TR: + assert(jc->type == JSON_T_NONE); + jc->type = JSON_T_TRUE; + jc->state = T1; + break; + +/* closing comment */ + case CE: + jc->comment = 0; + assert(jc->parse_buffer_count == 0); + assert(jc->type == JSON_T_NONE); + jc->state = jc->before_comment_state; + break; + +/* opening comment */ + case CB: + if (!jc->allow_comments) { + return false; + } + parse_buffer_pop_back_char(jc); + if (!parse_parse_buffer(jc)) { + return false; + } + assert(jc->parse_buffer_count == 0); + assert(jc->type != JSON_T_STRING); + switch (jc->stack[jc->top]) { + case MODE_ARRAY: + case MODE_OBJECT: + switch(jc->state) { + case VA: + case AR: + jc->before_comment_state = jc->state; + break; + default: + jc->before_comment_state = OK; + break; + } + break; + default: + jc->before_comment_state = jc->state; + break; + } + jc->type = JSON_T_NONE; + jc->state = C1; + jc->comment = 1; + break; +/* empty } */ + case -9: + parse_buffer_clear(jc); + if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_END, NULL)) { + return false; + } + if (!pop(jc, MODE_KEY)) { + return false; + } + jc->state = OK; + break; + +/* } */ case -8: + parse_buffer_pop_back_char(jc); + if (!parse_parse_buffer(jc)) { + return false; + } + if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_END, NULL)) { + return false; + } + if (!pop(jc, MODE_OBJECT)) { + return false; + } + jc->type = JSON_T_NONE; + jc->state = OK; + break; + +/* ] */ case -7: + parse_buffer_pop_back_char(jc); + if (!parse_parse_buffer(jc)) { + return false; + } + if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_ARRAY_END, NULL)) { + return false; + } + if (!pop(jc, MODE_ARRAY)) { + return false; + } + + jc->type = JSON_T_NONE; + jc->state = OK; + break; + +/* { */ case -6: + parse_buffer_pop_back_char(jc); + if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_BEGIN, NULL)) { + return false; + } + if (!push(jc, MODE_KEY)) { + return false; + } + assert(jc->type == JSON_T_NONE); + jc->state = OB; + break; + +/* [ */ case -5: + parse_buffer_pop_back_char(jc); + if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_ARRAY_BEGIN, NULL)) { + return false; + } + if (!push(jc, MODE_ARRAY)) { + return false; + } + assert(jc->type == JSON_T_NONE); + jc->state = AR; + break; + +/* string end " */ case -4: + parse_buffer_pop_back_char(jc); + switch (jc->stack[jc->top]) { + case MODE_KEY: + assert(jc->type == JSON_T_STRING); + jc->type = JSON_T_NONE; + jc->state = CO; + + if (jc->callback) { + JSON_value value; + value.vu.str.value = jc->parse_buffer; + value.vu.str.length = jc->parse_buffer_count; + if (!(*jc->callback)(jc->ctx, JSON_T_KEY, &value)) { + return false; + } + } + parse_buffer_clear(jc); + break; + case MODE_ARRAY: + case MODE_OBJECT: + assert(jc->type == JSON_T_STRING); + if (!parse_parse_buffer(jc)) { + return false; + } + jc->type = JSON_T_NONE; + jc->state = OK; + break; + default: + return false; + } + break; + +/* , */ case -3: + parse_buffer_pop_back_char(jc); + if (!parse_parse_buffer(jc)) { + return false; + } + switch (jc->stack[jc->top]) { + case MODE_OBJECT: +/* + A comma causes a flip from object mode to key mode. +*/ + if (!pop(jc, MODE_OBJECT) || !push(jc, MODE_KEY)) { + return false; + } + assert(jc->type != JSON_T_STRING); + jc->type = JSON_T_NONE; + jc->state = KE; + break; + case MODE_ARRAY: + assert(jc->type != JSON_T_STRING); + jc->type = JSON_T_NONE; + jc->state = VA; + break; + default: + return false; + } + break; + +/* : */ case -2: +/* + A colon causes a flip from key mode to object mode. +*/ + parse_buffer_pop_back_char(jc); + if (!pop(jc, MODE_KEY) || !push(jc, MODE_OBJECT)) { + return false; + } + assert(jc->type == JSON_T_NONE); + jc->state = VA; + break; +/* + Bad action. +*/ + default: + return false; + } + } + return true; +} + + +int +JSON_parser_done(JSON_parser jc) +{ + const int result = jc->state == OK && pop(jc, MODE_DONE); + + return result; +} + + +int JSON_parser_is_legal_white_space_string(const char* s) +{ + int c, char_class; + + if (s == NULL) { + return false; + } + + for (; *s; ++s) { + c = *s; + + if (c < 0 || c >= 128) { + return false; + } + + char_class = ascii_class[c]; + + if (char_class != C_SPACE && char_class != C_WHITE) { + return false; + } + } + + return true; +} + + + +void init_JSON_config(JSON_config* config) +{ + if (config) { + memset(config, 0, sizeof(*config)); + + config->depth = JSON_PARSER_STACK_SIZE - 1; + } +} diff --git a/lib/cdec_json_parser/JSON_parser.h b/lib/cdec_json_parser/JSON_parser.h new file mode 100644 index 0000000..de98007 --- /dev/null +++ b/lib/cdec_json_parser/JSON_parser.h @@ -0,0 +1,152 @@ +#ifndef JSON_PARSER_H +#define JSON_PARSER_H + +/* JSON_parser.h */ + + +#include + +/* Windows DLL stuff */ +#ifdef _WIN32 +# ifdef JSON_PARSER_DLL_EXPORTS +# define JSON_PARSER_DLL_API __declspec(dllexport) +# else +# define JSON_PARSER_DLL_API __declspec(dllimport) +# endif +#else +# define JSON_PARSER_DLL_API +#endif + +/* Determine the integer type use to parse non-floating point numbers */ +#if __STDC_VERSION__ >= 199901L || HAVE_LONG_LONG == 1 +typedef long long JSON_int_t; +#define JSON_PARSER_INTEGER_SSCANF_TOKEN "%lld" +#define JSON_PARSER_INTEGER_SPRINTF_TOKEN "%lld" +#else +typedef long JSON_int_t; +#define JSON_PARSER_INTEGER_SSCANF_TOKEN "%ld" +#define JSON_PARSER_INTEGER_SPRINTF_TOKEN "%ld" +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum +{ + JSON_T_NONE = 0, + JSON_T_ARRAY_BEGIN, // 1 + JSON_T_ARRAY_END, // 2 + JSON_T_OBJECT_BEGIN, // 3 + JSON_T_OBJECT_END, // 4 + JSON_T_INTEGER, // 5 + JSON_T_FLOAT, // 6 + JSON_T_NULL, // 7 + JSON_T_TRUE, // 8 + JSON_T_FALSE, // 9 + JSON_T_STRING, // 10 + JSON_T_KEY, // 11 + JSON_T_MAX // 12 +} JSON_type; + +typedef struct JSON_value_struct { + union { + JSON_int_t integer_value; + + double float_value; + + struct { + const char* value; + size_t length; + } str; + } vu; +} JSON_value; + +typedef struct JSON_parser_struct* JSON_parser; + +/*! \brief JSON parser callback + + \param ctx The pointer passed to new_JSON_parser. + \param type An element of JSON_type but not JSON_T_NONE. + \param value A representation of the parsed value. This parameter is NULL for + JSON_T_ARRAY_BEGIN, JSON_T_ARRAY_END, JSON_T_OBJECT_BEGIN, JSON_T_OBJECT_END, + JSON_T_NULL, JSON_T_TRUE, and SON_T_FALSE. String values are always returned + as zero-terminated C strings. + + \return Non-zero if parsing should continue, else zero. +*/ +typedef int (*JSON_parser_callback)(void* ctx, int type, const struct JSON_value_struct* value); + + +/*! \brief The structure used to configure a JSON parser object + + \param depth If negative, the parser can parse arbitrary levels of JSON, otherwise + the depth is the limit + \param Pointer to a callback. This parameter may be NULL. In this case the input is merely checked for validity. + \param Callback context. This parameter may be NULL. + \param depth. Specifies the levels of nested JSON to allow. Negative numbers yield unlimited nesting. + \param allowComments. To allow C style comments in JSON, set to non-zero. + \param handleFloatsManually. To decode floating point numbers manually set this parameter to non-zero. + + \return The parser object. +*/ +typedef struct { + JSON_parser_callback callback; + void* callback_ctx; + int depth; + int allow_comments; + int handle_floats_manually; +} JSON_config; + + +/*! \brief Initializes the JSON parser configuration structure to default values. + + The default configuration is + - 127 levels of nested JSON (depends on JSON_PARSER_STACK_SIZE, see json_parser.c) + - no parsing, just checking for JSON syntax + - no comments + + \param config. Used to configure the parser. +*/ +JSON_PARSER_DLL_API void init_JSON_config(JSON_config* config); + +/*! \brief Create a JSON parser object + + \param config. Used to configure the parser. Set to NULL to use the default configuration. + See init_JSON_config + + \return The parser object. +*/ +JSON_PARSER_DLL_API extern JSON_parser new_JSON_parser(JSON_config* config); + +/*! \brief Destroy a previously created JSON parser object. */ +JSON_PARSER_DLL_API extern void delete_JSON_parser(JSON_parser jc); + +/*! \brief Parse a character. + + \return Non-zero, if all characters passed to this function are part of are valid JSON. +*/ +JSON_PARSER_DLL_API extern int JSON_parser_char(JSON_parser jc, int next_char); + +/*! \brief Finalize parsing. + + Call this method once after all input characters have been consumed. + + \return Non-zero, if all parsed characters are valid JSON, zero otherwise. +*/ +JSON_PARSER_DLL_API extern int JSON_parser_done(JSON_parser jc); + +/*! \brief Determine if a given string is valid JSON white space + + \return Non-zero if the string is valid, zero otherwise. +*/ +JSON_PARSER_DLL_API extern int JSON_parser_is_legal_white_space_string(const char* s); + + +#ifdef __cplusplus +} +#endif + + +#endif /* JSON_PARSER_H */ diff --git a/lib/cdec_json_parser/LICENSE b/lib/cdec_json_parser/LICENSE new file mode 100644 index 0000000..a390938 --- /dev/null +++ b/lib/cdec_json_parser/LICENSE @@ -0,0 +1,213 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +---------------------------------------------- + +L-BFGS CODE FROM COMPUTATIONAL CRYSTALLOGRAPHY TOOLBOX (CCTBX) + +This package includes source code (training/lbfgs.h) based on source +code distributed as part of the Compational Crystallography Toolbox +(CCTBX), which has separate copyright notices and license terms. Use of +this source code is subject to the terms and conditions of the license +contained in the file LICENSE.cctbx . + diff --git a/lib/cdec_json_parser/Makefile b/lib/cdec_json_parser/Makefile new file mode 100644 index 0000000..d7bb81f --- /dev/null +++ b/lib/cdec_json_parser/Makefile @@ -0,0 +1,7 @@ +all: + gcc JSON_parser.c -c + g++ json_parse.cc -c + +clean: + rm -f *.o + diff --git a/lib/cdec_json_parser/json_parse.cc b/lib/cdec_json_parser/json_parse.cc new file mode 100644 index 0000000..4580fc8 --- /dev/null +++ b/lib/cdec_json_parser/json_parse.cc @@ -0,0 +1,31 @@ +#include "json_parse.h" + +#include +#include + +using namespace std; + + +bool JSONParser::HandleJSONEvent(int type, const JSON_value* value) { + switch(type) { + case JSON_T_OBJECT_BEGIN: + case JSON_T_OBJECT_END: + case JSON_T_ARRAY_BEGIN: + case JSON_T_ARRAY_END: + case JSON_T_NULL: + case JSON_T_TRUE: + case JSON_T_FALSE: + case JSON_T_KEY: + case JSON_T_INTEGER: + case JSON_T_FLOAT: + break; + case JSON_T_STRING: + string s = value->vu.str.value; + string t = s.substr(1, 4); + if (t == "Goal") + cerr << t << endl; + break; + } + return true; +} + diff --git a/lib/cdec_json_parser/json_parse.h b/lib/cdec_json_parser/json_parse.h new file mode 100644 index 0000000..80c037b --- /dev/null +++ b/lib/cdec_json_parser/json_parse.h @@ -0,0 +1,62 @@ +#ifndef _JSON_WRAPPER_H_ +#define _JSON_WRAPPER_H_ + +#include +#include +#include "JSON_parser.h" + +class JSONParser { + public: + JSONParser() { + state = -1; + init_JSON_config(&config); + hack.mf = &JSONParser::Callback; + config.depth = 10; + config.callback_ctx = reinterpret_cast(this); + config.callback = hack.cb; + config.allow_comments = 1; + config.handle_floats_manually = 1; + jc = new_JSON_parser(&config); + } + virtual ~JSONParser() { + delete_JSON_parser(jc); + } + bool Parse(std::istream* in) { + int count = 0; + int lc = 1; + for (; in ; ++count) { + int next_char = in->get(); + if (!in->good()) break; + if (lc == '\n') { ++lc; } + if (!JSON_parser_char(jc, next_char)) { + std::cerr << "JSON_parser_char: syntax error, line " << lc << " (byte " << count << ")" << std::endl; + return false; + } + } + if (!JSON_parser_done(jc)) { + std::cerr << "JSON_parser_done: syntax error\n"; + return false; + } + return true; + } + static void WriteEscapedString(const std::string& in, std::ostream* out); + protected: + bool HandleJSONEvent(int type, const JSON_value* value); + private: + int state; + std::string cur_key; + std::string cat; + int Callback(int type, const JSON_value* value) { + if (HandleJSONEvent(type, value)) return 1; + return 0; + } + JSON_parser_struct* jc; + JSON_config config; + typedef int (JSONParser::* MF)(int type, const struct JSON_value_struct* value); + union CBHack { + JSON_parser_callback cb; + MF mf; + } hack; +}; + +#endif diff --git a/lib/gason b/lib/gason new file mode 160000 index 0000000..ede29fc --- /dev/null +++ b/lib/gason @@ -0,0 +1 @@ +Subproject commit ede29fc5f0de8e47fd82c09f2f98123d2c867f28 diff --git a/lib/json-cpp b/lib/json-cpp new file mode 160000 index 0000000..4eb4b47 --- /dev/null +++ b/lib/json-cpp @@ -0,0 +1 @@ +Subproject commit 4eb4b47cf4d622bc7bf34071d6b68fc5beb37051 diff --git a/lib/jsoncpp b/lib/jsoncpp new file mode 160000 index 0000000..3515db1 --- /dev/null +++ b/lib/jsoncpp @@ -0,0 +1 @@ +Subproject commit 3515db184a836ce73b99c064bcc83884570657c2 diff --git a/lib/jsonxx b/lib/jsonxx new file mode 160000 index 0000000..b76bd0e --- /dev/null +++ b/lib/jsonxx @@ -0,0 +1 @@ +Subproject commit b76bd0e69d94cbec23a0db4e9b73930f55a4a803 diff --git a/lib/msgpack-c b/lib/msgpack-c new file mode 160000 index 0000000..dd083ca --- /dev/null +++ b/lib/msgpack-c @@ -0,0 +1 @@ +Subproject commit dd083ca933fde2a23955372eb87fe7f890c6dc1e diff --git a/lib/picojson b/lib/picojson new file mode 160000 index 0000000..2120a6c --- /dev/null +++ b/lib/picojson @@ -0,0 +1 @@ +Subproject commit 2120a6c219d9b08922ba0ffe56fbb5b8862d62bb diff --git a/lib/rapidjson b/lib/rapidjson new file mode 160000 index 0000000..4600680 --- /dev/null +++ b/lib/rapidjson @@ -0,0 +1 @@ +Subproject commit 4600680a56ac007b0c790cfe27db55e26f3d5564 diff --git a/lib/sajson b/lib/sajson new file mode 160000 index 0000000..d9be1c4 --- /dev/null +++ b/lib/sajson @@ -0,0 +1 @@ +Subproject commit d9be1c482b9a8fa4ff223ee6a0dcf06fba2e669c diff --git a/memusg.sh b/memusg.sh deleted file mode 100755 index e3b6f90..0000000 --- a/memusg.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - - -"$@" & -pid=$! peak=0 -while true; do - sleep 1 - sample="$(ps -o rss= $pid 2> /dev/null)" || break - let peak='sample > peak ? sample : peak' -done -#echo "Peak: $peak" 1>&2 -echo "$(( ${peak%% *} / 1024)) m" - diff --git a/run.sh b/run.sh deleted file mode 100755 index 83144b3..0000000 --- a/run.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/zsh - - -export PATH=$PATH:/home/pks/src/scripts/ -export GEM_PATH=$GEM_PATH:/home/pks/lib/ruby -REPEAT=10 - -rm -f .overall - -echo -echo "JSON parsing benchmark" -echo "----------------------" -echo " REAPEAT=$REPEAT" -echo - -# fails: test_MicroJSON.sh \ -for prg in \ - test_cdec_json_parser \ - test_gason \ - test_JsonBox \ - test_jsoncpp \ - test_json-cpp \ - test_jsonxx \ - test_libjson \ - test_nosjob \ - test_picojson \ - test_rapidjson \ - test_sajson -do - echo "[$prg]" - sync; echo 3 > /proc/sys/vm/drop_caches - echo > .overall - for file in `ls -S data/*.json`; do - echo "$file:\t$(./benchmark.rb $REPEAT ./$prg $file 2>/dev/null | tee -a .overall | avg | round 2) s" - done - echo "---" - echo "overall:\t$(avg < .overall | round 2)" - echo " memory:\t$(./memusg.sh ./$prg data/1020.json 2>/dev/null)" - echo -done - -rm .overall - diff --git a/run_msgpack.sh b/run_msgpack.sh deleted file mode 100755 index b2f6eb5..0000000 --- a/run_msgpack.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/zsh - - -export PATH=$PATH:/home/pks/src/scripts/ -export GEM_PATH=$GEM_PATH:/home/pks/lib/ruby -REPEAT=10 - -rm -f .overall_msgpack - -echo -echo "MSGPACK parsing benchmark" -echo "-------------------------" -echo " REAPEAT=$REPEAT" -echo - -for prg in \ - test_msgpack \ - test_msgpack_streaming \ - test_msgpack_ruby -do - echo "[$prg]" - sync; echo 3 > /proc/sys/vm/drop_caches - echo > .overall_msgpack - if [[ $prg == test_msgpack_streaming ]]; then - A="2" - else - A="" - fi - for file in `ls -S data/*.pak$A`; do - echo "$file:\t$(./benchmark.rb $REPEAT ./$prg $file 2>/dev/null | tee -a .overall_msgpack | avg | round 2) s" - done - echo "---" - echo "overall:\t$(avg < .overall_msgpack | round 2)" - echo " memory:\t$(./memusg.sh ./$prg data/1020.pak 2>/dev/null)" - echo -done - -rm .overall_msgpack - diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..c81c7dd --- /dev/null +++ b/src/Makefile @@ -0,0 +1,61 @@ +COMPILER := g++ +CXXFLAGS := -O3 -march=native -mtune=native -I../lib + + +all: test_gason test_json-cpp test_jsoncpp test_libjson test_picojson test_rapidjson test_sajson test_JsonBox test_jsonxx test_MicroJSON test_nosjob test_cdec_json_parser test_msgpack test_msgpack_streaming + +test_gason: test_gason.cc + $(COMPILER) $(CXXFLAGS) -std=c++11 test_gason.cc -o test_gason ../lib/gason/gason.o + +test_json-cpp: test_json-cpp.cc + $(COMPILER) $(CXXFLAGS) -std=c++11 test_json-cpp.cc -o test_json-cpp + +test_jsoncpp: test_jsoncpp.cc + $(COMPILER) $(CXXFLAGS) test_jsoncpp.cc ../lib/jsoncpp/lib/libjsoncpp.a -o test_jsoncpp + +test_libjson: test_libjson.cc + $(COMPILER) $(CXXFLAGS) test_libjson.cc ../lib/libjson-7.6.1/libjson.a -o test_libjson + +test_picojson: test_picojson.cc + $(COMPILER) $(CXXFLAGS) test_picojson.cc -o test_picojson + +test_rapidjson: test_rapidjson.cc + $(COMPILER) $(CXXFLAGS) test_rapidjson.cc -o test_rapidjson + +test_sajson: test_sajson.cc + $(COMPILER) $(CXXFLAGS) test_sajson.cc -o test_sajson + +test_JsonBox: test_JsonBox.cc + $(COMPILER) $(CXXFLAGS) test_JsonBox.cc -I../lib/JsonBox/include/ ../lib/JsonBox/libJsonBox.a -o test_JsonBox + +test_jsonxx: test_jsonxx.cc + $(COMPILER) $(CXXFLAGS) test_jsonxx.cc ../lib/jsonxx/jsonxx.o -o test_jsonxx + +test_MicroJSON: test_MicroJSON.cc + #$(COMPILER) $(CXXFLAGS) test_MicroJSON.cc ../lib/MicroJSON-0.3.2/libMicroJSON-0.so -lUTF8Strings-1 -o test_MicroJSON + +test_nosjob: test_nosjob.cc + $(COMPILER) $(CXXFLAGS) test_nosjob.cc ../lib/nosjob-e1d67401fcda6e05/libnosjob.a -o test_nosjob + +test_cdec_json_parser: test_cdec_json_parser.cc + $(COMPILER) $(CXXFLAGS) test_cdec_json_parser.cc ../lib/cdec_json_parser/json_parse.o ../lib/cdec_json_parser/JSON_parser.o -o test_cdec_json_parser + +test_msgpack: test_msgpack.cc + $(COMPILER) $(CXXFLAGS) test_msgpack.cc -I../lib/msgpack-c/include/ ../lib/msgpack-c/lib/libmsgpack.a -o test_msgpack + +test_msgpack_streaming: test_msgpack_streaming.cc + $(COMPILER) $(CXXFLAGS) test_msgpack_streaming.cc -I../lib/msgpack-c/include/ ../lib/msgpack-c/lib/libmsgpack.a -o test_msgpack_streaming + +make_pak: make_pak.cc + $(COMPILER) $(CXXFLAGS) -std=c++11 make_pak.cc ../lib/msgpack-c/lib/libmsgpack.a -o make_pak + +make_pak_s: make_pak_s.cc + $(COMPILER) $(CXXFLAGS) -std=c++11 make_pak_s.cc ../lib/msgpack-c/lib/libmsgpack.a -o make_pak_s + +clean: + rm -f test_gason test_json-cpp test_jsoncpp test_libjson + rm -f test_picojson test_rapidjson test_sajson test_JsonBox + rm -f test_jsonxx test_MicroJSON test_nosjob test_cdec_json_parser + rm -f test_msgpack test_msgpack_streaming + rm -f make_pak make_pak_s + diff --git a/src/make_pak.cc b/src/make_pak.cc new file mode 100644 index 0000000..bacdfe3 --- /dev/null +++ b/src/make_pak.cc @@ -0,0 +1,125 @@ +#include +#include +#include +#include +#include +#include + + +/* + * https://github.com/ascheglov/json-cpp + * + */ +#include "json-cpp/single_include/json-cpp.hpp" + +using namespace std; + + +struct Node { + int id; + string cat; + vector span; + + MSGPACK_DEFINE(id, cat, span); +}; + +struct Vector { + double CountEF; + double EgivenFCoherent; + double Glue; + double IsSingletonF; + double IsSingletonFE; + double LanguageModel; + double LanguageModel_OOV; + double MaxLexFgivenE; + double MaxLexEgivenF; + double PassThrough; + double PassThrough_1; + double PassThrough_2; + double PassThrough_3; + double PassThrough_4; + double PassThrough_5; + double PassThrough_6; + double SampleCountF; + double WordPenalty; + + MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty); +}; + +struct Edge { + int head; + string rule; + vector tails; + Vector f; + double weight; + + MSGPACK_DEFINE(head, rule, tails, f, weight); +}; + +struct Hg { + Vector weights; + vector nodes; + vector edges; + + MSGPACK_DEFINE(weights, nodes, edges); +}; + +template inline void +serialize(jsoncpp::Stream& stream, Hg& o) +{ + fields(o, stream, "weights", o.weights, "nodes", o.nodes, "edges", o.edges); +} + +template inline void +serialize(jsoncpp::Stream& stream, Edge& o) +{ + fields(o, stream, "head", o.head, "rule", o.rule, "tails", o.tails, "f", o.f, "weight", o.weight); +} + +template inline void +serialize(jsoncpp::Stream& stream, Vector& o) +{ + fields(o, stream, "EgivenFCoherent", o.EgivenFCoherent, "SampleCountF", o.SampleCountF, "CountEF", o.CountEF, "MaxLexFgivenE", o.MaxLexFgivenE, "MaxLexEgivenF", o.MaxLexEgivenF, "IsSingletonF", o.IsSingletonF, "IsSingletonFE", o.IsSingletonFE, "LanguageModel", o.LanguageModel, "LanguageModel_OOV", o.LanguageModel_OOV, "PassThrough", o.PassThrough, "PassThrough_1", o.PassThrough_1, "PassThrough_2", o.PassThrough_2, "PassThrough_3", o.PassThrough_3, "PassThrough_4", o.PassThrough_4, "PassThrough_5", o.PassThrough_5, "PassThrough_6", o.PassThrough_6, "WordPenalty", o.WordPenalty, "Glue", o.Glue); +} + +template inline void +serialize(jsoncpp::Stream& stream, Node& o) +{ + fields(o, stream, "id", o.id, "cat", o.cat, "span", o.span); +} + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs) ), + (istreambuf_iterator())); + + Hg hg; + Vector w; + hg.weights = w; + vector nodes; + hg.nodes = nodes; + vector edges; + hg.edges = edges; + jsoncpp::parse(hg, json_str); + + FILE* file = fopen(argv[2], "wb"); + msgpack::fbuffer fbuf(file); + msgpack::pack(fbuf, hg); + fclose(file); + + /*ifstream ifs1(argv[2]); + string str1((istreambuf_iterator(jfs1)), + (istreambuf_iterator())); + + msgpack::zone zone; + msgpack::object obj; + msgpack::unpack(str1.data(), str1.size(), NULL, &zone, &obj); + + Hg hg; + obj.convert(&hg);*/ + + return 0; +} + diff --git a/src/make_pak_s.cc b/src/make_pak_s.cc new file mode 100644 index 0000000..a72bc60 --- /dev/null +++ b/src/make_pak_s.cc @@ -0,0 +1,121 @@ +#include +#include +#include +#include +#include +#include + + +/* + * https://github.com/ascheglov/json-cpp + * + */ +#include "json-cpp/single_include/json-cpp.hpp" + +using namespace std; + + +struct Node { + int id; + string cat; + vector span; + + MSGPACK_DEFINE(id, cat, span); +}; + +struct Vector { + double CountEF; + double EgivenFCoherent; + double Glue; + double IsSingletonF; + double IsSingletonFE; + double LanguageModel; + double LanguageModel_OOV; + double MaxLexFgivenE; + double MaxLexEgivenF; + double PassThrough; + double PassThrough_1; + double PassThrough_2; + double PassThrough_3; + double PassThrough_4; + double PassThrough_5; + double PassThrough_6; + double SampleCountF; + double WordPenalty; + + MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty); +}; + +struct Edge { + int head; + string rule; + vector tails; + Vector f; + double weight; + + MSGPACK_DEFINE(head, rule, tails, f, weight); +}; + +struct Hg { + Vector weights; + vector nodes; + vector edges; + + MSGPACK_DEFINE(weights, nodes, edges); +}; + +template inline void +serialize(jsoncpp::Stream& stream, Hg& o) +{ + fields(o, stream, "weights", o.weights, "nodes", o.nodes, "edges", o.edges); +} + +template inline void +serialize(jsoncpp::Stream& stream, Edge& o) +{ + fields(o, stream, "head", o.head, "rule", o.rule, "tails", o.tails, "f", o.f, "weight", o.weight); +} + +template inline void +serialize(jsoncpp::Stream& stream, Vector& o) +{ + fields(o, stream, "EgivenFCoherent", o.EgivenFCoherent, "SampleCountF", o.SampleCountF, "CountEF", o.CountEF, "MaxLexFgivenE", o.MaxLexFgivenE, "MaxLexEgivenF", o.MaxLexEgivenF, "IsSingletonF", o.IsSingletonF, "IsSingletonFE", o.IsSingletonFE, "LanguageModel", o.LanguageModel, "LanguageModel_OOV", o.LanguageModel_OOV, "PassThrough", o.PassThrough, "PassThrough_1", o.PassThrough_1, "PassThrough_2", o.PassThrough_2, "PassThrough_3", o.PassThrough_3, "PassThrough_4", o.PassThrough_4, "PassThrough_5", o.PassThrough_5, "PassThrough_6", o.PassThrough_6, "WordPenalty", o.WordPenalty, "Glue", o.Glue); +} + +template inline void +serialize(jsoncpp::Stream& stream, Node& o) +{ + fields(o, stream, "id", o.id, "cat", o.cat, "span", o.span); +} + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs) ), + (istreambuf_iterator())); + + Hg hg; + Vector w; + hg.weights = w; + vector nodes; + hg.nodes = nodes; + vector edges; + hg.edges = edges; + jsoncpp::parse(hg, json_str); + + FILE* file = fopen(argv[2], "wb"); + msgpack::fbuffer fbuf(file); + msgpack::pack(fbuf, hg.nodes.size()); + msgpack::pack(fbuf, hg.edges.size()); + msgpack::pack(fbuf, hg.weights); + for (auto it = hg.nodes.begin(); it != hg.nodes.end(); it++) + msgpack::pack(fbuf, *it); + for (auto it = hg.edges.begin(); it != hg.edges.end(); it++) + msgpack::pack(fbuf, *it); + + fclose(file); + + return 0; +} + diff --git a/src/test_JsonBox.cc b/src/test_JsonBox.cc new file mode 100644 index 0000000..e1b22c4 --- /dev/null +++ b/src/test_JsonBox.cc @@ -0,0 +1,24 @@ +#include +#include + +/* + * https://github.com/anhero/JsonBox + * + */ +#include "JsonBox/include/JsonBox.h" + +using namespace std; + + +int +main(int argc, char** argv) +{ + JsonBox::Value v; + v.loadFromFile(argv[1]); + JsonBox::Value w = v["edges"].getArray().back(); + string s = w["rule"].getString(); + cerr << s.substr(1,4) << endl; + + return 0; +} + diff --git a/src/test_MicroJSON.cc b/src/test_MicroJSON.cc new file mode 100644 index 0000000..d8d4969 --- /dev/null +++ b/src/test_MicroJSON.cc @@ -0,0 +1,28 @@ +#include +#include +#include + +/* + * http://grigory.info/MicroJSON.About.html + * + */ +#include "MicroJSON-0.3.2/Node.h" + +using namespace std; + + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + MicroJSON::Node Root; + Root.Parse(json_str); + MicroJSON::Node* edges = Root.GetSubNode("edges"); + cerr << edges->GetChildren().back()->GetSubNode("rule") << endl; + + return 0; +} + diff --git a/src/test_MicroJSON.sh b/src/test_MicroJSON.sh new file mode 100755 index 0000000..89d1d3d --- /dev/null +++ b/src/test_MicroJSON.sh @@ -0,0 +1,5 @@ +#!/bin/sh -x + + +LD_LIBRARY_PATH="/home/pks/z/test/json_test/MicroJSON-0.3.2" ./test_MicroJSON $1 + diff --git a/src/test_cdec_json_parser.cc b/src/test_cdec_json_parser.cc new file mode 100644 index 0000000..e805318 --- /dev/null +++ b/src/test_cdec_json_parser.cc @@ -0,0 +1,25 @@ +#include +#include +#include + +/* + * https://github.com/redpony/cdec/tree/master/decoder + * + */ +#include "cdec_json_parser/json_parse.h" + +using namespace std; + + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + + istream& s = ifs; + JSONParser p; + p.Parse(&s); + + return 0; +} + diff --git a/src/test_gason.cc b/src/test_gason.cc new file mode 100644 index 0000000..d78c385 --- /dev/null +++ b/src/test_gason.cc @@ -0,0 +1,71 @@ +#include +#include +#include +#include + +/* + * https://github.com/vivkin/gason + * + */ +#include "gason/gason.h" + +using namespace std; + + +void +print(const char *s) +{ + string u(s); + u = u.substr(1, 4); + if (u == "Goal") { + cerr << u << endl; + } +} + +void +walk(JsonValue o) +{ + switch (o.getTag()) { + case JSON_TAG_NUMBER: + break; + case JSON_TAG_BOOL: + break; + case JSON_TAG_STRING: + print(o.toString()); + break; + case JSON_TAG_ARRAY: + if (!o.toNode()) + break; + for (auto i : o) + walk(i->value); + break; + case JSON_TAG_OBJECT: + if (!o.toNode()) + break; + for (auto i : o) { + print(i->key); + walk(i->value); + } + break; + case JSON_TAG_NULL: + break; + } +} + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + char* s = strdup(json_str.c_str()); + char *p; + JsonValue v; + JsonAllocator a; + JsonParseStatus status = jsonParse(s, &p, &v, a); + walk(v); + + return 0; +} + diff --git a/src/test_json-cpp.cc b/src/test_json-cpp.cc new file mode 100644 index 0000000..c27db06 --- /dev/null +++ b/src/test_json-cpp.cc @@ -0,0 +1,100 @@ +#include +#include +#include + +/* + * https://github.com/ascheglov/json-cpp + * + */ +#include "json-cpp/single_include/json-cpp.hpp" + +using namespace std; + + +struct Node { + int id; + string cat; + vector span; +}; + +struct Vector { + double CountEF; + double EgivenFCoherent; + double Glue; + double IsSingletonF; + double IsSingletonFE; + double LanguageModel; + double LanguageModel_OOV; + double MaxLexFgivenE; + double MaxLexEgivenF; + double PassThrough; + double PassThrough_1; + double PassThrough_2; + double PassThrough_3; + double PassThrough_4; + double PassThrough_5; + double PassThrough_6; + double SampleCountF; + double WordPenalty; +}; + +struct Edge { + int head; + string rule; + vector tails; + Vector f; + double weight; +}; + +struct Hg { + Vector weights; + vector nodes; + vector edges; + vector rules; +}; + +template inline void +serialize(jsoncpp::Stream& stream, Hg& o) +{ + fields(o, stream, "weights", o.weights, "nodes", o.nodes, "edges", o.edges, "rules", o.rules); +} + +template inline void +serialize(jsoncpp::Stream& stream, Edge& o) +{ + fields(o, stream, "head", o.head, "rule", o.rule, "tails", o.tails, "f", o.f, "weight", o.weight); +} + +template inline void +serialize(jsoncpp::Stream& stream, Vector& o) +{ + fields(o, stream, "EgivenFCoherent", o.EgivenFCoherent, "SampleCountF", o.SampleCountF, "CountEF", o.CountEF, "MaxLexFgivenE", o.MaxLexFgivenE, "MaxLexEgivenF", o.MaxLexEgivenF, "IsSingletonF", o.IsSingletonF, "IsSingletonFE", o.IsSingletonFE, "LanguageModel", o.LanguageModel, "LanguageModel_OOV", o.LanguageModel_OOV, "PassThrough", o.PassThrough, "PassThrough_1", o.PassThrough_1, "PassThrough_2", o.PassThrough_2, "PassThrough_3", o.PassThrough_3, "PassThrough_4", o.PassThrough_4, "PassThrough_5", o.PassThrough_5, "PassThrough_6", o.PassThrough_6, "WordPenalty", o.WordPenalty, "Glue", o.Glue); +} + +template inline void +serialize(jsoncpp::Stream& stream, Node& o) +{ + fields(o, stream, "id", o.id, "cat", o.cat, "span", o.span); +} + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs) ), + (istreambuf_iterator())); + + Hg hg; + Vector w; + hg.weights = w; + vector nodes; + hg.nodes = nodes; + vector edges; + hg.edges = edges; + jsoncpp::parse(hg, json_str); + Edge& last_edge = hg.edges.back(); + cerr << last_edge.rule.substr(1, 4) << endl; + + return 0; +} + diff --git a/src/test_jsoncpp.cc b/src/test_jsoncpp.cc new file mode 100644 index 0000000..ab3bd0c --- /dev/null +++ b/src/test_jsoncpp.cc @@ -0,0 +1,29 @@ +#include +#include +#include + +/* + * https://github.com/open-source-parsers/jsoncpp + * + */ +#include "jsoncpp/include/json/json.h" + +using namespace std; + + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + Json::Value v; + Json::Reader reader; + reader.parse(json_str, v); + Json::Value last_edge = v["edges"][v["edges"].size()-1]; + cerr << last_edge["rule"].asString().substr(1, 4) << endl; + + return 0; +} + diff --git a/src/test_jsonxx.cc b/src/test_jsonxx.cc new file mode 100644 index 0000000..d06640e --- /dev/null +++ b/src/test_jsonxx.cc @@ -0,0 +1,35 @@ +#include +#include +#include + +/* + * https://github.com/hjiang/jsonxx + * + */ +#include "jsonxx/jsonxx.h" + +using namespace std; + + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + jsonxx::Object o; + o.parse(json_str); + jsonxx::Array edges = o.get("edges"); + jsonxx::Array::container::const_iterator it = edges.values().begin(), end = edges.values().end(); + while (it != end) { + jsonxx::Object e = (*it)->get(); + string s = e.get("rule").substr(1, 4); + if (s == "Goal") + cerr << s << endl; + ++it; + } + + return 0; +} + diff --git a/src/test_libjson.cc b/src/test_libjson.cc new file mode 100644 index 0000000..03dc3bf --- /dev/null +++ b/src/test_libjson.cc @@ -0,0 +1,44 @@ +#include +#include +#include + +/* + * http://sourceforge.net/projects/libjson/ + * + */ +#include "libjson-7.6.1/libjson.h" + +using namespace std; + + +void +walk(const JSONNode & n) +{ + JSONNode::const_iterator it = n.begin(); + while (it != n.end()){ + if (it->type() == JSON_ARRAY || it->type() == JSON_NODE){ + walk(*it); + } + string s = it->as_string(); + if (s.size() >= 5) { + string t = s.substr(1, 4); + if (t == "Goal") + cerr << t << endl; + } + ++it; + } +} + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + JSONNode n = libjson::parse(json_str); + walk(n); + + return 0; +} + diff --git a/src/test_msgpack.cc b/src/test_msgpack.cc new file mode 100644 index 0000000..70cf1c8 --- /dev/null +++ b/src/test_msgpack.cc @@ -0,0 +1,81 @@ +#include +#include +#include + +/* + * http://msgpack.org/ + * + */ +#include +#include + +using namespace std; + + +struct Node { + int id; + string cat; + vector span; + + MSGPACK_DEFINE(id, cat, span); +}; + +struct Vector { + double CountEF; + double EgivenFCoherent; + double Glue; + double IsSingletonF; + double IsSingletonFE; + double LanguageModel; + double LanguageModel_OOV; + double MaxLexFgivenE; + double MaxLexEgivenF; + double PassThrough; + double PassThrough_1; + double PassThrough_2; + double PassThrough_3; + double PassThrough_4; + double PassThrough_5; + double PassThrough_6; + double SampleCountF; + double WordPenalty; + + MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty); +}; + +struct Edge { + int head; + string rule; + vector tails; + Vector f; + double weight; + + MSGPACK_DEFINE(head, rule, tails, f, weight); +}; + +struct Hg { + Vector weights; + vector nodes; + vector edges; + + MSGPACK_DEFINE(weights, nodes, edges); +}; + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + msgpack::zone zone; + msgpack::object obj; + msgpack::unpack(str.data(), str.size(), NULL, &zone, &obj); + Hg hg; + obj.convert(&hg); + Edge last_edge = hg.edges.back(); + cerr << last_edge.rule.substr(1, 4) << endl; + + return 0; +} + diff --git a/src/test_msgpack_ruby b/src/test_msgpack_ruby new file mode 100755 index 0000000..0f2d387 --- /dev/null +++ b/src/test_msgpack_ruby @@ -0,0 +1,9 @@ +#!/usr/bin/env ruby + +require 'msgpack' + + +msg = MessagePack.unpack(File.new(ARGV[0]).read) + +STDERR.write "#{msg["edges"].last()["rule"][1..4]}\n" + diff --git a/src/test_msgpack_streaming.cc b/src/test_msgpack_streaming.cc new file mode 100644 index 0000000..1a3cf55 --- /dev/null +++ b/src/test_msgpack_streaming.cc @@ -0,0 +1,99 @@ +#include +#include +#include +#include +#include + +using namespace std; + + + +struct Node { + int id; + string cat; + vector span; + + MSGPACK_DEFINE(id, cat, span); +}; + +struct Vector { + double CountEF; + double EgivenFCoherent; + double Glue; + double IsSingletonF; + double IsSingletonFE; + double LanguageModel; + double LanguageModel_OOV; + double MaxLexFgivenE; + double MaxLexEgivenF; + double PassThrough; + double PassThrough_1; + double PassThrough_2; + double PassThrough_3; + double PassThrough_4; + double PassThrough_5; + double PassThrough_6; + double SampleCountF; + double WordPenalty; + + MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty); +}; + +struct Edge { + int head; + string rule; + vector tails; + Vector f; + double weight; + + MSGPACK_DEFINE(head, rule, tails, f, weight); +}; + +struct Hg { + Vector weights; + vector nodes; + vector edges; + + MSGPACK_DEFINE(weights, nodes, edges); +}; + +int +main(int argc, char** argv) { + ifstream ifs(argv[1]); + + size_t count = 0, n_, e_; + msgpack::unpacker pac; + while(true) { + pac.reserve_buffer(32*1024); + size_t bytes = ifs.readsome(pac.buffer(), pac.buffer_capacity()); + pac.buffer_consumed(bytes); + msgpack::unpacked result; + while(pac.next(&result)) { + msgpack::object obj = result.get(); + if (count == 0) { + obj.convert(&n_); + n_ += 2; + } else if (count == 1) { + obj.convert(&e_); + e_ += 2; + } else if (count == 2) { + Vector v; + obj.convert(&v); + } else if (count > 2 && count <= n_) { + Node n; + obj.convert(&n); + } else if (count > n_ && count <= n_+e_+1) { + Edge e; + obj.convert(&e); + string s = e.rule.substr(1, 4); + if (s == "Goal") + cout << s << endl; + } + count++; + } + if (!bytes) break; + } + + return 0; +} + diff --git a/src/test_nosjob.cc b/src/test_nosjob.cc new file mode 100644 index 0000000..cf8891f --- /dev/null +++ b/src/test_nosjob.cc @@ -0,0 +1,32 @@ +#include +#include +#include + +/* + * http://fossil.wanderinghorse.net/repos/nosjob/index.cgi/index + * + */ +#include "nosjob-e1d67401fcda6e05/include/wh/nosjob/nosjob.hpp" + +using namespace std; + + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + nosjob::Atom root = nosjob::JsonParser().parse(json_str); + nosjob::Object o = nosjob::Object::cast(root); + nosjob::Atom edges = o.get(nosjob::Utf8String("edges")); + nosjob::Array a = nosjob::Array::cast(edges); + nosjob::Object last_edge = nosjob::Object::cast(a.get(a.size()-1)); + nosjob::Utf8String s = nosjob::Utf8String::cast(last_edge.get(nosjob::Utf8String("rule"))); + string t((char*)s.c_str()); + cerr << t.substr(1, 4) << endl; + + return 0; +} + diff --git a/src/test_picojson.cc b/src/test_picojson.cc new file mode 100644 index 0000000..cf3b621 --- /dev/null +++ b/src/test_picojson.cc @@ -0,0 +1,32 @@ +#include +#include +#include +#include + +/* + * https://github.com/kazuho/picojson + * + */ +#include "picojson/picojson.h" + +using namespace std; + + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + picojson::value v; + istringstream iss(json_str); + picojson::parse(v, iss); + picojson::value::object& obj = v.get(); + picojson::value::object& last_edge = obj["edges"].get().back().get(); + string s(last_edge["rule"].get()); + cerr << s.substr(1, 4) << endl; + + return 0; +} + diff --git a/src/test_rapidjson.cc b/src/test_rapidjson.cc new file mode 100644 index 0000000..b344ed0 --- /dev/null +++ b/src/test_rapidjson.cc @@ -0,0 +1,31 @@ +#include +#include +#include +#include + +/* + * https://github.com/miloyip/rapidjson + * + */ +#include "rapidjson/include/rapidjson/rapidjson.h" +#include "rapidjson/include/rapidjson/document.h" +#include "rapidjson/include/rapidjson/stringbuffer.h" + +using namespace std; + + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + rapidjson::Document d; + d.Parse(json_str.c_str()); + string s(d["edges"][d["edges"].Size()-1]["rule"].GetString()); + cerr << s.substr(1, 4) << endl; + + return 0; +} + diff --git a/src/test_sajson.cc b/src/test_sajson.cc new file mode 100644 index 0000000..4081d43 --- /dev/null +++ b/src/test_sajson.cc @@ -0,0 +1,32 @@ +#include +#include +#include +#include + +/* + * https://github.com/chadaustin/sajson + * + */ +#include "sajson/include/sajson.h" + +using namespace std; + + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs)), + (istreambuf_iterator())); + + const sajson::document& document = sajson::parse(sajson::literal(json_str.c_str())); + size_t index_a = document.get_root().find_object_key(sajson::literal("edges")); + const sajson::value& edges = document.get_root().get_object_value(index_a); + const sajson::value& last_edge = edges.get_array_element(edges.get_length()-1); + size_t index_r = last_edge.find_object_key(sajson::literal("rule")); + const sajson::value& r = last_edge.get_object_value(index_r); + cerr << r.as_string().substr(1, 4) << endl; + + return 0; +} + diff --git a/test_JsonBox.cc b/test_JsonBox.cc deleted file mode 100644 index e1b22c4..0000000 --- a/test_JsonBox.cc +++ /dev/null @@ -1,24 +0,0 @@ -#include -#include - -/* - * https://github.com/anhero/JsonBox - * - */ -#include "JsonBox/include/JsonBox.h" - -using namespace std; - - -int -main(int argc, char** argv) -{ - JsonBox::Value v; - v.loadFromFile(argv[1]); - JsonBox::Value w = v["edges"].getArray().back(); - string s = w["rule"].getString(); - cerr << s.substr(1,4) << endl; - - return 0; -} - diff --git a/test_MicroJSON.cc b/test_MicroJSON.cc deleted file mode 100644 index d8d4969..0000000 --- a/test_MicroJSON.cc +++ /dev/null @@ -1,28 +0,0 @@ -#include -#include -#include - -/* - * http://grigory.info/MicroJSON.About.html - * - */ -#include "MicroJSON-0.3.2/Node.h" - -using namespace std; - - -int -main(int argc, char** argv) -{ - ifstream ifs(argv[1]); - string json_str((istreambuf_iterator(ifs)), - (istreambuf_iterator())); - - MicroJSON::Node Root; - Root.Parse(json_str); - MicroJSON::Node* edges = Root.GetSubNode("edges"); - cerr << edges->GetChildren().back()->GetSubNode("rule") << endl; - - return 0; -} - diff --git a/test_MicroJSON.sh b/test_MicroJSON.sh deleted file mode 100755 index 89d1d3d..0000000 --- a/test_MicroJSON.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh -x - - -LD_LIBRARY_PATH="/home/pks/z/test/json_test/MicroJSON-0.3.2" ./test_MicroJSON $1 - diff --git a/test_cdec_json_parser.cc b/test_cdec_json_parser.cc deleted file mode 100644 index e805318..0000000 --- a/test_cdec_json_parser.cc +++ /dev/null @@ -1,25 +0,0 @@ -#include -#include -#include - -/* - * https://github.com/redpony/cdec/tree/master/decoder - * - */ -#include "cdec_json_parser/json_parse.h" - -using namespace std; - - -int -main(int argc, char** argv) -{ - ifstream ifs(argv[1]); - - istream& s = ifs; - JSONParser p; - p.Parse(&s); - - return 0; -} - diff --git a/test_gason.cc b/test_gason.cc deleted file mode 100644 index d78c385..0000000 --- a/test_gason.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include - -/* - * https://github.com/vivkin/gason - * - */ -#include "gason/gason.h" - -using namespace std; - - -void -print(const char *s) -{ - string u(s); - u = u.substr(1, 4); - if (u == "Goal") { - cerr << u << endl; - } -} - -void -walk(JsonValue o) -{ - switch (o.getTag()) { - case JSON_TAG_NUMBER: - break; - case JSON_TAG_BOOL: - break; - case JSON_TAG_STRING: - print(o.toString()); - break; - case JSON_TAG_ARRAY: - if (!o.toNode()) - break; - for (auto i : o) - walk(i->value); - break; - case JSON_TAG_OBJECT: - if (!o.toNode()) - break; - for (auto i : o) { - print(i->key); - walk(i->value); - } - break; - case JSON_TAG_NULL: - break; - } -} - -int -main(int argc, char** argv) -{ - ifstream ifs(argv[1]); - string json_str((istreambuf_iterator(ifs)), - (istreambuf_iterator())); - - char* s = strdup(json_str.c_str()); - char *p; - JsonValue v; - JsonAllocator a; - JsonParseStatus status = jsonParse(s, &p, &v, a); - walk(v); - - return 0; -} - diff --git a/test_json-cpp.cc b/test_json-cpp.cc deleted file mode 100644 index 0791704..0000000 --- a/test_json-cpp.cc +++ /dev/null @@ -1,100 +0,0 @@ -#include -#include -#include - -/* - * https://github.com/ascheglov/json-cpp - * - */ -#include "json-cpp.hpp" - -using namespace std; - - -struct Node { - int id; - string cat; - vector span; -}; - -struct Vector { - double CountEF; - double EgivenFCoherent; - double Glue; - double IsSingletonF; - double IsSingletonFE; - double LanguageModel; - double LanguageModel_OOV; - double MaxLexFgivenE; - double MaxLexEgivenF; - double PassThrough; - double PassThrough_1; - double PassThrough_2; - double PassThrough_3; - double PassThrough_4; - double PassThrough_5; - double PassThrough_6; - double SampleCountF; - double WordPenalty; -}; - -struct Edge { - int head; - string rule; - vector tails; - Vector f; - double weight; -}; - -struct Hg { - Vector weights; - vector nodes; - vector edges; - vector rules; -}; - -template inline void -serialize(jsoncpp::Stream& stream, Hg& o) -{ - fields(o, stream, "weights", o.weights, "nodes", o.nodes, "edges", o.edges, "rules", o.rules); -} - -template inline void -serialize(jsoncpp::Stream& stream, Edge& o) -{ - fields(o, stream, "head", o.head, "rule", o.rule, "tails", o.tails, "f", o.f, "weight", o.weight); -} - -template inline void -serialize(jsoncpp::Stream& stream, Vector& o) -{ - fields(o, stream, "EgivenFCoherent", o.EgivenFCoherent, "SampleCountF", o.SampleCountF, "CountEF", o.CountEF, "MaxLexFgivenE", o.MaxLexFgivenE, "MaxLexEgivenF", o.MaxLexEgivenF, "IsSingletonF", o.IsSingletonF, "IsSingletonFE", o.IsSingletonFE, "LanguageModel", o.LanguageModel, "LanguageModel_OOV", o.LanguageModel_OOV, "PassThrough", o.PassThrough, "PassThrough_1", o.PassThrough_1, "PassThrough_2", o.PassThrough_2, "PassThrough_3", o.PassThrough_3, "PassThrough_4", o.PassThrough_4, "PassThrough_5", o.PassThrough_5, "PassThrough_6", o.PassThrough_6, "WordPenalty", o.WordPenalty, "Glue", o.Glue); -} - -template inline void -serialize(jsoncpp::Stream& stream, Node& o) -{ - fields(o, stream, "id", o.id, "cat", o.cat, "span", o.span); -} - -int -main(int argc, char** argv) -{ - ifstream ifs(argv[1]); - string json_str((istreambuf_iterator(ifs) ), - (istreambuf_iterator())); - - Hg hg; - Vector w; - hg.weights = w; - vector nodes; - hg.nodes = nodes; - vector edges; - hg.edges = edges; - jsoncpp::parse(hg, json_str); - Edge& last_edge = hg.edges.back(); - cerr << last_edge.rule.substr(1, 4) << endl; - - return 0; -} - diff --git a/test_jsoncpp.cc b/test_jsoncpp.cc deleted file mode 100644 index ab3bd0c..0000000 --- a/test_jsoncpp.cc +++ /dev/null @@ -1,29 +0,0 @@ -#include -#include -#include - -/* - * https://github.com/open-source-parsers/jsoncpp - * - */ -#include "jsoncpp/include/json/json.h" - -using namespace std; - - -int -main(int argc, char** argv) -{ - ifstream ifs(argv[1]); - string json_str((istreambuf_iterator(ifs)), - (istreambuf_iterator())); - - Json::Value v; - Json::Reader reader; - reader.parse(json_str, v); - Json::Value last_edge = v["edges"][v["edges"].size()-1]; - cerr << last_edge["rule"].asString().substr(1, 4) << endl; - - return 0; -} - diff --git a/test_jsonxx.cc b/test_jsonxx.cc deleted file mode 100644 index d06640e..0000000 --- a/test_jsonxx.cc +++ /dev/null @@ -1,35 +0,0 @@ -#include -#include -#include - -/* - * https://github.com/hjiang/jsonxx - * - */ -#include "jsonxx/jsonxx.h" - -using namespace std; - - -int -main(int argc, char** argv) -{ - ifstream ifs(argv[1]); - string json_str((istreambuf_iterator(ifs)), - (istreambuf_iterator())); - - jsonxx::Object o; - o.parse(json_str); - jsonxx::Array edges = o.get("edges"); - jsonxx::Array::container::const_iterator it = edges.values().begin(), end = edges.values().end(); - while (it != end) { - jsonxx::Object e = (*it)->get(); - string s = e.get("rule").substr(1, 4); - if (s == "Goal") - cerr << s << endl; - ++it; - } - - return 0; -} - diff --git a/test_libjson.cc b/test_libjson.cc deleted file mode 100644 index 6b3e2a9..0000000 --- a/test_libjson.cc +++ /dev/null @@ -1,44 +0,0 @@ -#include -#include -#include - -/* - * http://sourceforge.net/projects/libjson/ - * - */ -#include "libjson/libjson.h" - -using namespace std; - - -void -walk(const JSONNode & n) -{ - JSONNode::const_iterator it = n.begin(); - while (it != n.end()){ - if (it->type() == JSON_ARRAY || it->type() == JSON_NODE){ - walk(*it); - } - string s = it->as_string(); - if (s.size() >= 5) { - string t = s.substr(1, 4); - if (t == "Goal") - cerr << t << endl; - } - ++it; - } -} - -int -main(int argc, char** argv) -{ - ifstream ifs(argv[1]); - string json_str((istreambuf_iterator(ifs)), - (istreambuf_iterator())); - - JSONNode n = libjson::parse(json_str); - walk(n); - - return 0; -} - diff --git a/test_msgpack.cc b/test_msgpack.cc deleted file mode 100644 index 70cf1c8..0000000 --- a/test_msgpack.cc +++ /dev/null @@ -1,81 +0,0 @@ -#include -#include -#include - -/* - * http://msgpack.org/ - * - */ -#include -#include - -using namespace std; - - -struct Node { - int id; - string cat; - vector span; - - MSGPACK_DEFINE(id, cat, span); -}; - -struct Vector { - double CountEF; - double EgivenFCoherent; - double Glue; - double IsSingletonF; - double IsSingletonFE; - double LanguageModel; - double LanguageModel_OOV; - double MaxLexFgivenE; - double MaxLexEgivenF; - double PassThrough; - double PassThrough_1; - double PassThrough_2; - double PassThrough_3; - double PassThrough_4; - double PassThrough_5; - double PassThrough_6; - double SampleCountF; - double WordPenalty; - - MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty); -}; - -struct Edge { - int head; - string rule; - vector tails; - Vector f; - double weight; - - MSGPACK_DEFINE(head, rule, tails, f, weight); -}; - -struct Hg { - Vector weights; - vector nodes; - vector edges; - - MSGPACK_DEFINE(weights, nodes, edges); -}; - -int -main(int argc, char** argv) -{ - ifstream ifs(argv[1]); - string str((istreambuf_iterator(ifs)), - (istreambuf_iterator())); - - msgpack::zone zone; - msgpack::object obj; - msgpack::unpack(str.data(), str.size(), NULL, &zone, &obj); - Hg hg; - obj.convert(&hg); - Edge last_edge = hg.edges.back(); - cerr << last_edge.rule.substr(1, 4) << endl; - - return 0; -} - diff --git a/test_msgpack_ruby b/test_msgpack_ruby deleted file mode 100755 index 0f2d387..0000000 --- a/test_msgpack_ruby +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env ruby - -require 'msgpack' - - -msg = MessagePack.unpack(File.new(ARGV[0]).read) - -STDERR.write "#{msg["edges"].last()["rule"][1..4]}\n" - diff --git a/test_msgpack_streaming.cc b/test_msgpack_streaming.cc deleted file mode 100644 index 1a3cf55..0000000 --- a/test_msgpack_streaming.cc +++ /dev/null @@ -1,99 +0,0 @@ -#include -#include -#include -#include -#include - -using namespace std; - - - -struct Node { - int id; - string cat; - vector span; - - MSGPACK_DEFINE(id, cat, span); -}; - -struct Vector { - double CountEF; - double EgivenFCoherent; - double Glue; - double IsSingletonF; - double IsSingletonFE; - double LanguageModel; - double LanguageModel_OOV; - double MaxLexFgivenE; - double MaxLexEgivenF; - double PassThrough; - double PassThrough_1; - double PassThrough_2; - double PassThrough_3; - double PassThrough_4; - double PassThrough_5; - double PassThrough_6; - double SampleCountF; - double WordPenalty; - - MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty); -}; - -struct Edge { - int head; - string rule; - vector tails; - Vector f; - double weight; - - MSGPACK_DEFINE(head, rule, tails, f, weight); -}; - -struct Hg { - Vector weights; - vector nodes; - vector edges; - - MSGPACK_DEFINE(weights, nodes, edges); -}; - -int -main(int argc, char** argv) { - ifstream ifs(argv[1]); - - size_t count = 0, n_, e_; - msgpack::unpacker pac; - while(true) { - pac.reserve_buffer(32*1024); - size_t bytes = ifs.readsome(pac.buffer(), pac.buffer_capacity()); - pac.buffer_consumed(bytes); - msgpack::unpacked result; - while(pac.next(&result)) { - msgpack::object obj = result.get(); - if (count == 0) { - obj.convert(&n_); - n_ += 2; - } else if (count == 1) { - obj.convert(&e_); - e_ += 2; - } else if (count == 2) { - Vector v; - obj.convert(&v); - } else if (count > 2 && count <= n_) { - Node n; - obj.convert(&n); - } else if (count > n_ && count <= n_+e_+1) { - Edge e; - obj.convert(&e); - string s = e.rule.substr(1, 4); - if (s == "Goal") - cout << s << endl; - } - count++; - } - if (!bytes) break; - } - - return 0; -} - diff --git a/test_nosjob.cc b/test_nosjob.cc deleted file mode 100644 index cf8891f..0000000 --- a/test_nosjob.cc +++ /dev/null @@ -1,32 +0,0 @@ -#include -#include -#include - -/* - * http://fossil.wanderinghorse.net/repos/nosjob/index.cgi/index - * - */ -#include "nosjob-e1d67401fcda6e05/include/wh/nosjob/nosjob.hpp" - -using namespace std; - - -int -main(int argc, char** argv) -{ - ifstream ifs(argv[1]); - string json_str((istreambuf_iterator(ifs)), - (istreambuf_iterator())); - - nosjob::Atom root = nosjob::JsonParser().parse(json_str); - nosjob::Object o = nosjob::Object::cast(root); - nosjob::Atom edges = o.get(nosjob::Utf8String("edges")); - nosjob::Array a = nosjob::Array::cast(edges); - nosjob::Object last_edge = nosjob::Object::cast(a.get(a.size()-1)); - nosjob::Utf8String s = nosjob::Utf8String::cast(last_edge.get(nosjob::Utf8String("rule"))); - string t((char*)s.c_str()); - cerr << t.substr(1, 4) << endl; - - return 0; -} - diff --git a/test_picojson.cc b/test_picojson.cc deleted file mode 100644 index cf3b621..0000000 --- a/test_picojson.cc +++ /dev/null @@ -1,32 +0,0 @@ -#include -#include -#include -#include - -/* - * https://github.com/kazuho/picojson - * - */ -#include "picojson/picojson.h" - -using namespace std; - - -int -main(int argc, char** argv) -{ - ifstream ifs(argv[1]); - string json_str((istreambuf_iterator(ifs)), - (istreambuf_iterator())); - - picojson::value v; - istringstream iss(json_str); - picojson::parse(v, iss); - picojson::value::object& obj = v.get(); - picojson::value::object& last_edge = obj["edges"].get().back().get(); - string s(last_edge["rule"].get()); - cerr << s.substr(1, 4) << endl; - - return 0; -} - diff --git a/test_rapidjson.cc b/test_rapidjson.cc deleted file mode 100644 index b344ed0..0000000 --- a/test_rapidjson.cc +++ /dev/null @@ -1,31 +0,0 @@ -#include -#include -#include -#include - -/* - * https://github.com/miloyip/rapidjson - * - */ -#include "rapidjson/include/rapidjson/rapidjson.h" -#include "rapidjson/include/rapidjson/document.h" -#include "rapidjson/include/rapidjson/stringbuffer.h" - -using namespace std; - - -int -main(int argc, char** argv) -{ - ifstream ifs(argv[1]); - string json_str((istreambuf_iterator(ifs)), - (istreambuf_iterator())); - - rapidjson::Document d; - d.Parse(json_str.c_str()); - string s(d["edges"][d["edges"].Size()-1]["rule"].GetString()); - cerr << s.substr(1, 4) << endl; - - return 0; -} - diff --git a/test_sajson.cc b/test_sajson.cc deleted file mode 100644 index 4081d43..0000000 --- a/test_sajson.cc +++ /dev/null @@ -1,32 +0,0 @@ -#include -#include -#include -#include - -/* - * https://github.com/chadaustin/sajson - * - */ -#include "sajson/include/sajson.h" - -using namespace std; - - -int -main(int argc, char** argv) -{ - ifstream ifs(argv[1]); - string json_str((istreambuf_iterator(ifs)), - (istreambuf_iterator())); - - const sajson::document& document = sajson::parse(sajson::literal(json_str.c_str())); - size_t index_a = document.get_root().find_object_key(sajson::literal("edges")); - const sajson::value& edges = document.get_root().get_object_value(index_a); - const sajson::value& last_edge = edges.get_array_element(edges.get_length()-1); - size_t index_r = last_edge.find_object_key(sajson::literal("rule")); - const sajson::value& r = last_edge.get_object_value(index_r); - cerr << r.as_string().substr(1, 4) << endl; - - return 0; -} - -- cgit v1.2.3