From f219bab21c07d02e7e54d557e23387bd93c9ce5f Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Sat, 19 Jul 2014 08:30:43 +0200
Subject: hg io
---
fast/.gitignore | 6 +-
fast/Makefile | 11 +-
fast/README.md | 5 +
fast/dummyvector.h | 27 ++
fast/grammar.hh | 5 +-
fast/hypergraph.cc | 216 +++++++--
fast/hypergraph.hh | 110 +++--
fast/json-cpp.hpp | 1231 ++++++++++++++++++++++++++++++++++++++++++++++++++++
fast/main.cc | 137 +-----
fast/make_paks.cc | 112 +++++
fast/read_pak.cc | 26 ++
fast/semiring.hh | 29 +-
12 files changed, 1683 insertions(+), 232 deletions(-)
create mode 100644 fast/dummyvector.h
create mode 100644 fast/json-cpp.hpp
create mode 100644 fast/make_paks.cc
create mode 100644 fast/read_pak.cc
(limited to 'fast')
diff --git a/fast/.gitignore b/fast/.gitignore
index 80d28d5..c37a566 100644
--- a/fast/.gitignore
+++ b/fast/.gitignore
@@ -1,3 +1,5 @@
fast_weaver
-hypergraph.o
-msgpack-c/
+*.o
+data
+make_paks
+read_pak
diff --git a/fast/Makefile b/fast/Makefile
index f09ab21..1d88446 100644
--- a/fast/Makefile
+++ b/fast/Makefile
@@ -1,12 +1,19 @@
all: hypergraph.o main.cc
- clang -std=c++11 -lstdc++ -lm hypergraph.o -I./msgpack-c/include/ main.cc -o fast_weaver
+ clang -std=c++11 -lstdc++ -lm -lmsgpack hypergraph.o main.cc -o fast_weaver
hypergraph.o: hypergraph.cc hypergraph.hh grammar.o semiring.hh
- clang -std=c++11 -I./msgpack-c/include/ -c hypergraph.cc
+ clang -std=c++11 -lmsgpack -c hypergraph.cc
grammar.o: grammar.cc grammar.hh
clang -std=c++11 -c grammar.cc
+# -lmsgpack is placed after the source file: the linker processes inputs in
+# command-line order, so a library named before the code that uses it may
+# not be searched for that code's symbols.
+make_paks: make_paks.cc
+	g++ -std=c++11 make_paks.cc -lmsgpack -o make_paks
+
+read_pak: read_pak.cc
+	g++ -std=c++11 read_pak.cc -lmsgpack -o read_pak
+
+
clean:
rm -f fast_weaver hypergraph.o grammar.o
diff --git a/fast/README.md b/fast/README.md
index 3087bab..5bcc962 100644
--- a/fast/README.md
+++ b/fast/README.md
@@ -7,3 +7,8 @@ TODO
* hg: json input (jsoncpp?)
* language model: kenlm
+Depends on msgpack [1].
+
+See also: http://jscheiny.github.io/Streams/
+
+[1] http://msgpack.org
+
diff --git a/fast/dummyvector.h b/fast/dummyvector.h
new file mode 100644
index 0000000..09cf3f7
--- /dev/null
+++ b/fast/dummyvector.h
@@ -0,0 +1,27 @@
+#pragma once
+#include
+
+
+// Fixed set of named double-valued fields with msgpack (de)serialization
+// support via MSGPACK_DEFINE. None of the fields has an initializer, so a
+// default-initialized DummyVector holds indeterminate values until they are
+// assigned or unpacked into.
+struct DummyVector {
+ double CountEF;
+ double EgivenFCoherent;
+ double Glue;
+ double IsSingletonF;
+ double IsSingletonFE;
+ double LanguageModel;
+ double LanguageModel_OOV;
+ double MaxLexFgivenE;
+ double MaxLexEgivenF;
+ double PassThrough;
+ double PassThrough_1;
+ double PassThrough_2;
+ double PassThrough_3;
+ double PassThrough_4;
+ double PassThrough_5;
+ double PassThrough_6;
+ double SampleCountF;
+ double WordPenalty;
+
+ // NOTE(review): the macro lists MaxLexEgivenF before MaxLexFgivenE, the
+ // reverse of the declaration order above. MSGPACK_DEFINE presumably packs
+ // members in argument order, so the serialized layout follows this list
+ // rather than the struct layout -- confirm against the producer of the data.
+ MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty);
+};
+
diff --git a/fast/grammar.hh b/fast/grammar.hh
index 5625b85..c4ef3ad 100644
--- a/fast/grammar.hh
+++ b/fast/grammar.hh
@@ -1,5 +1,4 @@
-#ifndef GRAMMAR_HH
-#define GRAMMAR_HH
+#pragma once
#include
#include
@@ -29,5 +28,3 @@ class Rule {
} // namespace
-#endif
-
diff --git a/fast/hypergraph.cc b/fast/hypergraph.cc
index 44e060e..4e6601f 100644
--- a/fast/hypergraph.cc
+++ b/fast/hypergraph.cc
@@ -41,7 +41,7 @@ operator<<(std::ostream& os, const Edge& e)
{
ostringstream _;
for (auto it = e.tails.begin(); it != e.tails.end(); ++it) {
- _ << (*it)->id; if (*it != e.tails.back()) _ << ",";
+ _ << (**it).id; if (*it != e.tails.back()) _ << ",";
}
os << \
"Edgeid << \
@@ -55,19 +55,26 @@ operator<<(std::ostream& os, const Edge& e)
}
/*
- * Hypergraph
- * methods
+ * functions
*
*/
+// Set the `mark` field of every node and edge referenced by the given
+// containers back to 0.
+// NOTE(review): both containers are taken by value as written, so they are
+// copied on each call; their elements are dereferenced as pointers, so the
+// nodes/edges the caller holds are still the ones being modified.
void
-Hypergraph::reset()
+reset(list nodes, vector edges)
{
+ for (auto it = nodes.begin(); it != nodes.end(); ++it)
+ (**it).mark = 0;
+ for (auto it = edges.begin(); it != edges.end(); ++it)
+ (**it).mark = 0;
+}
+
+// Prepare node scores for a pass over the graph: every node in `nodes` is
+// assigned semiring.null, then the node `root` refers to is assigned
+// semiring.one.
+// `root` is dereferenced unconditionally, so it must refer to an element of
+// the list (not end()).
+template void
+init(list& nodes, list::iterator root, Semiring& semiring)
+{
+ for (auto it = nodes.begin(); it != nodes.end(); ++it)
+ (**it).score = semiring.null;
+ (**root).score = semiring.one;
}
-/*
- * functions
- *
- */
void
topological_sort(list& nodes, list::iterator root)
{
@@ -94,37 +101,186 @@ topological_sort(list& nodes, list::iterator root)
}
}
-/*void
-init(vector& nodes, ViterbiSemiring& semiring, Node* root)
-{
- for (auto it = nodes.begin(); it != nodes.end(); ++it)
- (*it)->score = semiring.null;
- root->score = semiring.one;
-}
-
+// Score every node of hg and print the results to stdout.
+//
+// The first element of hg.nodes is used as root. After topological_sort()
+// and init(), the nodes are visited in list order; for each incoming edge of
+// a node the scores of the edge's tails are multiplied together, multiplied
+// with the edge's own score, and added into the node's score, where
+// multiply/add are the operations of the Viterbi semiring.
+// Each visited edge is printed, followed by one "id score" line per node.
+// Returns without output when hg has no nodes.
void
-viterbi(vector& nodes, map nodes_by_id, Node* root)
+viterbi(Hypergraph& hg)
{
- vector sorted = topological_sort(nodes);
- ViterbiSemiring semiring;
-
- init(sorted, semiring, root);
+ // With no nodes begin() == end(), and init() dereferences root.
+ if (hg.nodes.empty()) return;
+ list::iterator root = hg.nodes.begin(); // FIXME?
+ Hg::topological_sort(hg.nodes, root);
+ Semiring::Viterbi semiring;
+ Hg::init(hg.nodes, root, semiring);
- for (auto n_it = sorted.begin(); n_it != sorted.end(); ++n_it) {
- for (auto e_it = (*n_it)->incoming.begin(); e_it != (*n_it)->incoming.end(); ++e_it) {
- cout << (*e_it)->s() << endl;
+ for (auto n = hg.nodes.begin(); n != hg.nodes.end(); ++n) {
+ for (auto e = (**n).incoming.begin(); e != (**n).incoming.end(); ++e) {
+ cout << **e << endl;
double s = semiring.one;
- for (auto m_it = (*e_it)->tails.begin(); m_it != (*e_it)->tails.end(); m_it++) {
- s = semiring.multiply(s, (*m_it)->score);
+ // s accumulates the product of the tail scores of this edge.
+ for (auto m = (**e).tails.begin(); m != (**e).tails.end(); ++m) {
+ s = semiring.multiply(s, (**m).score);
}
- (*n_it)->score = semiring.add((*n_it)->score, semiring.multiply(s, (*e_it)->score));
+ (**n).score = semiring.add((**n).score, semiring.multiply(s, (**e).score));
}
}
- for (auto it = sorted.begin(); it != sorted.end(); ++it) {
- cout << (*it)->id << " " << (*it)->score << endl;
+ for (auto it = hg.nodes.begin(); it != hg.nodes.end(); ++it) {
+ cout << (**it).id << " " << (**it).score << endl;
}
-}*/
+}
+
+namespace io {
+
+/*
+ * Read a msgpack-serialized hypergraph from the file fn into hg.
+ *
+ * The file is consumed as a flat sequence of msgpack objects. The first
+ * object is stored in nn and the second in ne (each incremented by one);
+ * later objects are converted to Node or Edge depending on their position
+ * in the sequence, using the windows expressed in terms of nn and ne below.
+ *
+ * Every parsed node is registered through hg.add_node(). Every parsed edge
+ * is appended to hg.edges and its head/tails pointers, together with the
+ * nodes' incoming/outgoing lists, are filled in from head_id_/tails_ids_
+ * in the same way manual() wires its edges. Ids that are not present in
+ * hg.nodes_by_id are skipped (head is then a null pointer).
+ * hg holds the resulting raw pointers; nothing in this function frees them.
+ *
+ * If fn cannot be opened, a message is written to stderr and hg is left
+ * unchanged.
+ */
+void
+read(Hypergraph& hg, string fn)
+{
+  // msgpack is a binary format: open in binary mode so no newline
+  // translation is applied to the data.
+  ifstream ifs(fn, ios::in | ios::binary);
+  if (!ifs) {
+    cerr << "io::read: cannot open '" << fn << "'" << endl;
+    return;
+  }
+  size_t i = 0, nn = 0, ne = 0;
+  msgpack::unpacker pac;
+  while (ifs) {
+    pac.reserve_buffer(32*1024);
+    // read() + gcount() rather than readsome(): readsome() only hands out
+    // what is already buffered and may report 0 bytes before end of file.
+    ifs.read(pac.buffer(), static_cast<streamsize>(pac.buffer_capacity()));
+    size_t bytes = static_cast<size_t>(ifs.gcount());
+    pac.buffer_consumed(bytes);
+    msgpack::unpacked result;
+    while (pac.next(&result)) {
+      msgpack::object o = result.get();
+      if (i == 0) {
+        o.convert(&nn);
+        nn += 1;
+      } else if (i == 1) {
+        o.convert(&ne);
+        ne += 1;
+      } else if (i > 1 && i <= nn) {
+        //cout << "N " << o << endl;
+        Node* n = new Node;
+        o.convert(n);
+        n->mark = 0; // not part of the serialized fields
+        hg.add_node(n);
+      } else if (i > nn && i <= nn+ne+1) {
+        //cout << "E " << o << endl;
+        Edge* e = new Edge;
+        o.convert(e);
+        e->mark = 0; // not part of the serialized fields
+        // Resolve the serialized node ids to node pointers.
+        auto head = hg.nodes_by_id.find(e->head_id_);
+        e->head = (head != hg.nodes_by_id.end()) ? head->second : nullptr;
+        if (e->head)
+          e->head->incoming.push_back(e);
+        for (auto id = e->tails_ids_.begin(); id != e->tails_ids_.end(); ++id) {
+          auto tail = hg.nodes_by_id.find(*id);
+          if (tail == hg.nodes_by_id.end())
+            continue;
+          e->tails.push_back(tail->second);
+          tail->second->outgoing.push_back(e);
+        }
+        hg.edges.push_back(e);
+      }
+      i++;
+    }
+  }
+}
+
+// Placeholder: currently does nothing with hg or fn.
+// The commented-out body sketches the intended msgpack output (node count,
+// edge count, weights, then every node and every edge). It cannot be enabled
+// as is: it opens argv[2] instead of fn, and it packs hg.weights, which the
+// Hypergraph struct in hypergraph.hh does not declare.
+void
+write(Hypergraph& hg, string fn)
+{
+ /*FILE* file = fopen(argv[2], "wb");
+ msgpack::fbuffer fbuf(file);
+ msgpack::pack(fbuf, hg.nodes.size());
+ msgpack::pack(fbuf, hg.edges.size());
+ msgpack::pack(fbuf, hg.weights);
+ for (auto it = hg.nodes.begin(); it != hg.nodes.end(); it++)
+ msgpack::pack(fbuf, *it);
+ for (auto it = hg.edges.begin(); it != hg.edges.end(); it++)
+ msgpack::pack(fbuf, *it);
+
+ fclose(file);*/
+}
+
+// Populate hg with a hard-coded example graph: 8 nodes (ids 0-7) and 11
+// edges (q, p, r, s, t, u, v, w, x, y, z). For every edge the head/tails
+// pointers and the incoming/outgoing lists of the affected nodes are wired
+// by hand, and the edge is appended to hg.edges.
+// All objects are allocated with new and stored in hg as raw pointers;
+// nothing here frees them.
+// NOTE(review): node scores and hg.arity are not set in this function.
+void
+manual(Hypergraph& hg)
+{
+ // nodes
+ // (left/right of the root are assigned `false`, i.e. 0 for these integer fields)
+ Node* a = new Node; a->id = 0; a->symbol = "root"; a->left = false; a->right = false; a->mark = 0;
+ Node* b = new Node; b->id = 1; b->symbol = "NP"; b->left = 0; b->right = 1; b->mark = 0;
+ Node* c = new Node; c->id = 2; c->symbol = "V"; c->left = 1; c->right = 2; c->mark = 0;
+ Node* d = new Node; d->id = 3; d->symbol = "JJ"; d->left = 3; d->right = 4; d->mark = 0;
+ Node* e = new Node; e->id = 4; e->symbol = "NN"; e->left = 3; e->right = 5; e->mark = 0;
+ Node* f = new Node; f->id = 5; f->symbol = "NP"; f->left = 2; f->right = 5; f->mark = 0;
+ Node* g = new Node; g->id = 6; g->symbol = "NP"; g->left = 1; g->right = 5; g->mark = 0;
+ Node* h = new Node; h->id = 7; h->symbol = "S"; h->left = 0; h->right = 6; h->mark = 0;
+
+ // The insertion order differs from the id order; the edges below look nodes
+ // up through hg.nodes_by_id, so this order only determines hg.nodes.
+ // NOTE(review): presumably shuffled on purpose to exercise
+ // topological_sort() -- confirm.
+ hg.add_node(a);
+ hg.add_node(h);
+ hg.add_node(g);
+ hg.add_node(c);
+ hg.add_node(d);
+ hg.add_node(f);
+ hg.add_node(b);
+ hg.add_node(e);
+
+ // edges
+ Edge* q = new Edge; q->head = hg.nodes_by_id[1]; q->tails.push_back(hg.nodes_by_id[0]); q->score = 0.367879441171;
+ hg.nodes_by_id[1]->incoming.push_back(q);
+ hg.nodes_by_id[0]->outgoing.push_back(q);
+ q->arity = 1;
+ q->mark = 0;
+ hg.edges.push_back(q);
+
+ Edge* p = new Edge; p->head = hg.nodes_by_id[2]; p->tails.push_back(hg.nodes_by_id[0]); p->score = 0.606530659713;
+ hg.nodes_by_id[2]->incoming.push_back(p);
+ hg.nodes_by_id[0]->outgoing.push_back(p);
+ p->arity = 1;
+ p->mark = 0;
+ hg.edges.push_back(p);
+
+ // NOTE(review): r and s have the same head, tail and score.
+ Edge* r = new Edge; r->head = hg.nodes_by_id[3]; r->tails.push_back(hg.nodes_by_id[0]); r->score = 1.0;
+ hg.nodes_by_id[3]->incoming.push_back(r);
+ hg.nodes_by_id[0]->outgoing.push_back(r);
+ r->arity = 1;
+ r->mark = 0;
+ hg.edges.push_back(r);
+
+ Edge* s = new Edge; s->head = hg.nodes_by_id[3]; s->tails.push_back(hg.nodes_by_id[0]); s->score = 1.0;
+ hg.nodes_by_id[3]->incoming.push_back(s);
+ hg.nodes_by_id[0]->outgoing.push_back(s);
+ s->arity = 1;
+ s->mark = 0;
+ hg.edges.push_back(s);
+
+ // NOTE(review): t and u have the same head, tail and score.
+ Edge* t = new Edge; t->head = hg.nodes_by_id[4]; t->tails.push_back(hg.nodes_by_id[0]); t->score = 1.0;
+ hg.nodes_by_id[4]->incoming.push_back(t);
+ hg.nodes_by_id[0]->outgoing.push_back(t);
+ t->arity = 1;
+ t->mark = 0;
+ hg.edges.push_back(t);
+
+ Edge* u = new Edge; u->head = hg.nodes_by_id[4]; u->tails.push_back(hg.nodes_by_id[0]); u->score = 1.0;
+ hg.nodes_by_id[4]->incoming.push_back(u);
+ hg.nodes_by_id[0]->outgoing.push_back(u);
+ u->arity = 1;
+ u->mark = 0;
+ hg.edges.push_back(u);
+
+ // v and w connect the same pair of nodes (3 -> 4) with different scores.
+ Edge* v = new Edge; v->head = hg.nodes_by_id[4]; v->tails.push_back(hg.nodes_by_id[3]); v->score = 1.0;
+ hg.nodes_by_id[4]->incoming.push_back(v);
+ hg.nodes_by_id[3]->outgoing.push_back(v);
+ v->arity = 1;
+ v->mark = 0;
+ hg.edges.push_back(v);
+
+ Edge* w = new Edge; w->head = hg.nodes_by_id[4]; w->tails.push_back(hg.nodes_by_id[3]); w->score = 2.71828182846;
+ hg.nodes_by_id[4]->incoming.push_back(w);
+ hg.nodes_by_id[3]->outgoing.push_back(w);
+ w->arity = 1;
+ w->mark = 0;
+ hg.edges.push_back(w);
+
+ Edge* x = new Edge; x->head = hg.nodes_by_id[5]; x->tails.push_back(hg.nodes_by_id[4]); x->score = 1.0;
+ hg.nodes_by_id[5]->incoming.push_back(x);
+ hg.nodes_by_id[4]->outgoing.push_back(x);
+ x->arity = 1;
+ x->mark = 0;
+ hg.edges.push_back(x);
+
+ // y and z are the only edges with two tails (arity 2).
+ Edge* y = new Edge; y->head = hg.nodes_by_id[6]; y->tails.push_back(hg.nodes_by_id[2]); y->tails.push_back(hg.nodes_by_id[5]); y->score = 1.0;
+ hg.nodes_by_id[6]->incoming.push_back(y);
+ hg.nodes_by_id[2]->outgoing.push_back(y);
+ hg.nodes_by_id[5]->outgoing.push_back(y);
+ y->arity = 2;
+ y->mark = 0;
+ hg.edges.push_back(y);
+
+ Edge* z = new Edge; z->head = hg.nodes_by_id[7]; z->tails.push_back(hg.nodes_by_id[1]); z->tails.push_back(hg.nodes_by_id[6]); z->score = 1.0;
+ hg.nodes_by_id[7]->incoming.push_back(z);
+ hg.nodes_by_id[1]->outgoing.push_back(z);
+ hg.nodes_by_id[6]->outgoing.push_back(z);
+ z->arity = 2;
+ z->mark = 0;
+ hg.edges.push_back(z);
+}
+
+} // namespace
} // namespace
diff --git a/fast/hypergraph.hh b/fast/hypergraph.hh
index 68cca19..2e30911 100644
--- a/fast/hypergraph.hh
+++ b/fast/hypergraph.hh
@@ -1,5 +1,4 @@
-#ifndef HYPERGRAPH_HH
-#define HYPERGRAPH_HH
+#pragma once
#include "grammar.hh"
#include "semiring.hh"
@@ -12,8 +11,10 @@
#include
#include
#include
+#include
-#include "msgpack-c/include/msgpack.hpp"
+#include "dummyvector.h"
+#include
using namespace std;
@@ -23,61 +24,78 @@ typedef double weight_t;
namespace Hg {
-class Node;
+struct Node;
-class Edge {
- public:
- Node* head;
- vector tails;
- score_t score;
- //Grammar::Rule rule; FIXME
- vector f;
- unsigned int arity;
- unsigned int mark;
+struct Edge {
+ Node* head;
+ vector tails;
+ score_t score;
+ string rule; //FIXME
+ DummyVector f; //FIXME
+ unsigned int arity;
+ unsigned int mark;
- bool is_marked();
- friend std::ostream& operator<<(std::ostream& os, const Edge& s);
+ bool is_marked();
+ friend std::ostream& operator<<(std::ostream& os, const Edge& s);
- size_t head_id_;
- vector tails_ids_; // node ids
- MSGPACK_DEFINE(head_id_, tails_ids_, score, f, arity);
+ size_t head_id_;
+ vector tails_ids_; // node ids
+
+ MSGPACK_DEFINE(head_id_, tails_ids_, score, f, arity);
};
-class Node {
- public:
- size_t id;
- string symbol;
- unsigned short left;
- unsigned short right;
- score_t score;
- vector incoming;
- vector outgoing;
- unsigned int mark;
-
- bool is_marked();
- friend std::ostream& operator<<(std::ostream& os, const Node& n);
-
- vector incoming_ids_; // edge ids
- vector outgoing_ids_; // edge ids
- MSGPACK_DEFINE(id, symbol, left, right, score, incoming_ids_, outgoing_ids_);
+struct Node {
+ size_t id;
+ string symbol;
+ unsigned short left;
+ unsigned short right;
+ score_t score;
+ vector incoming;
+ vector outgoing;
+ unsigned int mark;
+
+ bool is_marked();
+ friend std::ostream& operator<<(std::ostream& os, const Node& n);
+
+ vector incoming_ids_; // edge ids
+ vector outgoing_ids_; // edge ids
+ MSGPACK_DEFINE(id, symbol, left, right, score, incoming_ids_, outgoing_ids_);
};
-class Hypergraph {
- public:
- list nodes;
- vector edges;
- unordered_map nodes_by_id;
- unsigned int arity;
+struct Hypergraph {
+ list nodes;
+ vector edges;
+ unordered_map nodes_by_id;
+ unsigned int arity;
- void reset();
- void add_node(Node* n) { nodes.push_back(n); nodes_by_id[n->id] = n; }
+ void add_node(Node* n) { nodes.push_back(n); nodes_by_id[n->id] = n; }
};
-void topological_sort(list& nodes, list::iterator root);
-void viterbi(Hypergraph& hg);
+// Sets the `mark` field of every node in `nodes` and every edge in `edges`
+// to 0. The parameter list has to match the definition in hypergraph.cc,
+// which takes both containers by value.
+void
+reset(list<Node*> nodes, vector<Edge*> edges);
+
+template void
+init(list& nodes, list::iterator root, Semiring& semiring);
+
+void
+topological_sort(list& nodes, list::iterator root);
+
+void
+viterbi(Hypergraph& hg);
+
+namespace io {
+void
+read(Hypergraph& hg, string fn);
+
+void
+write(Hypergraph& hg, string fn);
+
+void
+manual(Hypergraph& hg);
} // namespace
-#endif
+
+} // namespace
diff --git a/fast/json-cpp.hpp b/fast/json-cpp.hpp
new file mode 100644
index 0000000..851a4f4
--- /dev/null
+++ b/fast/json-cpp.hpp
@@ -0,0 +1,1231 @@
+//
+// DO NOT EDIT !!! This file was generated with a script.
+//
+// JSON for C++
+// https://github.com/ascheglov/json-cpp
+// Version 0.1 alpha, rev. 170121e2dc099895064305e38bfb25d90a807ce3
+// Generated 2014-03-27 17:16:47.104492 UTC
+//
+// Belongs to the public domain
+
+#pragma once
+
+//----------------------------------------------------------------------
+// json-cpp.hpp begin
+
+//----------------------------------------------------------------------
+// json-cpp/parse.hpp begin
+
+#include
+#include
+#include
+#include
+#include
+
+//----------------------------------------------------------------------
+// json-cpp/ParserError.hpp begin
+
+#include
+#include
+#include
+#include
+
+#if defined _MSC_VER
+# define JSONCPP_INTERNAL_NOEXCEPT_ throw()
+#else
+# define JSONCPP_INTERNAL_NOEXCEPT_ noexcept
+#endif
+
+namespace jsoncpp
+{
+ class ParserError : public std::exception
+ {
+ public:
+ enum Type
+ {
+ NoError,
+ Eof, UnexpectedCharacter,
+ InvalidEscapeSequence, NoTrailSurrogate,
+ UnexpectedType, UnknownField,
+ NumberIsOutOfRange,
+ };
+
+ ParserError(Type type, std::size_t line, std::size_t column)
+ : m_type{type}, m_line{line}, m_column{column}
+ {
+ assert(type != NoError);
+ }
+
+ virtual const char* what() const JSONCPP_INTERNAL_NOEXCEPT_ override
+ {
+ if (m_what.empty())
+ {
+ m_what = "JSON parser error at line ";
+ m_what += std::to_string(m_line);
+ m_what += ", column ";
+ m_what += std::to_string(m_column);
+ switch (m_type)
+ {
+ case Eof: m_what += ": unexpected end of file"; break;
+ case UnexpectedCharacter: m_what += ": unexpected character"; break;
+ case InvalidEscapeSequence: m_what += ": invalid escape sequence"; break;
+ case NoTrailSurrogate: m_what += ": no UTF-16 trail surrogate"; break;
+ case UnexpectedType: m_what += ": unexpected value type"; break;
+ case UnknownField: m_what += ": unknown field name"; break;
+ case NumberIsOutOfRange: m_what += ": number is out of range"; break;
+ case NoError:
+ default:
+ m_what += ": INTERNAL ERROR"; break;
+ }
+ }
+
+ return m_what.c_str();
+ }
+
+ Type type() const { return m_type; }
+ std::size_t line() const { return m_line; }
+ std::size_t column() const { return m_column; }
+
+ private:
+ Type m_type;
+ std::size_t m_line;
+ std::size_t m_column;
+
+ mutable std::string m_what;
+ };
+}
+
+#undef JSONCPP_INTERNAL_NOEXCEPT_
+
+// json-cpp/ParserError.hpp end
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// json-cpp/Stream.hpp begin
+
+namespace jsoncpp
+{
+ template
+ class Stream;
+
+ namespace details
+ {
+ template
+ struct Traits2 {};
+
+ template
+ struct ParserTraits {};
+
+ template
+ struct GeneratorTraits {};
+ }
+
+ template
+ using Parser = Stream>;
+
+ template
+ using Generator = Stream>;
+
+ template
+ inline auto serialize(Stream& stream, T& value) -> decltype(value.serialize(stream), void())
+ {
+ value.serialize(stream);
+ }
+}
+// json-cpp/Stream.hpp end
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// json-cpp/value_types.hpp begin
+
+namespace jsoncpp
+{
+ // Helper masks
+ const auto TypeIsNotFundamental = 0x40;
+ const auto TypeIsCollection = 0x80;
+
+ enum class Type
+ {
+ Undefined = 0, // Helper type for debugging variant-like types
+ Null = 0x01,
+ Boolean = 0x02,
+ Number = 0x04,
+ String = 0x08 | TypeIsNotFundamental,
+ Array = 0x10 | TypeIsNotFundamental | TypeIsCollection,
+ Object = 0x20 | TypeIsNotFundamental | TypeIsCollection,
+ };
+}
+// json-cpp/value_types.hpp end
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// json-cpp/details/parser_utility.hpp begin
+
+#include
+#include
+#include
+
+namespace jsoncpp { namespace details
+{
+ template
+ struct CStrIterator
+ {
+ using this_type = CStrIterator;
+
+ CStrIterator()
+ {
+ static CharT null{0};
+ m_ptr = &null;
+ }
+
+ CStrIterator(const CharT* ptr) : m_ptr{ptr} {}
+
+ const CharT& operator*() { return *m_ptr; }
+ const CharT* operator->() { return m_ptr; }
+
+ this_type& operator++()
+ {
+ assert(!isEnd());
+ ++m_ptr;
+ return *this;
+ }
+
+ this_type operator++(int) { auto temp = *this; ++*this; return temp; }
+
+ bool operator==(const this_type& rhs) const { return isEnd() == rhs.isEnd(); }
+ bool operator!=(const this_type& rhs) const { return !this->operator==(rhs); }
+
+ private:
+ const CharT* m_ptr;
+
+ bool isEnd() const { return *m_ptr == 0; }
+ };
+
+ class Diagnostics
+ {
+ public:
+ void nextColumn() { ++m_column; }
+ void newLine() { ++m_line; m_column = 0; }
+
+ ParserError makeError(ParserError::Type type) const
+ {
+ return{type, m_line, m_column};
+ }
+
+ private:
+ std::size_t m_column{0};
+ std::size_t m_line{1};
+ };
+
+ template
+ struct Reader
+ {
+ using this_type = Reader;
+
+ Reader(InputIterator first, InputIterator last) : m_iter(first), m_end(last)
+ {
+ checkEnd();
+ }
+
+ char operator*() { return *m_iter; }
+ this_type& operator++()
+ {
+ checkEnd();
+ ++m_iter;
+ m_diag.nextColumn();
+ return *this;
+ }
+
+ void checkEnd()
+ {
+ if (m_iter == m_end)
+ throw m_diag.makeError(ParserError::Eof);
+ }
+
+ char getNextChar()
+ {
+ auto prev = *m_iter;
+ ++*this;
+ return prev;
+ }
+
+ Diagnostics m_diag;
+ InputIterator m_iter, m_end;
+ };
+}}
+
+// json-cpp/details/parser_utility.hpp end
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// json-cpp/details/number_parser.hpp begin
+
+#include
+
+namespace jsoncpp { namespace details
+{
+ inline bool isDigit(char c) { return c >= '0' && c <= '9'; }
+
+ template
+ inline unsigned parseIntNumber(Iterator& iter)
+ {
+ auto intPart = 0U; // TBD: 0ULL ?
+
+ do
+ {
+ intPart = intPart * 10 + (*iter - '0');
+
+ ++iter;
+ }
+ while (isDigit(*iter));
+
+ return intPart;
+ }
+
+ template
+ inline double parseRealNumber(Iterator& iter)
+ {
+ double number = 0;
+
+ if (*iter == '0')
+ {
+ ++iter;
+ }
+ else
+ {
+ number = parseIntNumber(iter);
+ }
+
+ // here `ch` is a peeked character, need to call eat()
+
+ if (*iter == '.')
+ {
+ ++iter;
+
+ auto mul = 0.1;
+ while (isDigit(*iter))
+ {
+ number += (*iter - '0') * mul;
+ mul /= 10;
+ ++iter;
+ }
+ }
+
+ // here `ch` is a peeked character, need to call eat()
+
+ if (*iter == 'e' || *iter == 'E')
+ {
+ ++iter;
+
+ auto negate = *iter == '-';
+ if (negate || *iter == '+')
+ ++iter;
+ // FIXME: check `ch` for non-digit
+
+ auto e = parseIntNumber(iter);
+
+ if (negate)
+ number /= std::pow(10, e);
+ else
+ number *= std::pow(10, e);
+ }
+
+ return number;
+ }
+}}
+// json-cpp/details/number_parser.hpp end
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// json-cpp/details/string_parser.hpp begin
+
+#include
+
+namespace jsoncpp { namespace details
+{
+    // Combine a UTF-16 surrogate pair into the code point it encodes.
+    // `lead` is expected in [0xD800, 0xDBFF] and `trail` in [0xDC00, 0xDFFF];
+    // neither range is checked here.
+    // The three parts are added rather than OR-ed: the shifted lead offset
+    // has bit 16 set for lead >= 0xD840, and OR-ing that with 0x10000 would
+    // drop the carry (e.g. D840 DC00 must yield U+20000, not U+10000).
+    // NOTE: local correction to a generated file; carry it over when the
+    // header is regenerated.
+    inline char32_t utf16SurrogatePairToUtf32(char32_t lead, char32_t trail)
+    {
+        return 0x10000 + ((lead - 0xD800) << 10) + (trail - 0xDC00);
+    }
+
+ inline void utf32ToUtf8(char32_t c, std::string& str)
+ {
+ auto add = [&str](char32_t c){ str.push_back(static_cast(c)); };
+
+ if (c < 0x80)
+ {
+ add(c);
+ }
+ else if (c < 0x800)
+ {
+ add(0xC0 | c >> 6);
+ add(0x80 | (c & 0x3f));
+ }
+ else if (c < 0x10000)
+ {
+ add(0xE0 | c >> 12);
+ add(0x80 | ((c >> 6) & 0x3f));
+ add(0x80 | (c & 0x3f));
+ }
+ else if (c < 0x200000)
+ {
+ add(0xF0 | c >> 18);
+ add(0x80 | ((c >> 12) & 0x3f));
+ add(0x80 | ((c >> 6) & 0x3f));
+ add(0x80 | (c & 0x3f));
+ }
+ else if (c < 0x4000000)
+ {
+ add(0xF8 | c >> 24);
+ add(0x80 | ((c >> 18) & 0x3f));
+ add(0x80 | ((c >> 12) & 0x3f));
+ add(0x80 | ((c >> 6) & 0x3f));
+ add(0x80 | (c & 0x3f));
+ }
+ else
+ {
+ add(0xFC | c >> 30);
+ add(0x80 | ((c >> 24) & 0x3f));
+ add(0x80 | ((c >> 18) & 0x3f));
+ add(0x80 | ((c >> 12) & 0x3f));
+ add(0x80 | ((c >> 6) & 0x3f));
+ add(0x80 | (c & 0x3f));
+ }
+ }
+
+ enum class CharType { Raw, CodePoint, UTF16Pair };
+
+ template
+ inline void addToStr(std::basic_string& str, CharType type, char32_t c1, char32_t c2);
+
+ template<>
+ inline void addToStr(std::basic_string& str, CharType type, char32_t c1, char32_t c2)
+ {
+ if (type == CharType::Raw)
+ {
+ str.push_back(static_cast(c1));
+ }
+ else if (type == CharType::CodePoint)
+ {
+ utf32ToUtf8(c1, str);
+ }
+ else
+ {
+ auto c32 = utf16SurrogatePairToUtf32(c1, c2);
+ utf32ToUtf8(c32, str);
+ }
+ }
+
+ template<>
+ inline void addToStr(std::basic_string& str, CharType type, char32_t c1, char32_t c2)
+ {
+ str.push_back(static_cast(c1));
+ if (type == CharType::UTF16Pair)
+ str.push_back(static_cast(c2));
+ }
+
+ template<>
+ inline void addToStr(std::basic_string& str, CharType type, char32_t c1, char32_t c2)
+ {
+ auto c = (type == CharType::UTF16Pair) ? utf16SurrogatePairToUtf32(c1, c2) : c1;
+ str.push_back(static_cast(c));
+ }
+
+ template
+ inline int parseHexDigit(Iterator& iter, ParserError::Type& err)
+ {
+ auto ch = *iter;
+ ++iter;
+ if (ch >= '0' && ch <= '9') return ch - '0';
+ if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
+ if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
+
+ err = ParserError::InvalidEscapeSequence;
+ return 0;
+ }
+
+ template
+ inline char32_t parseUTF16CodeUnit(Iterator& iter, ParserError::Type& err)
+ {
+ auto n = parseHexDigit(iter, err) << 12;
+ n |= parseHexDigit(iter, err) << 8;
+ n |= parseHexDigit(iter, err) << 4;
+ n |= parseHexDigit(iter, err);
+ return static_cast(n);
+ }
+
+ template
+ inline ParserError::Type parseStringImpl(Iterator& iter, std::basic_string& str)
+ {
+ str.clear();
+ auto add = [&str](CharType type, char32_t c1, char32_t c2)
+ {
+ addToStr(str, type, c1, c2);
+ };
+
+ for (;;)
+ {
+ auto ch = static_cast(*iter);
+ ++iter;
+ if (ch == '"')
+ return ParserError::NoError;
+
+ if (ch == '\\')
+ {
+ ch = static_cast(*iter);
+ ++iter;
+ switch (ch)
+ {
+ case '\\': case '"': case '/':
+ break;
+
+ case 'b': ch = '\b'; break;
+ case 'f': ch = '\f'; break;
+ case 'n': ch = '\n'; break;
+ case 'r': ch = '\r'; break;
+ case 't': ch = '\t'; break;
+
+ case 'u':
+ {
+ ParserError::Type err{ParserError::NoError};
+ auto codeUnit = parseUTF16CodeUnit(iter, err);
+ if (err != ParserError::NoError)
+ return err;
+
+ if (codeUnit >= 0xD800 && codeUnit < 0xDC00)
+ {
+ if (*iter != '\\') return ParserError::NoTrailSurrogate;
+ ++iter;
+ if (*iter != 'u') return ParserError::NoTrailSurrogate;
+ ++iter;
+
+ auto trailSurrogate = parseUTF16CodeUnit(iter, err);
+ if (err != ParserError::NoError)
+ return err;
+
+ add(CharType::UTF16Pair, codeUnit, trailSurrogate);
+ }
+ else
+ {
+ add(CharType::CodePoint, codeUnit, 0);
+ }
+ }
+ continue;
+
+ default:
+ return ParserError::InvalidEscapeSequence;
+ }
+ }
+
+ add(CharType::Raw, ch, 0);
+ }
+ }
+}}
+
+// json-cpp/details/string_parser.hpp end
+//----------------------------------------------------------------------
+
+namespace jsoncpp
+{
+ template
+ class Stream>>
+ {
+ public:
+ using this_type = Parser>;
+
+ explicit Stream(InputIterator first, InputIterator last)
+ : m_reader{first, last}
+ {
+ nextValue();
+ }
+
+ Type getType() const { return m_type; }
+ bool getBoolean() const { return m_boolean; }
+ double getNumber() const { return m_number; }
+ const std::string& getFieldName() const { return m_fieldName; }
+
+ void checkType(Type type) const
+ {
+ if (getType() != type)
+ throw makeError(ParserError::UnexpectedType);
+ }
+
+ bool isListEnd(char terminator)
+ {
+ eatWhitespace();
+ if (*m_reader != terminator)
+ return false;
+
+ ++m_reader;
+ return true;
+ }
+
+ void eatListSeparator()
+ {
+ eatWhitespace();
+ check(',');
+ eatWhitespace();
+ }
+
+ void nextNameValuePair()
+ {
+ eatWhitespace();
+ check('"');
+ parseString(m_fieldName);
+ eatWhitespace();
+ check(':');
+ nextValue();
+ }
+
+ void nextValue()
+ {
+ eatWhitespace();
+ m_type = nextValueImpl();
+ }
+
+ template
+ void parseString(std::basic_string& str)
+ {
+ auto err = parseStringImpl(m_reader, str);
+ if (err != ParserError::NoError)
+ throw m_reader.m_diag.makeError(err);
+ }
+
+ ParserError makeError(ParserError::Type type) const
+ {
+ return m_reader.m_diag.makeError(type);
+ }
+
+ private:
+ Type nextValueImpl()
+ {
+ switch (*m_reader)
+ {
+ case '{': ++m_reader; return Type::Object;
+ case '[': ++m_reader; return Type::Array;
+ case 't': ++m_reader; checkLiteral("true"); m_boolean = true; return Type::Boolean;
+ case 'f': ++m_reader; checkLiteral("false"); m_boolean = false; return Type::Boolean;
+ case 'n': ++m_reader; checkLiteral("null"); return Type::Null;
+ case '"': ++m_reader; return Type::String;
+
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ m_number = parseRealNumber(m_reader);
+ return Type::Number;
+
+ case '-':
+ ++m_reader;
+ m_number = -parseRealNumber(m_reader);
+ return Type::Number;
+ }
+
+ throw unexpectedCharacter();
+ }
+
+ ParserError unexpectedCharacter() const
+ {
+ return makeError(ParserError::UnexpectedCharacter);
+ }
+
+ void check(char expectedChar)
+ {
+ if (*m_reader != expectedChar)
+ throw unexpectedCharacter();
+
+ ++m_reader;
+ }
+
+ template
+ void checkLiteral(const char(&literal)[N])
+ {
+ static_assert(N > 2, "");
+ for (auto i = 1; i != N - 1; ++i, ++m_reader)
+ if (*m_reader != literal[i])
+ throw unexpectedCharacter();
+ }
+
+ void eatWhitespace()
+ {
+ for (;; ++m_reader)
+ {
+ switch (*m_reader)
+ {
+ case '/':
+ ++m_reader;
+ check('/');
+ while (*m_reader != '\n')
+ ++m_reader;
+
+ // no break here
+ case '\n':
+ m_reader.m_diag.newLine();
+ break;
+
+ case ' ': case '\t': case '\r':
+ break;
+
+ default:
+ return;
+ }
+ }
+ }
+
+ details::Reader m_reader;
+
+ Type m_type;
+ double m_number;
+ bool m_boolean;
+ std::string m_fieldName;
+ };
+
+ template
+ inline void serialize(Parser& parser, bool& value)
+ {
+ parser.checkType(Type::Boolean);
+ value = parser.getBoolean();
+ }
+
+ template
+ inline typename std::enable_if::value>::type
+ serialize(Parser& parser, T& value)
+ {
+ parser.checkType(Type::Number);
+ auto number = parser.getNumber();
+ value = static_cast(number);
+ if (value != number)
+ throw parser.makeError(ParserError::NumberIsOutOfRange);
+ }
+
+ template
+ inline void serialize(Parser& parser, std::basic_string& value)
+ {
+ parser.checkType(Type::String);
+ parser.parseString(value);
+ }
+
+ namespace details
+ {
+ template
+ inline void parseList(Parser& parser, Type type, char terminator, Callback&& callback)
+ {
+ parser.checkType(type);
+
+ while (!parser.isListEnd(terminator))
+ {
+ callback();
+
+ if (parser.isListEnd(terminator))
+ return;
+
+ parser.eatListSeparator();
+ }
+ }
+ }
+
+ template
+ inline void parseObject(Parser& parser, Callback&& callback)
+ {
+ details::parseList(parser, Type::Object, '}', [&]
+ {
+ parser.nextNameValuePair();
+ callback(parser.getFieldName());
+ });
+ }
+
+ template
+ void parseArray(Parser& parser, Callback&& callback)
+ {
+ details::parseList(parser, Type::Array, ']', [&]
+ {
+ parser.nextValue();
+ callback();
+ });
+ }
+
+ template
+ inline void parse(T& object, InputIterator first, InputIterator last)
+ {
+ Parser> stream{first, last};
+ serialize(stream, object);
+ }
+
+ template
+ inline void parse(T& object, const CharT* str)
+ {
+ details::CStrIterator first{str}, last;
+ parse(object, first, last);
+ }
+
+ template
+ inline void parse(T& object, std::basic_string& str)
+ {
+ parse(object, std::begin(str), std::end(str));
+ }
+
+ template
+ inline void parse(T& object, std::basic_istream& stream)
+ {
+ std::istreambuf_iterator first{stream}, last;
+ parse(object, first, last);
+ }
+}
+
+// json-cpp/parse.hpp end
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// json-cpp/std_types.hpp begin
+
+#include
+#include
+#include
+#include