summaryrefslogtreecommitdiff
path: root/fast
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-07-19 08:30:43 +0200
committerPatrick Simianer <p@simianer.de>2014-07-19 08:30:43 +0200
commitf219bab21c07d02e7e54d557e23387bd93c9ce5f (patch)
tree14a6e2b647a3b1ab11391c154fbcf7c63841f8db /fast
parent6208c48407c359819945730006edf4c402b7ff77 (diff)
hg io
Diffstat (limited to 'fast')
-rw-r--r--fast/.gitignore6
-rw-r--r--fast/Makefile11
-rw-r--r--fast/README.md5
-rw-r--r--fast/dummyvector.h27
-rw-r--r--fast/grammar.hh5
-rw-r--r--fast/hypergraph.cc216
-rw-r--r--fast/hypergraph.hh110
-rw-r--r--fast/json-cpp.hpp1231
-rw-r--r--fast/main.cc137
-rw-r--r--fast/make_paks.cc112
-rw-r--r--fast/read_pak.cc26
-rw-r--r--fast/semiring.hh29
12 files changed, 1683 insertions, 232 deletions
diff --git a/fast/.gitignore b/fast/.gitignore
index 80d28d5..c37a566 100644
--- a/fast/.gitignore
+++ b/fast/.gitignore
@@ -1,3 +1,5 @@
fast_weaver
-hypergraph.o
-msgpack-c/
+*.o
+data
+make_paks
+read_pak
diff --git a/fast/Makefile b/fast/Makefile
index f09ab21..1d88446 100644
--- a/fast/Makefile
+++ b/fast/Makefile
@@ -1,12 +1,19 @@
all: hypergraph.o main.cc
- clang -std=c++11 -lstdc++ -lm hypergraph.o -I./msgpack-c/include/ main.cc -o fast_weaver
+ clang -std=c++11 hypergraph.o main.cc -o fast_weaver -lmsgpack -lstdc++ -lm
hypergraph.o: hypergraph.cc hypergraph.hh grammar.o semiring.hh
- clang -std=c++11 -I./msgpack-c/include/ -c hypergraph.cc
+ clang -std=c++11 -c hypergraph.cc
grammar.o: grammar.cc grammar.hh
clang -std=c++11 -c grammar.cc
+make_paks: make_paks.cc # libraries are listed after the sources so the linker can resolve their symbols
+ g++ -std=c++11 make_paks.cc -o make_paks -lmsgpack
+
+read_pak: read_pak.cc # same link order as make_paks
+ g++ -std=c++11 read_pak.cc -o read_pak -lmsgpack
+
+
clean:
rm -f fast_weaver hypergraph.o grammar.o
diff --git a/fast/README.md b/fast/README.md
index 3087bab..5bcc962 100644
--- a/fast/README.md
+++ b/fast/README.md
@@ -7,3 +7,8 @@ TODO
* hg: json input (jsoncpp?)
* language model: kenlm
+depends on msgpack [1]
+http://jscheiny.github.io/Streams/
+
+[1] http://msgpack.org
+
diff --git a/fast/dummyvector.h b/fast/dummyvector.h
new file mode 100644
index 0000000..09cf3f7
--- /dev/null
+++ b/fast/dummyvector.h
@@ -0,0 +1,27 @@
+#pragma once
+#include <msgpack.hpp>
+
+
+struct DummyVector { // plain set of double-valued fields with msgpack (de)serialization support; used as Edge::f in hypergraph.hh
+ double CountEF;
+ double EgivenFCoherent;
+ double Glue;
+ double IsSingletonF;
+ double IsSingletonFE;
+ double LanguageModel;
+ double LanguageModel_OOV;
+ double MaxLexFgivenE; // NOTE(review): declared before MaxLexEgivenF, but MSGPACK_DEFINE below lists MaxLexEgivenF first
+ double MaxLexEgivenF;
+ double PassThrough;
+ double PassThrough_1;
+ double PassThrough_2;
+ double PassThrough_3;
+ double PassThrough_4;
+ double PassThrough_5;
+ double PassThrough_6;
+ double SampleCountF;
+ double WordPenalty;
+ // presumably the argument order below is the order in which the fields are packed/unpacked - confirm against the msgpack docs
+ MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty);
+};
+
diff --git a/fast/grammar.hh b/fast/grammar.hh
index 5625b85..c4ef3ad 100644
--- a/fast/grammar.hh
+++ b/fast/grammar.hh
@@ -1,5 +1,4 @@
-#ifndef GRAMMAR_HH
-#define GRAMMAR_HH
+#pragma once
#include <string>
#include <sstream>
@@ -29,5 +28,3 @@ class Rule {
} // namespace
-#endif
-
diff --git a/fast/hypergraph.cc b/fast/hypergraph.cc
index 44e060e..4e6601f 100644
--- a/fast/hypergraph.cc
+++ b/fast/hypergraph.cc
@@ -41,7 +41,7 @@ operator<<(std::ostream& os, const Edge& e)
{
ostringstream _;
for (auto it = e.tails.begin(); it != e.tails.end(); ++it) {
- _ << (*it)->id; if (*it != e.tails.back()) _ << ",";
+ _ << (**it).id; if (*it != e.tails.back()) _ << ",";
}
os << \
"Edge<head=" << e.head->id << \
@@ -55,19 +55,26 @@ operator<<(std::ostream& os, const Edge& e)
}
/*
- * Hypergraph
- * methods
+ * functions
*
*/
void
-Hypergraph::reset()
+reset(list<Node*> nodes, vector<Edge*> edges)
{
+ for (auto it = nodes.begin(); it != nodes.end(); ++it)
+ (**it).mark = 0;
+ for (auto it = edges.begin(); it != edges.end(); ++it)
+ (**it).mark = 0;
+}
+
+template<class Semiring> void
+init(list<Node*>& nodes, list<Node*>::iterator root, Semiring& semiring)
+{
+ for (auto it = nodes.begin(); it != nodes.end(); ++it)
+ (**it).score = semiring.null;
+ (**root).score = semiring.one;
}
-/*
- * functions
- *
- */
void
topological_sort(list<Node*>& nodes, list<Node*>::iterator root)
{
@@ -94,37 +101,186 @@ topological_sort(list<Node*>& nodes, list<Node*>::iterator root)
}
}
-/*void
-init(vector<Node*>& nodes, ViterbiSemiring<double>& semiring, Node* root)
-{
- for (auto it = nodes.begin(); it != nodes.end(); ++it)
- (*it)->score = semiring.null;
- root->score = semiring.one;
-}
-
void
-viterbi(vector<Node*>& nodes, map<unsigned int, Hg::Node*> nodes_by_id, Node* root)
+viterbi(Hypergraph& hg)
{
- vector<Node*> sorted = topological_sort(nodes);
- ViterbiSemiring<double> semiring;
-
- init(sorted, semiring, root);
+ list<Node*>::iterator root = hg.nodes.begin(); // FIXME?
+ Hg::topological_sort(hg.nodes, root);
+ Semiring::Viterbi<double> semiring;
+ Hg::init(hg.nodes, root, semiring);
- for (auto n_it = sorted.begin(); n_it != sorted.end(); ++n_it) {
- for (auto e_it = (*n_it)->incoming.begin(); e_it != (*n_it)->incoming.end(); ++e_it) {
- cout << (*e_it)->s() << endl;
+ for (auto n = hg.nodes.begin(); n != hg.nodes.end(); ++n) {
+ for (auto e = (**n).incoming.begin(); e != (**n).incoming.end(); ++e) {
+ cout << **e << endl;
double s = semiring.one;
- for (auto m_it = (*e_it)->tails.begin(); m_it != (*e_it)->tails.end(); m_it++) {
- s = semiring.multiply(s, (*m_it)->score);
+ for (auto m = (**e).tails.begin(); m != (**e).tails.end(); ++m) {
+ s = semiring.multiply(s, (**m).score);
}
- (*n_it)->score = semiring.add((*n_it)->score, semiring.multiply(s, (*e_it)->score));
+ (**n).score = semiring.add((**n).score, semiring.multiply(s, (**e).score));
}
}
- for (auto it = sorted.begin(); it != sorted.end(); ++it) {
- cout << (*it)->id << " " << (*it)->score << endl;
+ for (auto it = hg.nodes.begin(); it != hg.nodes.end(); ++it) {
+ cout << (**it).id << " " << (**it).score << endl;
}
-}*/
+}
+
+namespace io {
+
+void
+read(Hypergraph& hg, string fn) // decode the msgpack stream in file `fn`; NOTE(review): decoded nodes/edges are not yet stored in `hg` (and are leaked)
+{
+ ifstream ifs(fn, ios::in | ios::binary); // msgpack is a binary format
+ size_t i = 0, nn = 0, ne = 0; // i: index of the current object; nn/ne: range bounds read from objects 0 and 1
+ msgpack::unpacker pac;
+ while(true) {
+ pac.reserve_buffer(32*1024);
+ size_t bytes = ifs.read(pac.buffer(), pac.buffer_capacity()).gcount(); // not readsome(): it may return 0 before anything has been buffered
+ pac.buffer_consumed(bytes);
+ msgpack::unpacked result;
+ while(pac.next(&result)) {
+ msgpack::object o = result.get();
+ if (i == 0) {
+ o.convert(&nn);
+ nn += 1;
+ } else if (i == 1) {
+ o.convert(&ne);
+ ne += 1;
+ } else if (i > 1 && i <= nn) {
+ // objects 2..nn are decoded as nodes
+ Node* n = new Node;
+ o.convert(n);
+ } else if (i > nn && i <= nn+ne+1) {
+ // objects nn+1..nn+ne+1 are decoded as edges
+ Edge* e = new Edge;
+ o.convert(e);
+ }
+ i++;
+ }
+ if (!bytes) break; // nothing more could be read: end of file (or read error)
+ }
+}
+
+void
+write(Hypergraph& hg, string fn) // currently a no-op: the whole body is commented out (it still refers to argv[2] and hg.weights, neither of which exists here)
+{
+ /*FILE* file = fopen(argv[2], "wb");
+ msgpack::fbuffer fbuf(file);
+ msgpack::pack(fbuf, hg.nodes.size());
+ msgpack::pack(fbuf, hg.edges.size());
+ msgpack::pack(fbuf, hg.weights);
+ for (auto it = hg.nodes.begin(); it != hg.nodes.end(); it++)
+ msgpack::pack(fbuf, *it);
+ for (auto it = hg.edges.begin(); it != hg.edges.end(); it++)
+ msgpack::pack(fbuf, *it);
+
+ fclose(file);*/
+}
+
+void
+manual(Hypergraph& hg)
+{
+ // nodes
+ Node* a = new Node; a->id = 0; a->symbol = "root"; a->left = false; a->right = false; a->mark = 0;
+ Node* b = new Node; b->id = 1; b->symbol = "NP"; b->left = 0; b->right = 1; b->mark = 0;
+ Node* c = new Node; c->id = 2; c->symbol = "V"; c->left = 1; c->right = 2; c->mark = 0;
+ Node* d = new Node; d->id = 3; d->symbol = "JJ"; d->left = 3; d->right = 4; d->mark = 0;
+ Node* e = new Node; e->id = 4; e->symbol = "NN"; e->left = 3; e->right = 5; e->mark = 0;
+ Node* f = new Node; f->id = 5; f->symbol = "NP"; f->left = 2; f->right = 5; f->mark = 0;
+ Node* g = new Node; g->id = 6; g->symbol = "NP"; g->left = 1; g->right = 5; g->mark = 0;
+ Node* h = new Node; h->id = 7; h->symbol = "S"; h->left = 0; h->right = 6; h->mark = 0;
+
+ hg.add_node(a);
+ hg.add_node(h);
+ hg.add_node(g);
+ hg.add_node(c);
+ hg.add_node(d);
+ hg.add_node(f);
+ hg.add_node(b);
+ hg.add_node(e);
+
+ // edges
+ Edge* q = new Edge; q->head = hg.nodes_by_id[1]; q->tails.push_back(hg.nodes_by_id[0]); q->score = 0.367879441171;
+ hg.nodes_by_id[1]->incoming.push_back(q);
+ hg.nodes_by_id[0]->outgoing.push_back(q);
+ q->arity = 1;
+ q->mark = 0;
+ hg.edges.push_back(q);
+
+ Edge* p = new Edge; p->head = hg.nodes_by_id[2]; p->tails.push_back(hg.nodes_by_id[0]); p->score = 0.606530659713;
+ hg.nodes_by_id[2]->incoming.push_back(p);
+ hg.nodes_by_id[0]->outgoing.push_back(p);
+ p->arity = 1;
+ p->mark = 0;
+ hg.edges.push_back(p);
+
+ Edge* r = new Edge; r->head = hg.nodes_by_id[3]; r->tails.push_back(hg.nodes_by_id[0]); r->score = 1.0;
+ hg.nodes_by_id[3]->incoming.push_back(r);
+ hg.nodes_by_id[0]->outgoing.push_back(r);
+ r->arity = 1;
+ r->mark = 0;
+ hg.edges.push_back(r);
+
+ Edge* s = new Edge; s->head = hg.nodes_by_id[3]; s->tails.push_back(hg.nodes_by_id[0]); s->score = 1.0;
+ hg.nodes_by_id[3]->incoming.push_back(s);
+ hg.nodes_by_id[0]->outgoing.push_back(s);
+ s->arity = 1;
+ s->mark = 0;
+ hg.edges.push_back(s);
+
+ Edge* t = new Edge; t->head = hg.nodes_by_id[4]; t->tails.push_back(hg.nodes_by_id[0]); t->score = 1.0;
+ hg.nodes_by_id[4]->incoming.push_back(t);
+ hg.nodes_by_id[0]->outgoing.push_back(t);
+ t->arity = 1;
+ t->mark = 0;
+ hg.edges.push_back(t);
+
+ Edge* u = new Edge; u->head = hg.nodes_by_id[4]; u->tails.push_back(hg.nodes_by_id[0]); u->score = 1.0;
+ hg.nodes_by_id[4]->incoming.push_back(u);
+ hg.nodes_by_id[0]->outgoing.push_back(u);
+ u->arity = 1;
+ u->mark = 0;
+ hg.edges.push_back(u);
+
+ Edge* v = new Edge; v->head = hg.nodes_by_id[4]; v->tails.push_back(hg.nodes_by_id[3]); v->score = 1.0;
+ hg.nodes_by_id[4]->incoming.push_back(v);
+ hg.nodes_by_id[3]->outgoing.push_back(v);
+ v->arity = 1;
+ v->mark = 0;
+ hg.edges.push_back(v);
+
+ Edge* w = new Edge; w->head = hg.nodes_by_id[4]; w->tails.push_back(hg.nodes_by_id[3]); w->score = 2.71828182846;
+ hg.nodes_by_id[4]->incoming.push_back(w);
+ hg.nodes_by_id[3]->outgoing.push_back(w);
+ w->arity = 1;
+ w->mark = 0;
+ hg.edges.push_back(w);
+
+ Edge* x = new Edge; x->head = hg.nodes_by_id[5]; x->tails.push_back(hg.nodes_by_id[4]); x->score = 1.0;
+ hg.nodes_by_id[5]->incoming.push_back(x);
+ hg.nodes_by_id[4]->outgoing.push_back(x);
+ x->arity = 1;
+ x->mark = 0;
+ hg.edges.push_back(x);
+
+ Edge* y = new Edge; y->head = hg.nodes_by_id[6]; y->tails.push_back(hg.nodes_by_id[2]); y->tails.push_back(hg.nodes_by_id[5]); y->score = 1.0;
+ hg.nodes_by_id[6]->incoming.push_back(y);
+ hg.nodes_by_id[2]->outgoing.push_back(y);
+ hg.nodes_by_id[5]->outgoing.push_back(y);
+ y->arity = 2;
+ y->mark = 0;
+ hg.edges.push_back(y);
+
+ Edge* z = new Edge; z->head = hg.nodes_by_id[7]; z->tails.push_back(hg.nodes_by_id[1]); z->tails.push_back(hg.nodes_by_id[6]); z->score = 1.0;
+ hg.nodes_by_id[7]->incoming.push_back(z);
+ hg.nodes_by_id[1]->outgoing.push_back(z);
+ hg.nodes_by_id[6]->outgoing.push_back(z);
+ z->arity = 2;
+ z->mark = 0;
+ hg.edges.push_back(z);
+}
+
+} // namespace
} // namespace
diff --git a/fast/hypergraph.hh b/fast/hypergraph.hh
index 68cca19..2e30911 100644
--- a/fast/hypergraph.hh
+++ b/fast/hypergraph.hh
@@ -1,5 +1,4 @@
-#ifndef HYPERGRAPH_HH
-#define HYPERGRAPH_HH
+#pragma once
#include "grammar.hh"
#include "semiring.hh"
@@ -12,8 +11,10 @@
#include <functional>
#include <algorithm>
#include <iterator>
+#include <fstream>
-#include "msgpack-c/include/msgpack.hpp"
+#include "dummyvector.h"
+#include <msgpack.hpp>
using namespace std;
@@ -23,61 +24,78 @@ typedef double weight_t;
namespace Hg {
-class Node;
+struct Node;
-class Edge {
- public:
- Node* head;
- vector<Node*> tails;
- score_t score;
- //Grammar::Rule rule; FIXME
- vector<weight_t> f;
- unsigned int arity;
- unsigned int mark;
+struct Edge {
+ Node* head;
+ vector<Node*> tails;
+ score_t score;
+ string rule; //FIXME
+ DummyVector f; //FIXME
+ unsigned int arity;
+ unsigned int mark;
- bool is_marked();
- friend std::ostream& operator<<(std::ostream& os, const Edge& s);
+ bool is_marked();
+ friend std::ostream& operator<<(std::ostream& os, const Edge& s);
- size_t head_id_;
- vector<size_t> tails_ids_; // node ids
- MSGPACK_DEFINE(head_id_, tails_ids_, score, f, arity);
+ size_t head_id_;
+ vector<size_t> tails_ids_; // node ids
+
+ MSGPACK_DEFINE(head_id_, tails_ids_, score, f, arity);
};
-class Node {
- public:
- size_t id;
- string symbol;
- unsigned short left;
- unsigned short right;
- score_t score;
- vector<Edge*> incoming;
- vector<Edge*> outgoing;
- unsigned int mark;
-
- bool is_marked();
- friend std::ostream& operator<<(std::ostream& os, const Node& n);
-
- vector<size_t> incoming_ids_; // edge ids
- vector<size_t> outgoing_ids_; // edge ids
- MSGPACK_DEFINE(id, symbol, left, right, score, incoming_ids_, outgoing_ids_);
+struct Node {
+ size_t id;
+ string symbol;
+ unsigned short left;
+ unsigned short right;
+ score_t score;
+ vector<Edge*> incoming;
+ vector<Edge*> outgoing;
+ unsigned int mark;
+
+ bool is_marked();
+ friend std::ostream& operator<<(std::ostream& os, const Node& n);
+
+ vector<size_t> incoming_ids_; // edge ids
+ vector<size_t> outgoing_ids_; // edge ids
+ MSGPACK_DEFINE(id, symbol, left, right, score, incoming_ids_, outgoing_ids_);
};
-class Hypergraph {
- public:
- list<Node*> nodes;
- vector<Edge*> edges;
- unordered_map<size_t, Node*> nodes_by_id;
- unsigned int arity;
+struct Hypergraph {
+ list<Node*> nodes;
+ vector<Edge*> edges;
+ unordered_map<size_t, Node*> nodes_by_id;
+ unsigned int arity;
- void reset();
- void add_node(Node* n) { nodes.push_back(n); nodes_by_id[n->id] = n; }
+ void add_node(Node* n) { nodes.push_back(n); nodes_by_id[n->id] = n; }
};
-void topological_sort(list<Node*>& nodes, list<Node*>::iterator root);
-void viterbi(Hypergraph& hg);
+void
+reset(list<Node*> nodes, vector<Edge*> edges); // clears the `mark` of every node and edge; signature matches the definition in hypergraph.cc
+
+template<typename Semiring> void
+init(list<Node*>& nodes, list<Node*>::iterator root, Semiring& semiring);
+
+void
+topological_sort(list<Node*>& nodes, list<Node*>::iterator root);
+
+void
+viterbi(Hypergraph& hg);
+
+namespace io {
+void
+read(Hypergraph& hg, string fn);
+
+void
+write(Hypergraph& hg, string fn);
+
+void
+manual(Hypergraph& hg);
} // namespace
-#endif
+
+} // namespace
diff --git a/fast/json-cpp.hpp b/fast/json-cpp.hpp
new file mode 100644
index 0000000..851a4f4
--- /dev/null
+++ b/fast/json-cpp.hpp
@@ -0,0 +1,1231 @@
+//
+// DO NOT EDIT !!! This file was generated with a script.
+//
+// JSON for C++
+// https://github.com/ascheglov/json-cpp
+// Version 0.1 alpha, rev. 170121e2dc099895064305e38bfb25d90a807ce3
+// Generated 2014-03-27 17:16:47.104492 UTC
+//
+// Belongs to the public domain
+
+#pragma once
+
+//----------------------------------------------------------------------
+// json-cpp.hpp begin
+
+//----------------------------------------------------------------------
+// json-cpp/parse.hpp begin
+
+#include <memory>
+#include <istream>
+#include <iterator>
+#include <string>
+#include <type_traits>
+
+//----------------------------------------------------------------------
+// json-cpp/ParserError.hpp begin
+
+#include <cassert>
+#include <cstddef>
+#include <exception>
+#include <string>
+
+#if defined _MSC_VER
+# define JSONCPP_INTERNAL_NOEXCEPT_ throw()
+#else
+# define JSONCPP_INTERNAL_NOEXCEPT_ noexcept
+#endif
+
+namespace jsoncpp
+{
+ class ParserError : public std::exception
+ {
+ public:
+ enum Type
+ {
+ NoError,
+ Eof, UnexpectedCharacter,
+ InvalidEscapeSequence, NoTrailSurrogate,
+ UnexpectedType, UnknownField,
+ NumberIsOutOfRange,
+ };
+
+ ParserError(Type type, std::size_t line, std::size_t column)
+ : m_type{type}, m_line{line}, m_column{column}
+ {
+ assert(type != NoError);
+ }
+
+ virtual const char* what() const JSONCPP_INTERNAL_NOEXCEPT_ override
+ {
+ if (m_what.empty())
+ {
+ m_what = "JSON parser error at line ";
+ m_what += std::to_string(m_line);
+ m_what += ", column ";
+ m_what += std::to_string(m_column);
+ switch (m_type)
+ {
+ case Eof: m_what += ": unexpected end of file"; break;
+ case UnexpectedCharacter: m_what += ": unexpected character"; break;
+ case InvalidEscapeSequence: m_what += ": invalid escape sequence"; break;
+ case NoTrailSurrogate: m_what += ": no UTF-16 trail surrogate"; break;
+ case UnexpectedType: m_what += ": unexpected value type"; break;
+ case UnknownField: m_what += ": unknown field name"; break;
+ case NumberIsOutOfRange: m_what += ": number is out of range"; break;
+ case NoError:
+ default:
+ m_what += ": INTERNAL ERROR"; break;
+ }
+ }
+
+ return m_what.c_str();
+ }
+
+ Type type() const { return m_type; }
+ std::size_t line() const { return m_line; }
+ std::size_t column() const { return m_column; }
+
+ private:
+ Type m_type;
+ std::size_t m_line;
+ std::size_t m_column;
+
+ mutable std::string m_what;
+ };
+}
+
+#undef JSONCPP_INTERNAL_NOEXCEPT_
+
+// json-cpp/ParserError.hpp end
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// json-cpp/Stream.hpp begin
+
+namespace jsoncpp
+{
+ template<class Traits>
+ class Stream;
+
+ namespace details
+ {
+ template<typename CharT, class X>
+ struct Traits2 {};
+
+ template<class Traits>
+ struct ParserTraits {};
+
+ template<class Traits>
+ struct GeneratorTraits {};
+ }
+
+ template<class X>
+ using Parser = Stream<details::ParserTraits<X>>;
+
+ template<class X>
+ using Generator = Stream<details::GeneratorTraits<X>>;
+
+ template<typename X, typename T>
+ inline auto serialize(Stream<X>& stream, T& value) -> decltype(value.serialize(stream), void())
+ {
+ value.serialize(stream);
+ }
+}
+// json-cpp/Stream.hpp end
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// json-cpp/value_types.hpp begin
+
+namespace jsoncpp
+{
+ // Helper masks
+ const auto TypeIsNotFundamental = 0x40;
+ const auto TypeIsCollection = 0x80;
+
+ enum class Type
+ {
+ Undefined = 0, // Helper type for debugging variant-like types
+ Null = 0x01,
+ Boolean = 0x02,
+ Number = 0x04,
+ String = 0x08 | TypeIsNotFundamental,
+ Array = 0x10 | TypeIsNotFundamental | TypeIsCollection,
+ Object = 0x20 | TypeIsNotFundamental | TypeIsCollection,
+ };
+}
+// json-cpp/value_types.hpp end
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// json-cpp/details/parser_utility.hpp begin
+
+#include <cassert>
+#include <cstddef>
+#include <utility>
+
+namespace jsoncpp { namespace details
+{
+ template<typename CharT>
+ struct CStrIterator
+ {
+ using this_type = CStrIterator<CharT>;
+
+ CStrIterator()
+ {
+ static CharT null{0};
+ m_ptr = &null;
+ }
+
+ CStrIterator(const CharT* ptr) : m_ptr{ptr} {}
+
+ const CharT& operator*() { return *m_ptr; }
+ const CharT* operator->() { return m_ptr; }
+
+ this_type& operator++()
+ {
+ assert(!isEnd());
+ ++m_ptr;
+ return *this;
+ }
+
+ this_type operator++(int) { auto temp = *this; ++*this; return temp; }
+
+ bool operator==(const this_type& rhs) const { return isEnd() == rhs.isEnd(); }
+ bool operator!=(const this_type& rhs) const { return !this->operator==(rhs); }
+
+ private:
+ const CharT* m_ptr;
+
+ bool isEnd() const { return *m_ptr == 0; }
+ };
+
+ class Diagnostics
+ {
+ public:
+ void nextColumn() { ++m_column; }
+ void newLine() { ++m_line; m_column = 0; }
+
+ ParserError makeError(ParserError::Type type) const
+ {
+ return{type, m_line, m_column};
+ }
+
+ private:
+ std::size_t m_column{0};
+ std::size_t m_line{1};
+ };
+
+ template<typename InputIterator>
+ struct Reader
+ {
+ using this_type = Reader<InputIterator>;
+
+ Reader(InputIterator first, InputIterator last) : m_iter(first), m_end(last)
+ {
+ checkEnd();
+ }
+
+ char operator*() { return *m_iter; }
+ this_type& operator++()
+ {
+ checkEnd();
+ ++m_iter;
+ m_diag.nextColumn();
+ return *this;
+ }
+
+ void checkEnd()
+ {
+ if (m_iter == m_end)
+ throw m_diag.makeError(ParserError::Eof);
+ }
+
+ char getNextChar()
+ {
+ auto prev = *m_iter;
+ ++*this;
+ return prev;
+ }
+
+ Diagnostics m_diag;
+ InputIterator m_iter, m_end;
+ };
+}}
+
+// json-cpp/details/parser_utility.hpp end
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// json-cpp/details/number_parser.hpp begin
+
+#include <cmath>
+
+namespace jsoncpp { namespace details
+{
+ inline bool isDigit(char c) { return c >= '0' && c <= '9'; }
+
+ template<typename Iterator>
+ inline unsigned parseIntNumber(Iterator& iter)
+ {
+ auto intPart = 0U; // TBD: 0ULL ?
+
+ do
+ {
+ intPart = intPart * 10 + (*iter - '0');
+
+ ++iter;
+ }
+ while (isDigit(*iter));
+
+ return intPart;
+ }
+
+ // Parses the unsigned number at *iter (digits, optional ".digits", optional exponent); leaves iter on the first unconsumed character.
+ template<typename Iterator>
+ inline double parseRealNumber(Iterator& iter)
+ {
+ double number = 0;
+
+ if (*iter == '0')
+ ++iter;
+ else
+ {
+ // Accumulate in double: parseIntNumber() returns unsigned, so an integer part above UINT_MAX would wrap around.
+ do
+ {
+ number = number * 10 + (*iter - '0');
+ ++iter;
+ }
+ while (isDigit(*iter));
+ }
+
+ if (*iter == '.')
+ {
+ ++iter;
+
+ auto mul = 0.1;
+ while (isDigit(*iter))
+ {
+ number += (*iter - '0') * mul;
+ mul /= 10;
+ ++iter;
+ }
+ }
+
+ if (*iter == 'e' || *iter == 'E')
+ {
+ ++iter;
+
+ auto negate = *iter == '-';
+ if (negate || *iter == '+')
+ ++iter;
+ // FIXME: a non-digit at *iter is consumed by parseIntNumber() as if it were a digit
+ auto e = parseIntNumber(iter);
+
+ if (negate)
+ number /= std::pow(10, e);
+ else
+ number *= std::pow(10, e);
+ }
+
+ return number;
+ }
+}}
+// json-cpp/details/number_parser.hpp end
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// json-cpp/details/string_parser.hpp begin
+
+#include <string>
+
+namespace jsoncpp { namespace details
+{
+ inline char32_t utf16SurrogatePairToUtf32(char32_t lead, char32_t trail) // lead in [0xD800,0xDBFF], trail in [0xDC00,0xDFFF] -> code point
+ {
+ return 0x10000 + ((lead - 0xD800) << 10) + (trail - 0xDC00); // must add: the shifted lead can reach bit 16, which `0x10000 |` would swallow
+ }
+
+ inline void utf32ToUtf8(char32_t c, std::string& str)
+ {
+ auto add = [&str](char32_t c){ str.push_back(static_cast<char>(c)); };
+
+ if (c < 0x80)
+ {
+ add(c);
+ }
+ else if (c < 0x800)
+ {
+ add(0xC0 | c >> 6);
+ add(0x80 | (c & 0x3f));
+ }
+ else if (c < 0x10000)
+ {
+ add(0xE0 | c >> 12);
+ add(0x80 | ((c >> 6) & 0x3f));
+ add(0x80 | (c & 0x3f));
+ }
+ else if (c < 0x200000)
+ {
+ add(0xF0 | c >> 18);
+ add(0x80 | ((c >> 12) & 0x3f));
+ add(0x80 | ((c >> 6) & 0x3f));
+ add(0x80 | (c & 0x3f));
+ }
+ else if (c < 0x4000000)
+ {
+ add(0xF8 | c >> 24);
+ add(0x80 | ((c >> 18) & 0x3f));
+ add(0x80 | ((c >> 12) & 0x3f));
+ add(0x80 | ((c >> 6) & 0x3f));
+ add(0x80 | (c & 0x3f));
+ }
+ else
+ {
+ add(0xFC | c >> 30);
+ add(0x80 | ((c >> 24) & 0x3f));
+ add(0x80 | ((c >> 18) & 0x3f));
+ add(0x80 | ((c >> 12) & 0x3f));
+ add(0x80 | ((c >> 6) & 0x3f));
+ add(0x80 | (c & 0x3f));
+ }
+ }
+
+ enum class CharType { Raw, CodePoint, UTF16Pair };
+
+ template<typename CharT, std::size_t CharSize>
+ inline void addToStr(std::basic_string<CharT>& str, CharType type, char32_t c1, char32_t c2);
+
+ template<>
+ inline void addToStr<char, 1>(std::basic_string<char>& str, CharType type, char32_t c1, char32_t c2)
+ {
+ if (type == CharType::Raw)
+ {
+ str.push_back(static_cast<char>(c1));
+ }
+ else if (type == CharType::CodePoint)
+ {
+ utf32ToUtf8(c1, str);
+ }
+ else
+ {
+ auto c32 = utf16SurrogatePairToUtf32(c1, c2);
+ utf32ToUtf8(c32, str);
+ }
+ }
+
+ template<>
+ inline void addToStr<wchar_t, 2>(std::basic_string<wchar_t>& str, CharType type, char32_t c1, char32_t c2)
+ {
+ str.push_back(static_cast<wchar_t>(c1));
+ if (type == CharType::UTF16Pair)
+ str.push_back(static_cast<wchar_t>(c2));
+ }
+
+ template<>
+ inline void addToStr<wchar_t, 4>(std::basic_string<wchar_t>& str, CharType type, char32_t c1, char32_t c2)
+ {
+ auto c = (type == CharType::UTF16Pair) ? utf16SurrogatePairToUtf32(c1, c2) : c1;
+ str.push_back(static_cast<wchar_t>(c));
+ }
+
+ template<typename Iterator>
+ inline int parseHexDigit(Iterator& iter, ParserError::Type& err)
+ {
+ auto ch = *iter;
+ ++iter;
+ if (ch >= '0' && ch <= '9') return ch - '0';
+ if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
+ if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
+
+ err = ParserError::InvalidEscapeSequence;
+ return 0;
+ }
+
+ template<typename Iterator>
+ inline char32_t parseUTF16CodeUnit(Iterator& iter, ParserError::Type& err)
+ {
+ auto n = parseHexDigit(iter, err) << 12;
+ n |= parseHexDigit(iter, err) << 8;
+ n |= parseHexDigit(iter, err) << 4;
+ n |= parseHexDigit(iter, err);
+ return static_cast<char32_t>(n);
+ }
+
+ template<typename Iterator, typename CharT>
+ inline ParserError::Type parseStringImpl(Iterator& iter, std::basic_string<CharT>& str)
+ {
+ str.clear();
+ auto add = [&str](CharType type, char32_t c1, char32_t c2)
+ {
+ addToStr<CharT, sizeof(CharT)>(str, type, c1, c2);
+ };
+
+ for (;;)
+ {
+ auto ch = static_cast<char32_t>(*iter);
+ ++iter;
+ if (ch == '"')
+ return ParserError::NoError;
+
+ if (ch == '\\')
+ {
+ ch = static_cast<char32_t>(*iter);
+ ++iter;
+ switch (ch)
+ {
+ case '\\': case '"': case '/':
+ break;
+
+ case 'b': ch = '\b'; break;
+ case 'f': ch = '\f'; break;
+ case 'n': ch = '\n'; break;
+ case 'r': ch = '\r'; break;
+ case 't': ch = '\t'; break;
+
+ case 'u':
+ {
+ ParserError::Type err{ParserError::NoError};
+ auto codeUnit = parseUTF16CodeUnit(iter, err);
+ if (err != ParserError::NoError)
+ return err;
+
+ if (codeUnit >= 0xD800 && codeUnit < 0xDC00)
+ {
+ if (*iter != '\\') return ParserError::NoTrailSurrogate;
+ ++iter;
+ if (*iter != 'u') return ParserError::NoTrailSurrogate;
+ ++iter;
+
+ auto trailSurrogate = parseUTF16CodeUnit(iter, err);
+ if (err != ParserError::NoError)
+ return err;
+
+ add(CharType::UTF16Pair, codeUnit, trailSurrogate);
+ }
+ else
+ {
+ add(CharType::CodePoint, codeUnit, 0);
+ }
+ }
+ continue;
+
+ default:
+ return ParserError::InvalidEscapeSequence;
+ }
+ }
+
+ add(CharType::Raw, ch, 0);
+ }
+ }
+}}
+
+// json-cpp/details/string_parser.hpp end
+//----------------------------------------------------------------------
+
+namespace jsoncpp
+{
+ template<typename CharT, typename InputIterator>
+ class Stream<details::ParserTraits<details::Traits2<CharT, InputIterator>>>
+ {
+ public:
+ using this_type = Parser<details::Traits2<CharT, InputIterator>>;
+
+ explicit Stream(InputIterator first, InputIterator last)
+ : m_reader{first, last}
+ {
+ nextValue();
+ }
+
+ Type getType() const { return m_type; }
+ bool getBoolean() const { return m_boolean; }
+ double getNumber() const { return m_number; }
+ const std::string& getFieldName() const { return m_fieldName; }
+
+ void checkType(Type type) const
+ {
+ if (getType() != type)
+ throw makeError(ParserError::UnexpectedType);
+ }
+
+ bool isListEnd(char terminator)
+ {
+ eatWhitespace();
+ if (*m_reader != terminator)
+ return false;
+
+ ++m_reader;
+ return true;
+ }
+
+ void eatListSeparator()
+ {
+ eatWhitespace();
+ check(',');
+ eatWhitespace();
+ }
+
+ void nextNameValuePair()
+ {
+ eatWhitespace();
+ check('"');
+ parseString(m_fieldName);
+ eatWhitespace();
+ check(':');
+ nextValue();
+ }
+
+ void nextValue()
+ {
+ eatWhitespace();
+ m_type = nextValueImpl();
+ }
+
+ template<typename DstCharT>
+ void parseString(std::basic_string<DstCharT>& str)
+ {
+ auto err = parseStringImpl(m_reader, str);
+ if (err != ParserError::NoError)
+ throw m_reader.m_diag.makeError(err);
+ }
+
+ ParserError makeError(ParserError::Type type) const
+ {
+ return m_reader.m_diag.makeError(type);
+ }
+
+ private:
+ Type nextValueImpl()
+ {
+ switch (*m_reader)
+ {
+ case '{': ++m_reader; return Type::Object;
+ case '[': ++m_reader; return Type::Array;
+ case 't': ++m_reader; checkLiteral("true"); m_boolean = true; return Type::Boolean;
+ case 'f': ++m_reader; checkLiteral("false"); m_boolean = false; return Type::Boolean;
+ case 'n': ++m_reader; checkLiteral("null"); return Type::Null;
+ case '"': ++m_reader; return Type::String;
+
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ m_number = parseRealNumber(m_reader);
+ return Type::Number;
+
+ case '-':
+ ++m_reader;
+ m_number = -parseRealNumber(m_reader);
+ return Type::Number;
+ }
+
+ throw unexpectedCharacter();
+ }
+
+ ParserError unexpectedCharacter() const
+ {
+ return makeError(ParserError::UnexpectedCharacter);
+ }
+
+ void check(char expectedChar)
+ {
+ if (*m_reader != expectedChar)
+ throw unexpectedCharacter();
+
+ ++m_reader;
+ }
+
+ template<std::size_t N>
+ void checkLiteral(const char(&literal)[N])
+ {
+ static_assert(N > 2, "");
+ for (auto i = 1; i != N - 1; ++i, ++m_reader)
+ if (*m_reader != literal[i])
+ throw unexpectedCharacter();
+ }
+
+ void eatWhitespace()
+ {
+ for (;; ++m_reader)
+ {
+ switch (*m_reader)
+ {
+ case '/':
+ ++m_reader;
+ check('/');
+ while (*m_reader != '\n')
+ ++m_reader;
+
+ // no break here
+ case '\n':
+ m_reader.m_diag.newLine();
+ break;
+
+ case ' ': case '\t': case '\r':
+ break;
+
+ default:
+ return;
+ }
+ }
+ }
+
+ details::Reader<InputIterator> m_reader;
+
+ Type m_type;
+ double m_number;
+ bool m_boolean;
+ std::string m_fieldName;
+ };
+
+ template<class X>
+ inline void serialize(Parser<X>& parser, bool& value)
+ {
+ parser.checkType(Type::Boolean);
+ value = parser.getBoolean();
+ }
+
+ template<class X, typename T>
+ inline typename std::enable_if<std::is_arithmetic<T>::value>::type
+ serialize(Parser<X>& parser, T& value) // stores the parser's current JSON number into any arithmetic T; throws if the value is not a Number
+ {
+ parser.checkType(Type::Number);
+ auto number = parser.getNumber();
+ value = static_cast<T>(number);
+ if (std::is_integral<T>::value && value != number) // only integers must round-trip exactly; a float target may round (0.1f != 0.1) without being out of range
+ throw parser.makeError(ParserError::NumberIsOutOfRange);
+ }
+
+ template<class X, typename DstCharT>
+ inline void serialize(Parser<X>& parser, std::basic_string<DstCharT>& value)
+ {
+ parser.checkType(Type::String);
+ parser.parseString(value);
+ }
+
+ namespace details
+ {
+ template<class X, typename Callback>
+ inline void parseList(Parser<X>& parser, Type type, char terminator, Callback&& callback)
+ {
+ parser.checkType(type);
+
+ while (!parser.isListEnd(terminator))
+ {
+ callback();
+
+ if (parser.isListEnd(terminator))
+ return;
+
+ parser.eatListSeparator();
+ }
+ }
+ }
+
+ template<class X, typename Callback>
+ inline void parseObject(Parser<X>& parser, Callback&& callback)
+ {
+ details::parseList(parser, Type::Object, '}', [&]
+ {
+ parser.nextNameValuePair();
+ callback(parser.getFieldName());
+ });
+ }
+
+ template<class X, typename Callback>
+ void parseArray(Parser<X>& parser, Callback&& callback)
+ {
+ details::parseList(parser, Type::Array, ']', [&]
+ {
+ parser.nextValue();
+ callback();
+ });
+ }
+
+ template<typename CharT, class T, typename InputIterator>
+ inline void parse(T& object, InputIterator first, InputIterator last)
+ {
+ Parser<details::Traits2<CharT, InputIterator>> stream{first, last};
+ serialize(stream, object);
+ }
+
+ template<typename T, typename CharT>
+ inline void parse(T& object, const CharT* str)
+ {
+ details::CStrIterator<CharT> first{str}, last;
+ parse<CharT>(object, first, last);
+ }
+
+ template<typename T, typename CharT>
+ inline void parse(T& object, std::basic_string<CharT>& str)
+ {
+ parse<CharT>(object, std::begin(str), std::end(str));
+ }
+
+ template<typename T, typename CharT>
+ inline void parse(T& object, std::basic_istream<CharT>& stream)
+ {
+ std::istreambuf_iterator<CharT> first{stream}, last;
+ parse<CharT>(object, first, last);
+ }
+}
+
+// json-cpp/parse.hpp end
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// json-cpp/std_types.hpp begin
+
+#include <deque>
+#include <forward_list>
+#include <list>
+#include <map>
+#include <memory>
+#include <set>
+#include <type_traits>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+//----------------------------------------------------------------------
+// json-cpp/generate.hpp begin
+
+#include <sstream>
+#include <string>
+
+//----------------------------------------------------------------------
+// json-cpp/details/string_writer.hpp begin
+
+#include <string>
+
+namespace jsoncpp { namespace details
+{
+    /// Writes `str` to `sink` as a double-quoted JSON string literal.
+    ///
+    /// `sink` is called once per output character. The quote, the backslash
+    /// and \b \f \n \r \t become two-character escapes, every other code
+    /// below 0x20 becomes \u00XX with upper-case hex digits, and all remaining
+    /// characters are passed on as a single `char`.
+    template<typename SrcCharT, typename Sink>
+    inline void writeString(const std::basic_string<SrcCharT>& str, Sink&& sink)
+    {
+        const char* const hexDigits = "0123456789ABCDEF";
+
+        // Emits a backslash followed by the given escape letter.
+        auto escaped = [&sink](char letter)
+        {
+            sink('\\');
+            sink(letter);
+        };
+
+        sink('"');
+        for (const SrcCharT element : str)
+        {
+            // Each element is reduced to an unsigned byte before it is classified.
+            const char32_t code = static_cast<unsigned char>(element);
+
+            if (code == '"')       escaped('"');
+            else if (code == '\\') escaped('\\');
+            else if (code == '\b') escaped('b');
+            else if (code == '\f') escaped('f');
+            else if (code == '\n') escaped('n');
+            else if (code == '\r') escaped('r');
+            else if (code == '\t') escaped('t');
+            else if (code < '\x20')
+            {
+                const unsigned byte = static_cast<unsigned char>(code);
+                escaped('u');
+                sink('0');
+                sink('0');
+                sink(hexDigits[byte >> 4]);
+                sink(hexDigits[byte & 15]);
+            }
+            else
+            {
+                sink(static_cast<char>(code));
+            }
+        }
+        sink('"');
+    }
+}}
+
+// json-cpp/details/string_writer.hpp end
+//----------------------------------------------------------------------
+
+namespace jsoncpp
+{
+ /// JSON text generator: the `Stream` specialisation behind `Generator<...>`.
+ ///
+ /// Output goes to a `Sink` that is stored by pointer (so it has to outlive
+ /// the stream) and is used through `operator<<` and `put()`.
+ template<typename CharT, typename Sink>
+ class Stream<details::GeneratorTraits<details::Traits2<CharT, Sink>>>
+ {
+ public:
+     using this_type = Generator<details::Traits2<CharT, Sink>>;
+
+     explicit Stream(Sink& sink) : m_sink(&sink) {}
+
+     void objectBegin()
+     {
+         (*m_sink) << "{";
+     }
+
+     /// Writes `"name": ` for a C-string field name; `name` must not be null.
+     void fieldName(const char* name)
+     {
+         fieldName(std::string(name));
+     }
+
+     /// Writes `"name": `. The name goes through details::writeString, so a
+     /// quote, backslash or control character in it is escaped instead of
+     /// producing malformed JSON.
+     template<typename StrCharT>
+     void fieldName(const std::basic_string<StrCharT>& name)
+     {
+         Sink* const sink = m_sink;
+         details::writeString(name, [sink](char c){ sink->put(c); });
+         (*m_sink) << ": ";
+     }
+
+     void separator()
+     {
+         (*m_sink) << ", ";
+     }
+
+     void objectEnd()
+     {
+         (*m_sink) << '}';
+     }
+
+     void arrayBegin()
+     {
+         (*m_sink) << '[';
+     }
+
+     void arrayEnd()
+     {
+         (*m_sink) << ']';
+     }
+
+     /// Writes the literal `null`.
+     friend void serialize(this_type& stream, std::nullptr_t)
+     {
+         (*stream.m_sink) << "null";
+     }
+
+     /// Writes `true` or `false`.
+     friend void serialize(this_type& stream, bool value)
+     {
+         (*stream.m_sink) << (value ? "true" : "false");
+     }
+
+     /// Writes any arithmetic value with the sink's `operator<<`.
+     template<typename T>
+     friend typename std::enable_if<std::is_arithmetic<T>::value>::type serialize(this_type& stream, T& value)
+     {
+         (*stream.m_sink) << value;
+     }
+
+     /// Writes a string value as an escaped JSON string literal.
+     template<typename SrcCharT>
+     friend void serialize(this_type& stream, const std::basic_string<SrcCharT>& value)
+     {
+         details::writeString(value, [&stream](char c){ stream.m_sink->put(c); });
+     }
+
+ private:
+     Sink* m_sink;
+ };
+
+ /// Writes the pointee of `ptr`, or `null` when `ptr` tests false.
+ template<class X, typename Pointer>
+ inline void writePointer(Generator<X>& generator, Pointer& ptr)
+ {
+     if (!ptr)
+     {
+         serialize(generator, nullptr);
+         return;
+     }
+
+     serialize(generator, *ptr);
+ }
+
+ /// Writes every element of `range` as a JSON array, calling
+ /// generator.separator() between two consecutive elements.
+ template<class X, typename Range>
+ inline void writeRange(Generator<X>& generator, Range& range)
+ {
+     generator.arrayBegin();
+
+     const auto& last = std::end(range);
+     bool needSeparator = false;
+     for (auto current = std::begin(range); current != last; ++current)
+     {
+         // The separator goes in front of every element except the first one.
+         if (needSeparator)
+             generator.separator();
+         needSeparator = true;
+
+         serialize(generator, *current);
+     }
+
+     generator.arrayEnd();
+ }
+
+ /// Serializes `object` into a std::string holding its JSON text.
+ template<class T>
+ inline std::string to_string(const T& object)
+ {
+     std::ostringstream buffer;
+     Generator<details::Traits2<char, std::ostream>> generator{buffer};
+
+     // The serialize() overloads take their value by non-const reference.
+     T& writable = const_cast<T&>(object);
+     serialize(generator, writable);
+
+     return buffer.str();
+ }
+}
+
+// json-cpp/generate.hpp end
+//----------------------------------------------------------------------
+
+namespace jsoncpp
+{
+ /// Reads an optional object: when parser.getType() is Type::Null the
+ /// pointer is reset, otherwise a fresh T is created and parsed.
+ template<class X, typename T>
+ inline void serialize(Parser<X>& parser, std::shared_ptr<T>& obj)
+ {
+     if (parser.getType() == jsoncpp::Type::Null)
+     {
+         obj.reset();
+         return;
+     }
+
+     obj = std::make_shared<T>();
+     serialize(parser, *obj);
+ }
+
+ /// Writes the pointee, or `null` for an empty pointer (see writePointer).
+ template<class X, typename T>
+ inline void serialize(Generator<X>& generator, std::shared_ptr<T>& obj)
+ {
+     writePointer(generator, obj);
+ }
+
+ /// Reads an optional object: when parser.getType() is Type::Null the
+ /// pointer is reset, otherwise a fresh T is created and parsed.
+ template<class X, typename T>
+ inline void serialize(Parser<X>& parser, std::unique_ptr<T>& obj)
+ {
+     if (parser.getType() != jsoncpp::Type::Null)
+     {
+         // reset() belongs to the smart pointer itself: `obj->reset(...)`
+         // would go through the (possibly null) pointee and call T::reset.
+         obj.reset(new T());
+         serialize(parser, *obj);
+     }
+     else
+     {
+         obj.reset();
+     }
+ }
+
+ /// Writes the pointee, or `null` for an empty pointer (see writePointer).
+ template<class X, typename T>
+ inline void serialize(Generator<X>& generator, std::unique_ptr<T>& obj)
+ {
+     writePointer(generator, obj);
+ }
+
+ namespace details
+ {
+     /// Replaces the content of `c` with the elements of a JSON array. Each
+     /// element is created with emplace_back() and then filled in place.
+     template<class X, typename C>
+     inline void serializeContainer(Parser<X>& parser, C& c)
+     {
+         c.clear();
+
+         auto appendElement = [&]
+         {
+             c.emplace_back();
+             serialize(parser, c.back());
+         };
+         parseArray(parser, appendElement);
+     }
+
+     /// Writes `c` as a JSON array.
+     template<class X, typename C>
+     inline void serializeContainer(Generator<X>& generator, C& c)
+     {
+         writeRange(generator, c);
+     }
+
+     /// Replaces the content of `c` with the elements of a JSON array. Each
+     /// element is parsed into a temporary and then inserted.
+     template<class X, typename C>
+     inline void serializeSet(Parser<X>& parser, C& c)
+     {
+         c.clear();
+
+         auto insertElement = [&]
+         {
+             typename C::value_type element;
+             serialize(parser, element);
+             c.insert(element);
+         };
+         parseArray(parser, insertElement);
+     }
+
+     /// Writes `c` as a JSON array.
+     template<class X, typename C>
+     inline void serializeSet(Generator<X>& generator, C& c)
+     {
+         writeRange(generator, c);
+     }
+
+     /// Replaces the content of `c` with the members of a JSON object; the
+     /// member name is the key, the member value is parsed into c[name].
+     template<class X, typename C>
+     inline void serializeStrMap(Parser<X>& parser, C& c)
+     {
+         c.clear();
+
+         auto readEntry = [&](const std::string& key)
+         {
+             serialize(parser, c[key]);
+         };
+         parseObject(parser, readEntry);
+     }
+
+     /// Writes `c` as a JSON object with one member per map entry.
+     template<class X, typename C>
+     inline void serializeStrMap(Generator<X>& generator, C& c)
+     {
+         generator.objectBegin();
+
+         const auto& last = std::end(c);
+         bool needSeparator = false;
+         for (auto entry = std::begin(c); entry != last; ++entry)
+         {
+             // The separator goes in front of every member except the first one.
+             if (needSeparator)
+                 generator.separator();
+             needSeparator = true;
+
+             generator.fieldName(entry->first);
+             serialize(generator, entry->second);
+         }
+
+         generator.objectEnd();
+     }
+ }
+
+ // serialize() overloads for the standard containers. Sequences and sets are
+ // JSON arrays; maps with std::string keys are JSON objects. Each overload
+ // forwards to the matching details:: helper, which is itself overloaded on
+ // Parser / Generator.
+
+ template<class X, typename T>
+ inline void serialize(Stream<X>& stream, std::vector<T>& arr)
+ { details::serializeContainer(stream, arr); }
+
+ template<class X, typename T>
+ inline void serialize(Stream<X>& stream, std::list<T>& arr)
+ { details::serializeContainer(stream, arr); }
+
+ /// Parses a JSON array into a std::forward_list.
+ ///
+ /// std::forward_list has neither emplace_back() nor back(), so it cannot use
+ /// details::serializeContainer for parsing. Elements are appended behind a
+ /// running tail iterator instead, which keeps the order of the array.
+ template<class X, typename T>
+ inline void serialize(Parser<X>& parser, std::forward_list<T>& arr)
+ {
+     arr.clear();
+
+     auto tail = arr.before_begin();
+     parseArray(parser, [&]
+     {
+         tail = arr.emplace_after(tail);
+         serialize(parser, *tail);
+     });
+ }
+
+ /// Writes a std::forward_list as a JSON array.
+ template<class X, typename T>
+ inline void serialize(Generator<X>& generator, std::forward_list<T>& arr)
+ { details::serializeContainer(generator, arr); }
+
+ template<class X, typename T>
+ inline void serialize(Stream<X>& stream, std::deque<T>& arr)
+ { details::serializeContainer(stream, arr); }
+
+ template<class X, typename T>
+ inline void serialize(Stream<X>& stream, std::set<T>& arr)
+ { details::serializeSet(stream, arr); }
+
+ template<class X, typename T>
+ inline void serialize(Stream<X>& stream, std::unordered_set<T>& arr)
+ { details::serializeSet(stream, arr); }
+
+ template<class X, typename T>
+ inline void serialize(Stream<X>& stream, std::map<std::string, T>& t)
+ { details::serializeStrMap(stream, t); }
+
+ template<class X, typename T>
+ inline void serialize(Stream<X>& stream, std::unordered_map<std::string, T>& t)
+ { details::serializeStrMap(stream, t); }
+}
+// json-cpp/std_types.hpp end
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// json-cpp/serialization_helpers.hpp begin
+
+#include <array>
+#include <unordered_map>
+
+namespace jsoncpp
+{
+ namespace details
+ {
+     /// Writes one `"name": value` member.
+     template<class X, typename T>
+     inline void writeField(Generator<X>& generator, const char* name, T& value)
+     {
+         generator.fieldName(name);
+         serialize(generator, value);
+     }
+
+     /// Writes the first name/value pair, a separator, and then recurses into
+     /// the remaining pairs.
+     template<class X, typename T, typename... F>
+     inline void writeField(Generator<X>& generator, const char* name, T& value, F&&... fieldsDef)
+     {
+         writeField(generator, name, value);
+         generator.separator();
+         writeField(generator, fieldsDef...);
+     }
+
+     /// Lookup table from field name to "how to parse that field".
+     ///
+     /// It is built from an alternating name, value, name, value, ... list.
+     /// Only the names and the value *types* are kept; the values themselves
+     /// are not stored.
+     template<typename ParserT>
+     class FieldsTable
+     {
+     public:
+         template<typename... F>
+         FieldsTable(F&&... fieldsDef)
+         {
+             m_map.reserve(sizeof...(fieldsDef) / 2);
+             // The first value sits at position 1 of the name/value list.
+             add(1, fieldsDef...);
+         }
+
+         struct FieldInfo
+         {
+             /// Parses into the object of type T that `fieldPtr` points to.
+             template<typename T>
+             static void parseAs(ParserT& parser, void* fieldPtr)
+             {
+                 serialize(parser, *static_cast<T*>(fieldPtr));
+             }
+
+             /// Remembers the position `idx` and a parse function for type T.
+             template<typename T>
+             FieldInfo(T&, std::size_t idx)
+                 : m_fieldIdx(idx)
+                 , m_parseFn(&parseAs<T>)
+             {}
+
+             std::size_t m_fieldIdx;
+             void(*m_parseFn)(ParserT& parser, void* fieldPtr);
+         };
+
+         /// Returns the entry registered under `name`, or nullptr.
+         const FieldInfo* find(const std::string& name) const
+         {
+             const auto entry = m_map.find(name);
+             if (entry == m_map.end())
+                 return nullptr;
+             return &entry->second;
+         }
+
+     private:
+         /// Registers one pair; the next value is two positions further on.
+         template<typename T, typename... F>
+         void add(std::size_t idx, const char* name, T& value, F&&... otherFields)
+         {
+             m_map.emplace(name, FieldInfo(value, idx));
+             add(idx + 2, otherFields...);
+         }
+
+         /// End of the recursion: no pairs left.
+         void add(std::size_t /*idx*/) {}
+
+         std::unordered_map<std::string, FieldInfo> m_map;
+     };
+
+     /// Field names take a slot in the pointer array but carry no address.
+     inline void* makePtrs(const char*) { return nullptr; }
+
+     /// Field values contribute their own address.
+     template<typename T>
+     inline void* makePtrs(T& obj) { return &obj; }
+ }
+
+ /// Parses a JSON object into the members listed in `fieldsDef`, given as
+ /// alternating name, value, name, value, ...
+ ///
+ /// The name table is a function-local static, so it is built only once per
+ /// template instantiation; the addresses of the values are collected anew
+ /// on every call. A member name that is not in the table raises
+ /// parser.makeError(ParserError::UnknownField).
+ template<class Cls, class X, typename... F>
+ inline void fields(Cls&, Parser<X>& parser, F&&... fieldsDef)
+ {
+     static const details::FieldsTable<Parser<X>> table{fieldsDef...};
+
+     std::array<void*, sizeof...(fieldsDef)> ptrs{details::makePtrs(fieldsDef)...};
+
+     parseObject(parser, [&](const std::string& fieldName)
+     {
+         const auto* entry = table.find(fieldName);
+         if (entry == nullptr)
+             throw parser.makeError(ParserError::UnknownField);
+
+         entry->m_parseFn(parser, ptrs[entry->m_fieldIdx]);
+     });
+ }
+
+ /// Writes the name/value pairs in `fieldsDef` as one JSON object.
+ template<class Cls, class X, typename... F>
+ inline void fields(Cls&, Generator<X>& generator, F&&... fieldsDef)
+ {
+     generator.objectBegin();
+     details::writeField(generator, fieldsDef...);
+     generator.objectEnd();
+ }
+}
+
+// json-cpp/serialization_helpers.hpp end
+//----------------------------------------------------------------------
+
+// json-cpp.hpp end
+//----------------------------------------------------------------------
+
diff --git a/fast/main.cc b/fast/main.cc
index 372f0f1..a7b5837 100644
--- a/fast/main.cc
+++ b/fast/main.cc
@@ -2,140 +2,11 @@
int
-main(void)
+main(int argc, char** argv)
{
-/*
-{
-"weights":{"logp":2.0,"use_house":0.0,"use_shell":1.0},
-"nodes":
-[
-{ "id":0, "cat":"root", "span":[0,0] },
-{ "id":1, "cat":"NP", "span":[1,2] },
-{ "id":2, "cat":"V", "span":[2,3] },
-{ "id":3, "cat":"JJ", "span":[4,5] },
-{ "id":4, "cat":"NN", "span":[4,6] },
-{ "id":5, "cat":"NP", "span":[3,6] },
-{ "id":6, "cat":"VP", "span":[2,6] },
-{ "id":7, "cat":"S", "span":[1,6] }
-],
-"edges":
-[
-{ "head":1, "rule":"[NP] ||| ich ||| i", "tails":[0], "f":{"logp":-0.5,"use_i":1.0} },
-{ "head":2, "rule":"[V] ||| sah ||| saw", "tails":[0], "f":{"logp":-0.25,"use_saw":1.0} },
-{ "head":3, "rule":"[JJ] ||| kleines ||| small", "tails":[0], "f":{"logp":0.0,"use_small":1.0} },
-{ "head":3, "rule":"[JJ] ||| kleines ||| little", "tails":[0], "f":{"logp":0.0,"use_little":1.0} },
-{ "head":4, "rule":"[NN] ||| kleines haus ||| small house", "tails":[0], "f":{"logp":0.0,"use_house":1.0} },
-{ "head":4, "rule":"[NN] ||| kleines haus ||| little house", "tails":[0], "f":{"logp":0.0,"use_house":1.0} },
-{ "head":4, "rule":"[NN] ||| [JJ,1] haus ||| [JJ,1] shell", "tails":[3], "f":{"logp":0.0,"use_shell":1.0} },
-{ "head":4, "rule":"[NN] ||| [JJ,1] haus ||| [JJ,1] house", "tails":[3], "f":{"logp":0.0,"use_house":1.0} },
-{ "head":5, "rule":"[NP] ||| ein [NN,1] ||| a [NN,1]", "tails":[4], "f":{"logp":0.0,"use_a":1.0} },
-{ "head":6, "rule":"[VP] ||| [V,1] [NP,2] ||| [V,1] [NP,2]", "tails":[2, 5], "f":{"logp":0.0} },
-{ "head":7, "rule":"[S] ||| [NP,1] [VP,2] ||| [NP,1] [VP,2]", "tails":[1, 6], "f":{"logp":0.0} }
-]
-}
-*/
Hg::Hypergraph hg;
-
- // nodes
- Hg::Node a; a.id = 0; a.symbol = "root"; a.left = false; a.right = false; a.mark = 0;
- Hg::Node b; b.id = 1; b.symbol = "NP"; b.left = 0; b.right = 1; b.mark = 0;
- Hg::Node c; c.id = 2; c.symbol = "V"; c.left = 1; c.right = 2; c.mark = 0;
- Hg::Node d; d.id = 3; d.symbol = "JJ"; d.left = 3; d.right = 4; d.mark = 0;
- Hg::Node e; e.id = 4; e.symbol = "NN"; e.left = 3; e.right = 5; e.mark = 0;
- Hg::Node f; f.id = 5; f.symbol = "NP"; f.left = 2; f.right = 5; f.mark = 0;
- Hg::Node g; g.id = 6; g.symbol = "NP"; g.left = 1; g.right = 5; g.mark = 0;
- Hg::Node h; h.id = 7; h.symbol = "S"; h.left = 0; h.right = 6; h.mark = 0;
-
- hg.add_node(&a);
- hg.add_node(&h);
- hg.add_node(&g);
- hg.add_node(&c);
- hg.add_node(&d);
- hg.add_node(&f);
- hg.add_node(&b);
- hg.add_node(&e);
-
- // edges
- Hg::Edge q; q.head = hg.nodes_by_id[1]; q.tails.push_back(hg.nodes_by_id[0]); q.score = 0.367879441171;
- hg.nodes_by_id[1]->incoming.push_back(&q);
- hg.nodes_by_id[0]->outgoing.push_back(&q);
- q.arity = 1;
- q.mark = 0;
- hg.edges.push_back(&q);
-
- Hg::Edge p; p.head = hg.nodes_by_id[2]; p.tails.push_back(hg.nodes_by_id[0]); p.score = 0.606530659713;
- hg.nodes_by_id[2]->incoming.push_back(&p);
- hg.nodes_by_id[0]->outgoing.push_back(&p);
- p.arity = 1;
- p.mark = 0;
- hg.edges.push_back(&p);
-
- Hg::Edge r; r.head = hg.nodes_by_id[3]; r.tails.push_back(hg.nodes_by_id[0]); r.score = 1.0;
- hg.nodes_by_id[3]->incoming.push_back(&r);
- hg.nodes_by_id[0]->outgoing.push_back(&r);
- r.arity = 1;
- r.mark = 0;
- hg.edges.push_back(&r);
-
- Hg::Edge s; s.head = hg.nodes_by_id[3]; s.tails.push_back(hg.nodes_by_id[0]); s.score = 1.0;
- hg.nodes_by_id[3]->incoming.push_back(&s);
- hg.nodes_by_id[0]->outgoing.push_back(&s);
- s.arity = 1;
- s.mark = 0;
- hg.edges.push_back(&s);
-
- Hg::Edge t; t.head = hg.nodes_by_id[4]; t.tails.push_back(hg.nodes_by_id[0]); t.score = 1.0;
- hg.nodes_by_id[4]->incoming.push_back(&t);
- hg.nodes_by_id[0]->outgoing.push_back(&t);
- t.arity = 1;
- t.mark = 0;
- hg.edges.push_back(&t);
-
- Hg::Edge u; u.head = hg.nodes_by_id[4]; u.tails.push_back(hg.nodes_by_id[0]); u.score = 1.0;
- hg.nodes_by_id[4]->incoming.push_back(&u);
- hg.nodes_by_id[0]->outgoing.push_back(&u);
- u.arity = 1;
- u.mark = 0;
- hg.edges.push_back(&u);
-
- Hg::Edge v; v.head = hg.nodes_by_id[4]; v.tails.push_back(hg.nodes_by_id[3]); v.score = 1.0;
- hg.nodes_by_id[4]->incoming.push_back(&v);
- hg.nodes_by_id[3]->outgoing.push_back(&v);
- v.arity = 1;
- v.mark = 0;
- hg.edges.push_back(&v);
-
- Hg::Edge w; w.head = hg.nodes_by_id[4]; w.tails.push_back(hg.nodes_by_id[3]); w.score = 2.71828182846;
- hg.nodes_by_id[4]->incoming.push_back(&w);
- hg.nodes_by_id[3]->outgoing.push_back(&w);
- w.arity = 1;
- w.mark = 0;
- hg.edges.push_back(&w);
-
- Hg::Edge x; x.head = hg.nodes_by_id[5]; x.tails.push_back(hg.nodes_by_id[4]); x.score = 1.0;
- hg.nodes_by_id[5]->incoming.push_back(&x);
- hg.nodes_by_id[4]->outgoing.push_back(&x);
- x.arity = 1;
- x.mark = 0;
- hg.edges.push_back(&x);
-
- Hg::Edge y; y.head = hg.nodes_by_id[6]; y.tails.push_back(hg.nodes_by_id[2]); y.tails.push_back(hg.nodes_by_id[5]); y.score = 1.0;
- hg.nodes_by_id[6]->incoming.push_back(&y);
- hg.nodes_by_id[2]->outgoing.push_back(&y);
- hg.nodes_by_id[5]->outgoing.push_back(&y);
- y.arity = 2;
- y.mark = 0;
- hg.edges.push_back(&y);
-
- Hg::Edge z; z.head = hg.nodes_by_id[7]; z.tails.push_back(hg.nodes_by_id[1]); z.tails.push_back(hg.nodes_by_id[6]); z.score = 1.0;
- hg.nodes_by_id[7]->incoming.push_back(&z);
- hg.nodes_by_id[1]->outgoing.push_back(&z);
- hg.nodes_by_id[6]->outgoing.push_back(&z);
- z.arity = 2;
- z.mark = 0;
- hg.edges.push_back(&z);
-
- Hg::topological_sort(hg.nodes, hg.nodes.begin());
- //Hg::viterbi(nodes, hg.nodes, hg.nodes_by_id(0]);
+ //Hg::io::manual(hg);
+ // The hypergraph file is the first command-line argument; without it
+ // argv[1] must not be passed on, so the program exits with status 1.
+ if (argc < 2)
+ return 1;
+ Hg::io::read(hg, argv[1]);
+ //Hg::viterbi(hg);
}
diff --git a/fast/make_paks.cc b/fast/make_paks.cc
new file mode 100644
index 0000000..6fe7fae
--- /dev/null
+++ b/fast/make_paks.cc
@@ -0,0 +1,112 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <msgpack.hpp>
+#include <msgpack/fbuffer.h>
+#include <msgpack/fbuffer.hpp>
+#include <unordered_map>
+
+#include "json-cpp.hpp"
+#include "hypergraph.hh"
+#include "dummyvector.h"
+
+using namespace std;
+
+
+// Plain mirror of one entry of the JSON "nodes" array. The scalar member
+// has a default so that it holds a defined value even when the object is
+// default-initialised and the JSON input leaves the field out.
+struct DummyNode {
+    int id = 0;
+    string cat;
+    vector<int> span;
+};
+
+// Plain mirror of one entry of the JSON "edges" array.
+struct DummyEdge {
+    int head = 0;
+    string rule;
+    vector<size_t> tails;
+    DummyVector f;
+    score_t weight{};
+};
+
+// Plain mirror of the whole JSON document: "nodes", "edges" and "weights".
+struct DummyHg {
+    vector<DummyNode> nodes;
+    vector<DummyEdge> edges;
+    DummyVector weights;
+};
+
+// JSON mapping of DummyNode: "id", "cat", "span".
+template<typename X> inline void
+serialize(jsoncpp::Stream<X>& stream, DummyNode& o)
+{
+    fields(o, stream,
+           "id", o.id,
+           "cat", o.cat,
+           "span", o.span);
+}
+
+// JSON mapping of DummyEdge: "head", "rule", "tails", "f", "weight".
+template<typename X> inline void
+serialize(jsoncpp::Stream<X>& stream, DummyEdge& o)
+{
+    fields(o, stream,
+           "head", o.head,
+           "rule", o.rule,
+           "tails", o.tails,
+           "f", o.f,
+           "weight", o.weight);
+}
+
+// JSON mapping of DummyHg: "nodes", "edges", "weights".
+template<typename X> inline void
+serialize(jsoncpp::Stream<X>& stream, DummyHg& o)
+{
+    fields(o, stream,
+           "nodes", o.nodes,
+           "edges", o.edges,
+           "weights", o.weights);
+}
+
+// JSON mapping of DummyVector: one member per feature, named like the member.
+template<typename X> inline void
+serialize(jsoncpp::Stream<X>& stream, DummyVector& o)
+{
+    fields(o, stream,
+           "EgivenFCoherent", o.EgivenFCoherent,
+           "SampleCountF", o.SampleCountF,
+           "CountEF", o.CountEF,
+           "MaxLexFgivenE", o.MaxLexFgivenE,
+           "MaxLexEgivenF", o.MaxLexEgivenF,
+           "IsSingletonF", o.IsSingletonF,
+           "IsSingletonFE", o.IsSingletonFE,
+           "LanguageModel", o.LanguageModel,
+           "LanguageModel_OOV", o.LanguageModel_OOV,
+           "PassThrough", o.PassThrough,
+           "PassThrough_1", o.PassThrough_1,
+           "PassThrough_2", o.PassThrough_2,
+           "PassThrough_3", o.PassThrough_3,
+           "PassThrough_4", o.PassThrough_4,
+           "PassThrough_5", o.PassThrough_5,
+           "PassThrough_6", o.PassThrough_6,
+           "WordPenalty", o.WordPenalty,
+           "Glue", o.Glue);
+}
+
+
+
+// Converts a JSON hypergraph (argv[1]) into a msgpack file (argv[2]).
+//
+// The output holds the number of nodes, the number of edges, then every node
+// and every edge in input order. Returns 0 on success and 1 when arguments
+// are missing, a file cannot be opened or written, or a node has no
+// two-element span.
+int
+main(int argc, char** argv)
+{
+    if (argc < 3) {
+        cerr << "usage: make_paks <hypergraph.json> <output.pak>" << endl;
+        return 1;
+    }
+
+    ifstream ifs(argv[1]);
+    if (!ifs) {
+        cerr << "make_paks: cannot open '" << argv[1] << "' for reading" << endl;
+        return 1;
+    }
+    string json_str((istreambuf_iterator<char>(ifs)),
+                    (istreambuf_iterator<char>()));
+
+    DummyHg hg{};
+    jsoncpp::parse(hg, json_str);
+
+    // Check the input before the output file is created, so that bad input
+    // does not leave a truncated .pak file behind.
+    for (const auto& node : hg.nodes) {
+        if (node.span.size() < 2) {
+            cerr << "make_paks: node " << node.id << " has no [left, right] span" << endl;
+            return 1;
+        }
+    }
+
+    FILE* file = fopen(argv[2], "wb");
+    if (!file) {
+        cerr << "make_paks: cannot open '" << argv[2] << "' for writing" << endl;
+        return 1;
+    }
+
+    msgpack::fbuffer fbuf(file);
+    msgpack::pack(fbuf, hg.nodes.size());
+    msgpack::pack(fbuf, hg.edges.size());
+
+    // Each record is converted and packed right away; no heap copies are kept.
+    for (const auto& src : hg.nodes) {
+        Hg::Node n;
+        n.id = src.id;
+        n.symbol = src.cat;
+        n.left = src.span[0];
+        n.right = src.span[1];
+        msgpack::pack(fbuf, n);
+    }
+
+    for (const auto& src : hg.edges) {
+        Hg::Edge e;
+        e.head_id_ = src.head;
+        e.tails_ids_ = src.tails;
+        e.score = src.weight;
+        e.rule = src.rule;
+        e.f = src.f;
+        msgpack::pack(fbuf, e);
+    }
+
+    // fclose() flushes the stdio buffer; a failure here means lost data.
+    if (fclose(file) != 0) {
+        cerr << "make_paks: error while writing '" << argv[2] << "'" << endl;
+        return 1;
+    }
+
+    return 0;
+}
+
diff --git a/fast/read_pak.cc b/fast/read_pak.cc
new file mode 100644
index 0000000..81eed5d
--- /dev/null
+++ b/fast/read_pak.cc
@@ -0,0 +1,26 @@
+#include <msgpack.hpp>
+#include <iostream>
+#include <fstream>
+
+using namespace std;
+
+
+// Prints every msgpack object stored in the file argv[1] to stdout, one per
+// line. Returns 1 when no path is given or the file cannot be opened.
+int
+main(int argc, char** argv)
+{
+    if (argc < 2) {
+        cerr << "usage: read_pak <file.pak>" << endl;
+        return 1;
+    }
+
+    // msgpack data is binary, so the stream must not be opened in text mode.
+    ifstream ifs(argv[1], ios::in | ios::binary);
+    if (!ifs) {
+        cerr << "read_pak: cannot open '" << argv[1] << "'" << endl;
+        return 1;
+    }
+
+    const size_t chunk_size = 32*1024;
+    msgpack::unpacker pac;
+    for (;;) {
+        pac.reserve_buffer(chunk_size);
+
+        // read() stops only when the buffer is full or the stream ends, while
+        // readsome() may return 0 although more data is still to come.
+        ifs.read(pac.buffer(), static_cast<streamsize>(pac.buffer_capacity()));
+        const streamsize bytes = ifs.gcount();
+        if (bytes <= 0)
+            break;
+        pac.buffer_consumed(static_cast<size_t>(bytes));
+
+        msgpack::unpacked result;
+        while (pac.next(&result)) {
+            msgpack::object o = result.get();
+            cout << o << '\n';
+        }
+    }
+
+    return 0;
+}
diff --git a/fast/semiring.hh b/fast/semiring.hh
index 2be19ea..5874e88 100644
--- a/fast/semiring.hh
+++ b/fast/semiring.hh
@@ -1,37 +1,36 @@
-#ifndef SEMIRING_HH
-#define SEMIRING_HH
-//#pragma once
+#pragma once
+
+#include <algorithm>
+
+namespace Semiring {
+// Viterbi semiring over T: add() returns the larger operand, multiply() the
+// product; the members one and null hold 1.0 and 0.0.
template<typename T>
-class ViterbiSemiring {
- public:
- T one = 1.0;
- T null = 0.0;
-
- T add(T x, T y);
- T multiply(T x, T y);
- T convert(T x);
+struct Viterbi {
+ T one = 1.0;
+ T null = 0.0;
+
+ T add(T x, T y);
+ T multiply(T x, T y);
+ T convert(T x);
};
template<typename T> T
-ViterbiSemiring<T>::add(T x, T y)
+Viterbi<T>::add(T x, T y)
{
- return max(x, y);
+ // Qualified, so that the header does not depend on a using-directive for
+ // namespace std being in effect in the including file.
+ return std::max(x, y);
}
template<typename T> T
-ViterbiSemiring<T>::multiply(T x, T y)
+Viterbi<T>::multiply(T x, T y)
{
return x * y;
}
template<typename T> T
-ViterbiSemiring<T>::convert(T x)
+Viterbi<T>::convert(T x)
{
return (T)x;
}
-#endif
+} // namespace