From 190f68c880eb27506669e95e2bc0493e2ec42c4c Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Sun, 17 Aug 2014 07:51:16 +0100
Subject: functional again
---
.gitmodules | 3 +
fast/Makefile | 3 +-
fast/README.md | 9 +-
fast/grammar.cc | 46 +-
fast/grammar.hh | 1 +
fast/hypergraph.cc | 28 +-
fast/hypergraph.hh | 2 +-
fast/main.cc | 11 +-
fast/sparse_vector.hh | 38 +-
util/Makefile | 2 +-
util/cdec2json.py | 11 +-
util/json-cpp | 1 +
util/json-cpp.hpp | 1231 -------------------------------------------------
util/make_pak.cc | 72 ++-
util/read_pak.cc | 1 -
15 files changed, 137 insertions(+), 1322 deletions(-)
create mode 100644 .gitmodules
create mode 160000 util/json-cpp
delete mode 100644 util/json-cpp.hpp
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..843caa2
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "util/json-cpp"]
+ path = util/json-cpp
+ url = https://github.com/ascheglov/json-cpp.git
diff --git a/fast/Makefile b/fast/Makefile
index 40ce0eb..9e88076 100644
--- a/fast/Makefile
+++ b/fast/Makefile
@@ -1,11 +1,10 @@
-COMPILER=clang
+COMPILER=g++
CFLAGS=-std=c++11 -O3
all: grammar.o hypergraph.o main.cc
$(COMPILER) $(CFLAGS) -std=c++11 -lstdc++ -lm -lmsgpack grammar.o hypergraph.o main.cc -o fast_weaver
-
hypergraph.o: hypergraph.cc hypergraph.hh grammar.o semiring.hh sparse_vector.hh weaver.hh
$(COMPILER) $(CFLAGS) -g -c hypergraph.cc
diff --git a/fast/README.md b/fast/README.md
index a11bd85..1d6bd04 100644
--- a/fast/README.md
+++ b/fast/README.md
@@ -7,12 +7,12 @@ TODO
* other semirings
* include language model
* compress/hash words/feature strings?
-
+ * cast? Rule -> Edge, ChartItem -> Node
+ * feature factory, observer
Dependencies:
* MessagePack for object serialization [1]
* kenlm language model [2]
-
This is Linux only.
@@ -20,6 +20,8 @@ This is Linux only.
[1] http://msgpack.org
[2] http://kheafield.com/code/kenlm/
+
+Stuff to have a look at:
http://math.nist.gov/spblas/
http://lapackpp.sourceforge.net/
http://www.cvmlib.com/
@@ -30,5 +32,6 @@ http://bytes.com/topic/c/answers/702569-blas-vs-cblas-c
http://www.netlib.org/lapack/#_standard_c_language_apis_for_lapack
http://www.osl.iu.edu/research/mtl/download.php3
http://scicomp.stackexchange.com/questions/351/recommendations-for-a-usable-fast-c-matrix-library
-
+https://software.intel.com/en-us/tbb_4.2_doc
http://goog-perftools.sourceforge.net/doc/tcmalloc.html
+
diff --git a/fast/grammar.cc b/fast/grammar.cc
index 558f6e6..a003eb4 100644
--- a/fast/grammar.cc
+++ b/fast/grammar.cc
@@ -10,7 +10,18 @@ namespace G {
NT::NT(string& s)
{
s.erase(0, 1); s.pop_back(); // remove '[' and ']'
- stringstream ss(s);
+ istringstream ss(s);
+ if (ss >> index) { // [i]
+ symbol = "";
+ index = stoi(s);
+
+ return;
+ } else { // [X]
+ symbol = s;
+ index = 0;
+
+ return;
+ }
string buf;
size_t j = 0;
index = 0; // default
@@ -135,28 +146,43 @@ operator<<(ostream& os, const Item& i)
*
*/
Rule::Rule(const string& s)
+{
+ from_s(this, s);
+}
+
+void
+Rule::from_s(Rule* r, const string& s)
{
stringstream ss(s);
size_t j = 0;
string buf;
- arity = 0;
+ r->arity = 0;
size_t index = 1;
+ vector<NT*> rhs_nt;
+ r->f = new Sv::SparseVector();
while (ss >> buf) {
if (buf == "|||") { j++; continue; }
if (j == 0) { // LHS
- lhs = new NT(buf);
+ r->lhs = new NT(buf);
} else if (j == 1) { // RHS
- rhs.push_back(new Item(buf));
- if (rhs.back()->type == NON_TERMINAL) arity++;
+ r->rhs.push_back(new Item(buf));
+ if (r->rhs.back()->type == NON_TERMINAL) {
+ rhs_nt.push_back(r->rhs.back()->nt);
+ r->arity++;
+ }
} else if (j == 2) { // TARGET
- target.push_back(new Item(buf));
- if (target.back()->type == NON_TERMINAL) {
- order.insert(make_pair(index, target.back()->nt->index));
+ r->target.push_back(new Item(buf));
+ if (r->target.back()->type == NON_TERMINAL) {
+ r->order.insert(make_pair(index, r->target.back()->nt->index));
+ if (r->target.back()->nt->symbol == "")
+ r->target.back()->nt->symbol = rhs_nt[r->target.back()->nt->index-1]->symbol;
index++;
}
} else if (j == 3) { // F TODO
+ Sv::SparseVector::from_s(r->f, buf); // FIXME this is slow!!!
} else if (j == 4) { // A TODO
- } else { // ERROR
+ } else {
+ // ERROR
}
if (j == 4) break;
}
@@ -203,7 +229,7 @@ Rule::escaped() const
os << " ||| ";
os << f->escaped();
os << " ||| ";
- os << "TODO";
+ os << "TODO(alignment)";
return os.str();
}
diff --git a/fast/grammar.hh b/fast/grammar.hh
index 48a5116..1b9ac5a 100644
--- a/fast/grammar.hh
+++ b/fast/grammar.hh
@@ -69,6 +69,7 @@ Sv::SparseVector* f;
Rule() {};
Rule(const string& s);
+ static void from_s(Rule* r, const string& s);
string repr() const;
string escaped() const;
diff --git a/fast/hypergraph.cc b/fast/hypergraph.cc
index e1debb1..a9a44f9 100644
--- a/fast/hypergraph.cc
+++ b/fast/hypergraph.cc
@@ -73,7 +73,7 @@ viterbi_path(Hypergraph& hg, Path& p)
find_if(hg.nodes.begin(), hg.nodes.end(), \
[](Node* n) { return n->incoming.size() == 0; });
- Hg::topological_sort(hg.nodes, root);
+ Hg::topological_sort(hg.nodes, root); // FIXME do I need to do this when reading from file?
Semiring::Viterbi semiring;
Hg::init(hg.nodes, root, semiring);
@@ -107,7 +107,8 @@ derive(const Path& p, const Node* cur, vector& carry)
it->head->right == cur->right) {
next = it;
}
- }
+ } // FIXME this is probably not so good
+
unsigned j = 0;
for (auto it: next->rule->target) {
if (it->type == G::NON_TERMINAL) {
@@ -125,7 +126,7 @@ void
read(Hypergraph& hg, vector& rules, const string& fn) // FIXME
{
ifstream ifs(fn);
- size_t i = 0, nr, nn, ne;
+ size_t i = 0, r, n, e;
msgpack::unpacker pac;
while(true) {
pac.reserve_buffer(32*1024);
@@ -135,17 +136,23 @@ read(Hypergraph& hg, vector& rules, const string& fn) // FIXME
while(pac.next(&result)) {
msgpack::object o = result.get();
if (i == 0) {
- o.convert(&nn);
- nn += 1;
+ o.convert(&r);
} else if (i == 1) {
- o.convert(&ne);
- ne += 1;
- } else if (i > 1 && i <= nn) {
+ o.convert(&n);
+ } else if (i == 2) {
+ o.convert(&e);
+ } else if (i > 2 && i <= r+2) {
+ string s;
+ o.convert(&s);
+ G::Rule* rule = new G::Rule;
+ G::Rule::from_s(rule, s);
+ rules.push_back(rule);
+ } else if (i > r+2 && i <= r+n+2) {
Node* n = new Node;
o.convert(n);
hg.nodes.push_back(n);
hg.nodes_by_id[n->id] = n;
- } else if (i > nn && i <= nn+ne+1) {
+ } else if (i > n+2 && i <= r+n+e+2) {
Edge* e = new Edge;
e->arity = 0;
o.convert(e);
@@ -158,6 +165,9 @@ read(Hypergraph& hg, vector& rules, const string& fn) // FIXME
e->tails.push_back(hg.nodes_by_id[*it]);
e->arity++;
}
+ e->rule = rules[e->rule_id_];
+ } else {
+ // ERROR
}
i++;
}
diff --git a/fast/hypergraph.hh b/fast/hypergraph.hh
index 699bfdf..299a62d 100644
--- a/fast/hypergraph.hh
+++ b/fast/hypergraph.hh
@@ -92,7 +92,7 @@ void
read(Hypergraph& hg, vector& rules, const string& fn); // FIXME
void
-write(Hypergraph& hg, vector& rules, const string& fn); // TODO
+write(Hypergraph& hg, vector& rules, const string& fn); // FIXME
void
manual(Hypergraph& hg, vector& rules);
diff --git a/fast/main.cc b/fast/main.cc
index 59e25d5..08fcfcf 100644
--- a/fast/main.cc
+++ b/fast/main.cc
@@ -1,4 +1,5 @@
#include "hypergraph.hh"
+#include <ctime>
int
@@ -6,9 +7,9 @@ main(int argc, char** argv)
{
Hg::Hypergraph hg;
G::Grammar g;
-//Hg::io::read(hg, g.rules, argv[1]);
- Hg::io::manual(hg, g.rules);
-
+ Hg::io::read(hg, g.rules, argv[1]);
+ //Hg::io::manual(hg, g.rules);
+ clock_t begin = clock();
Hg::Path p;
Hg::viterbi_path(hg, p);
vector s;
@@ -16,7 +17,9 @@ main(int argc, char** argv)
for (auto it: s)
cout << it << " ";
cout << endl;
-
+ clock_t end = clock();
+ double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
+ cout << elapsed_secs << " s" << endl;
return 0;
}
diff --git a/fast/sparse_vector.hh b/fast/sparse_vector.hh
index e497769..3583240 100644
--- a/fast/sparse_vector.hh
+++ b/fast/sparse_vector.hh
@@ -22,17 +22,7 @@ struct SparseVector {
SparseVector() {};
SparseVector(string& s)
{
- stringstream ss(s);
- while (!ss.eof()) {
- string t;
- ss >> t;
- size_t eq = t.find_first_of("=");
- t.replace(eq, 1, " ");
- stringstream tt(t);
- K k; V v;
- tt >> k >> v;
- m_.emplace(k.substr(k.find_first_of("\"")+1, k.find_last_of("\"")-1), v);
- }
+ from_s(this, s);
};
void
@@ -138,6 +128,25 @@ struct SparseVector {
return *this;
};
+ static void
+ from_s(SparseVector* w, const string& s)
+ {
+ stringstream ss(s);
+ while (!ss.eof()) {
+ string t;
+ ss >> t;
+ size_t eq = t.find_first_of("=");
+ if (eq == string::npos) {
+ return;
+ }
+ t.replace(eq, 1, " ");
+ stringstream tt(t);
+ K k; V v;
+ tt >> k >> v;
+ w->m_.emplace(k.substr(k.find_first_of("\"")+1, k.find_last_of("\"")-1), v);
+ }
+ }
+
string
repr() const
{
@@ -154,10 +163,13 @@ struct SparseVector {
};
string
- escaped() const {
+ escaped(bool quote_keys=false) const {
ostringstream os;
for (auto it = m_.cbegin(); it != m_.cend(); it++) {
- os << '"' << util::json_escape(it->first) << '"' << "=" << it->second;
+ if (quote_keys) os << '"';
+ os << util::json_escape(it->first);
+ if (quote_keys) os << '"';
+ os << "=" << it->second;
if (next(it) != m_.cend()) os << " ";
}
diff --git a/util/Makefile b/util/Makefile
index 08ead26..30564fe 100644
--- a/util/Makefile
+++ b/util/Makefile
@@ -3,7 +3,7 @@ COMPILER=clang
all: make_pak read_pak
-make_pak: make_pak.cc
+make_pak: make_pak.cc json-cpp/single_include/json-cpp.hpp ../fast/hypergraph.hh ../fast/weaver.hh
$(COMPILER) -std=c++11 -lstdc++ -lm -lmsgpack make_pak.cc -o make_pak
read_pak: read_pak.cc
diff --git a/util/cdec2json.py b/util/cdec2json.py
index adddb64..e7c8e93 100755
--- a/util/cdec2json.py
+++ b/util/cdec2json.py
@@ -15,13 +15,6 @@ def hg2json(hg, weights):
"""
res = ''
res += "{\n"
- res += '"weights":{'+"\n"
- a = []
- for i in weights:
- if i[1] != 0:
- a.append( '"%s":%s'%(i[0], i[1]) )
- res += ", ".join(a)+"\n"
- res += "},\n"
res += '"rules":[\n'
rules = []
for i in hg.edges:
@@ -35,9 +28,9 @@ def hg2json(hg, weights):
res += '"nodes":'+"\n"
res += "[\n"
a = []
- a.append( '{ "id":0, "cat":"root", "span":[-1,-1] }' )
+ a.append( '{ "id":0, "symbol":"root", "span":[-1,-1] }' )
for i in hg.nodes:
- a.append('{ "id":%d, "cat":"%s", "span":[%d,%d] }'%(i.id+1, i.cat, i.span[0], i.span[1]))
+ a.append('{ "id":%d, "symbol":"%s", "span":[%d,%d] }'%(i.id+1, i.cat, i.span[0], i.span[1]))
res += ",\n".join(a)+"\n"
res += "],\n"
res += '"edges":'+"\n"
diff --git a/util/json-cpp b/util/json-cpp
new file mode 160000
index 0000000..4eb4b47
--- /dev/null
+++ b/util/json-cpp
@@ -0,0 +1 @@
+Subproject commit 4eb4b47cf4d622bc7bf34071d6b68fc5beb37051
diff --git a/util/json-cpp.hpp b/util/json-cpp.hpp
deleted file mode 100644
index 851a4f4..0000000
--- a/util/json-cpp.hpp
+++ /dev/null
@@ -1,1231 +0,0 @@
-//
-// DO NOT EDIT !!! This file was generated with a script.
-//
-// JSON for C++
-// https://github.com/ascheglov/json-cpp
-// Version 0.1 alpha, rev. 170121e2dc099895064305e38bfb25d90a807ce3
-// Generated 2014-03-27 17:16:47.104492 UTC
-//
-// Belongs to the public domain
-
-#pragma once
-
-//----------------------------------------------------------------------
-// json-cpp.hpp begin
-
-//----------------------------------------------------------------------
-// json-cpp/parse.hpp begin
-
-#include
-#include
-#include
-#include
-#include
-
-//----------------------------------------------------------------------
-// json-cpp/ParserError.hpp begin
-
-#include
-#include
-#include
-#include
-
-#if defined _MSC_VER
-# define JSONCPP_INTERNAL_NOEXCEPT_ throw()
-#else
-# define JSONCPP_INTERNAL_NOEXCEPT_ noexcept
-#endif
-
-namespace jsoncpp
-{
- class ParserError : public std::exception
- {
- public:
- enum Type
- {
- NoError,
- Eof, UnexpectedCharacter,
- InvalidEscapeSequence, NoTrailSurrogate,
- UnexpectedType, UnknownField,
- NumberIsOutOfRange,
- };
-
- ParserError(Type type, std::size_t line, std::size_t column)
- : m_type{type}, m_line{line}, m_column{column}
- {
- assert(type != NoError);
- }
-
- virtual const char* what() const JSONCPP_INTERNAL_NOEXCEPT_ override
- {
- if (m_what.empty())
- {
- m_what = "JSON parser error at line ";
- m_what += std::to_string(m_line);
- m_what += ", column ";
- m_what += std::to_string(m_column);
- switch (m_type)
- {
- case Eof: m_what += ": unexpected end of file"; break;
- case UnexpectedCharacter: m_what += ": unexpected character"; break;
- case InvalidEscapeSequence: m_what += ": invalid escape sequence"; break;
- case NoTrailSurrogate: m_what += ": no UTF-16 trail surrogate"; break;
- case UnexpectedType: m_what += ": unexpected value type"; break;
- case UnknownField: m_what += ": unknown field name"; break;
- case NumberIsOutOfRange: m_what += ": number is out of range"; break;
- case NoError:
- default:
- m_what += ": INTERNAL ERROR"; break;
- }
- }
-
- return m_what.c_str();
- }
-
- Type type() const { return m_type; }
- std::size_t line() const { return m_line; }
- std::size_t column() const { return m_column; }
-
- private:
- Type m_type;
- std::size_t m_line;
- std::size_t m_column;
-
- mutable std::string m_what;
- };
-}
-
-#undef JSONCPP_INTERNAL_NOEXCEPT_
-
-// json-cpp/ParserError.hpp end
-//----------------------------------------------------------------------
-
-//----------------------------------------------------------------------
-// json-cpp/Stream.hpp begin
-
-namespace jsoncpp
-{
- template
- class Stream;
-
- namespace details
- {
- template
- struct Traits2 {};
-
- template
- struct ParserTraits {};
-
- template
- struct GeneratorTraits {};
- }
-
- template
- using Parser = Stream>;
-
- template
- using Generator = Stream>;
-
- template
- inline auto serialize(Stream& stream, T& value) -> decltype(value.serialize(stream), void())
- {
- value.serialize(stream);
- }
-}
-// json-cpp/Stream.hpp end
-//----------------------------------------------------------------------
-
-//----------------------------------------------------------------------
-// json-cpp/value_types.hpp begin
-
-namespace jsoncpp
-{
- // Helper masks
- const auto TypeIsNotFundamental = 0x40;
- const auto TypeIsCollection = 0x80;
-
- enum class Type
- {
- Undefined = 0, // Helper type for debugging variant-like types
- Null = 0x01,
- Boolean = 0x02,
- Number = 0x04,
- String = 0x08 | TypeIsNotFundamental,
- Array = 0x10 | TypeIsNotFundamental | TypeIsCollection,
- Object = 0x20 | TypeIsNotFundamental | TypeIsCollection,
- };
-}
-// json-cpp/value_types.hpp end
-//----------------------------------------------------------------------
-
-//----------------------------------------------------------------------
-// json-cpp/details/parser_utility.hpp begin
-
-#include
-#include
-#include
-
-namespace jsoncpp { namespace details
-{
- template
- struct CStrIterator
- {
- using this_type = CStrIterator;
-
- CStrIterator()
- {
- static CharT null{0};
- m_ptr = &null;
- }
-
- CStrIterator(const CharT* ptr) : m_ptr{ptr} {}
-
- const CharT& operator*() { return *m_ptr; }
- const CharT* operator->() { return m_ptr; }
-
- this_type& operator++()
- {
- assert(!isEnd());
- ++m_ptr;
- return *this;
- }
-
- this_type operator++(int) { auto temp = *this; ++*this; return temp; }
-
- bool operator==(const this_type& rhs) const { return isEnd() == rhs.isEnd(); }
- bool operator!=(const this_type& rhs) const { return !this->operator==(rhs); }
-
- private:
- const CharT* m_ptr;
-
- bool isEnd() const { return *m_ptr == 0; }
- };
-
- class Diagnostics
- {
- public:
- void nextColumn() { ++m_column; }
- void newLine() { ++m_line; m_column = 0; }
-
- ParserError makeError(ParserError::Type type) const
- {
- return{type, m_line, m_column};
- }
-
- private:
- std::size_t m_column{0};
- std::size_t m_line{1};
- };
-
- template
- struct Reader
- {
- using this_type = Reader;
-
- Reader(InputIterator first, InputIterator last) : m_iter(first), m_end(last)
- {
- checkEnd();
- }
-
- char operator*() { return *m_iter; }
- this_type& operator++()
- {
- checkEnd();
- ++m_iter;
- m_diag.nextColumn();
- return *this;
- }
-
- void checkEnd()
- {
- if (m_iter == m_end)
- throw m_diag.makeError(ParserError::Eof);
- }
-
- char getNextChar()
- {
- auto prev = *m_iter;
- ++*this;
- return prev;
- }
-
- Diagnostics m_diag;
- InputIterator m_iter, m_end;
- };
-}}
-
-// json-cpp/details/parser_utility.hpp end
-//----------------------------------------------------------------------
-
-//----------------------------------------------------------------------
-// json-cpp/details/number_parser.hpp begin
-
-#include
-
-namespace jsoncpp { namespace details
-{
- inline bool isDigit(char c) { return c >= '0' && c <= '9'; }
-
- template
- inline unsigned parseIntNumber(Iterator& iter)
- {
- auto intPart = 0U; // TBD: 0ULL ?
-
- do
- {
- intPart = intPart * 10 + (*iter - '0');
-
- ++iter;
- }
- while (isDigit(*iter));
-
- return intPart;
- }
-
- template
- inline double parseRealNumber(Iterator& iter)
- {
- double number = 0;
-
- if (*iter == '0')
- {
- ++iter;
- }
- else
- {
- number = parseIntNumber(iter);
- }
-
- // here `ch` is a peeked character, need to call eat()
-
- if (*iter == '.')
- {
- ++iter;
-
- auto mul = 0.1;
- while (isDigit(*iter))
- {
- number += (*iter - '0') * mul;
- mul /= 10;
- ++iter;
- }
- }
-
- // here `ch` is a peeked character, need to call eat()
-
- if (*iter == 'e' || *iter == 'E')
- {
- ++iter;
-
- auto negate = *iter == '-';
- if (negate || *iter == '+')
- ++iter;
- // FIXME: check `ch` for non-digit
-
- auto e = parseIntNumber(iter);
-
- if (negate)
- number /= std::pow(10, e);
- else
- number *= std::pow(10, e);
- }
-
- return number;
- }
-}}
-// json-cpp/details/number_parser.hpp end
-//----------------------------------------------------------------------
-
-//----------------------------------------------------------------------
-// json-cpp/details/string_parser.hpp begin
-
-#include
-
-namespace jsoncpp { namespace details
-{
- inline char32_t utf16SurrogatePairToUtf32(char32_t lead, char32_t trail)
- {
- return 0x10000 | (lead - 0xD800) << 10 | (trail - 0xDC00);
- }
-
- inline void utf32ToUtf8(char32_t c, std::string& str)
- {
- auto add = [&str](char32_t c){ str.push_back(static_cast(c)); };
-
- if (c < 0x80)
- {
- add(c);
- }
- else if (c < 0x800)
- {
- add(0xC0 | c >> 6);
- add(0x80 | (c & 0x3f));
- }
- else if (c < 0x10000)
- {
- add(0xE0 | c >> 12);
- add(0x80 | ((c >> 6) & 0x3f));
- add(0x80 | (c & 0x3f));
- }
- else if (c < 0x200000)
- {
- add(0xF0 | c >> 18);
- add(0x80 | ((c >> 12) & 0x3f));
- add(0x80 | ((c >> 6) & 0x3f));
- add(0x80 | (c & 0x3f));
- }
- else if (c < 0x4000000)
- {
- add(0xF8 | c >> 24);
- add(0x80 | ((c >> 18) & 0x3f));
- add(0x80 | ((c >> 12) & 0x3f));
- add(0x80 | ((c >> 6) & 0x3f));
- add(0x80 | (c & 0x3f));
- }
- else
- {
- add(0xFC | c >> 30);
- add(0x80 | ((c >> 24) & 0x3f));
- add(0x80 | ((c >> 18) & 0x3f));
- add(0x80 | ((c >> 12) & 0x3f));
- add(0x80 | ((c >> 6) & 0x3f));
- add(0x80 | (c & 0x3f));
- }
- }
-
- enum class CharType { Raw, CodePoint, UTF16Pair };
-
- template
- inline void addToStr(std::basic_string& str, CharType type, char32_t c1, char32_t c2);
-
- template<>
- inline void addToStr(std::basic_string& str, CharType type, char32_t c1, char32_t c2)
- {
- if (type == CharType::Raw)
- {
- str.push_back(static_cast(c1));
- }
- else if (type == CharType::CodePoint)
- {
- utf32ToUtf8(c1, str);
- }
- else
- {
- auto c32 = utf16SurrogatePairToUtf32(c1, c2);
- utf32ToUtf8(c32, str);
- }
- }
-
- template<>
- inline void addToStr(std::basic_string& str, CharType type, char32_t c1, char32_t c2)
- {
- str.push_back(static_cast(c1));
- if (type == CharType::UTF16Pair)
- str.push_back(static_cast(c2));
- }
-
- template<>
- inline void addToStr(std::basic_string& str, CharType type, char32_t c1, char32_t c2)
- {
- auto c = (type == CharType::UTF16Pair) ? utf16SurrogatePairToUtf32(c1, c2) : c1;
- str.push_back(static_cast(c));
- }
-
- template
- inline int parseHexDigit(Iterator& iter, ParserError::Type& err)
- {
- auto ch = *iter;
- ++iter;
- if (ch >= '0' && ch <= '9') return ch - '0';
- if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
- if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
-
- err = ParserError::InvalidEscapeSequence;
- return 0;
- }
-
- template
- inline char32_t parseUTF16CodeUnit(Iterator& iter, ParserError::Type& err)
- {
- auto n = parseHexDigit(iter, err) << 12;
- n |= parseHexDigit(iter, err) << 8;
- n |= parseHexDigit(iter, err) << 4;
- n |= parseHexDigit(iter, err);
- return static_cast(n);
- }
-
- template
- inline ParserError::Type parseStringImpl(Iterator& iter, std::basic_string& str)
- {
- str.clear();
- auto add = [&str](CharType type, char32_t c1, char32_t c2)
- {
- addToStr(str, type, c1, c2);
- };
-
- for (;;)
- {
- auto ch = static_cast(*iter);
- ++iter;
- if (ch == '"')
- return ParserError::NoError;
-
- if (ch == '\\')
- {
- ch = static_cast(*iter);
- ++iter;
- switch (ch)
- {
- case '\\': case '"': case '/':
- break;
-
- case 'b': ch = '\b'; break;
- case 'f': ch = '\f'; break;
- case 'n': ch = '\n'; break;
- case 'r': ch = '\r'; break;
- case 't': ch = '\t'; break;
-
- case 'u':
- {
- ParserError::Type err{ParserError::NoError};
- auto codeUnit = parseUTF16CodeUnit(iter, err);
- if (err != ParserError::NoError)
- return err;
-
- if (codeUnit >= 0xD800 && codeUnit < 0xDC00)
- {
- if (*iter != '\\') return ParserError::NoTrailSurrogate;
- ++iter;
- if (*iter != 'u') return ParserError::NoTrailSurrogate;
- ++iter;
-
- auto trailSurrogate = parseUTF16CodeUnit(iter, err);
- if (err != ParserError::NoError)
- return err;
-
- add(CharType::UTF16Pair, codeUnit, trailSurrogate);
- }
- else
- {
- add(CharType::CodePoint, codeUnit, 0);
- }
- }
- continue;
-
- default:
- return ParserError::InvalidEscapeSequence;
- }
- }
-
- add(CharType::Raw, ch, 0);
- }
- }
-}}
-
-// json-cpp/details/string_parser.hpp end
-//----------------------------------------------------------------------
-
-namespace jsoncpp
-{
- template
- class Stream>>
- {
- public:
- using this_type = Parser>;
-
- explicit Stream(InputIterator first, InputIterator last)
- : m_reader{first, last}
- {
- nextValue();
- }
-
- Type getType() const { return m_type; }
- bool getBoolean() const { return m_boolean; }
- double getNumber() const { return m_number; }
- const std::string& getFieldName() const { return m_fieldName; }
-
- void checkType(Type type) const
- {
- if (getType() != type)
- throw makeError(ParserError::UnexpectedType);
- }
-
- bool isListEnd(char terminator)
- {
- eatWhitespace();
- if (*m_reader != terminator)
- return false;
-
- ++m_reader;
- return true;
- }
-
- void eatListSeparator()
- {
- eatWhitespace();
- check(',');
- eatWhitespace();
- }
-
- void nextNameValuePair()
- {
- eatWhitespace();
- check('"');
- parseString(m_fieldName);
- eatWhitespace();
- check(':');
- nextValue();
- }
-
- void nextValue()
- {
- eatWhitespace();
- m_type = nextValueImpl();
- }
-
- template
- void parseString(std::basic_string& str)
- {
- auto err = parseStringImpl(m_reader, str);
- if (err != ParserError::NoError)
- throw m_reader.m_diag.makeError(err);
- }
-
- ParserError makeError(ParserError::Type type) const
- {
- return m_reader.m_diag.makeError(type);
- }
-
- private:
- Type nextValueImpl()
- {
- switch (*m_reader)
- {
- case '{': ++m_reader; return Type::Object;
- case '[': ++m_reader; return Type::Array;
- case 't': ++m_reader; checkLiteral("true"); m_boolean = true; return Type::Boolean;
- case 'f': ++m_reader; checkLiteral("false"); m_boolean = false; return Type::Boolean;
- case 'n': ++m_reader; checkLiteral("null"); return Type::Null;
- case '"': ++m_reader; return Type::String;
-
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- m_number = parseRealNumber(m_reader);
- return Type::Number;
-
- case '-':
- ++m_reader;
- m_number = -parseRealNumber(m_reader);
- return Type::Number;
- }
-
- throw unexpectedCharacter();
- }
-
- ParserError unexpectedCharacter() const
- {
- return makeError(ParserError::UnexpectedCharacter);
- }
-
- void check(char expectedChar)
- {
- if (*m_reader != expectedChar)
- throw unexpectedCharacter();
-
- ++m_reader;
- }
-
- template
- void checkLiteral(const char(&literal)[N])
- {
- static_assert(N > 2, "");
- for (auto i = 1; i != N - 1; ++i, ++m_reader)
- if (*m_reader != literal[i])
- throw unexpectedCharacter();
- }
-
- void eatWhitespace()
- {
- for (;; ++m_reader)
- {
- switch (*m_reader)
- {
- case '/':
- ++m_reader;
- check('/');
- while (*m_reader != '\n')
- ++m_reader;
-
- // no break here
- case '\n':
- m_reader.m_diag.newLine();
- break;
-
- case ' ': case '\t': case '\r':
- break;
-
- default:
- return;
- }
- }
- }
-
- details::Reader m_reader;
-
- Type m_type;
- double m_number;
- bool m_boolean;
- std::string m_fieldName;
- };
-
- template
- inline void serialize(Parser& parser, bool& value)
- {
- parser.checkType(Type::Boolean);
- value = parser.getBoolean();
- }
-
- template
- inline typename std::enable_if::value>::type
- serialize(Parser& parser, T& value)
- {
- parser.checkType(Type::Number);
- auto number = parser.getNumber();
- value = static_cast(number);
- if (value != number)
- throw parser.makeError(ParserError::NumberIsOutOfRange);
- }
-
- template
- inline void serialize(Parser& parser, std::basic_string& value)
- {
- parser.checkType(Type::String);
- parser.parseString(value);
- }
-
- namespace details
- {
- template
- inline void parseList(Parser& parser, Type type, char terminator, Callback&& callback)
- {
- parser.checkType(type);
-
- while (!parser.isListEnd(terminator))
- {
- callback();
-
- if (parser.isListEnd(terminator))
- return;
-
- parser.eatListSeparator();
- }
- }
- }
-
- template
- inline void parseObject(Parser& parser, Callback&& callback)
- {
- details::parseList(parser, Type::Object, '}', [&]
- {
- parser.nextNameValuePair();
- callback(parser.getFieldName());
- });
- }
-
- template
- void parseArray(Parser& parser, Callback&& callback)
- {
- details::parseList(parser, Type::Array, ']', [&]
- {
- parser.nextValue();
- callback();
- });
- }
-
- template
- inline void parse(T& object, InputIterator first, InputIterator last)
- {
- Parser> stream{first, last};
- serialize(stream, object);
- }
-
- template
- inline void parse(T& object, const CharT* str)
- {
- details::CStrIterator first{str}, last;
- parse(object, first, last);
- }
-
- template
- inline void parse(T& object, std::basic_string& str)
- {
- parse(object, std::begin(str), std::end(str));
- }
-
- template
- inline void parse(T& object, std::basic_istream& stream)
- {
- std::istreambuf_iterator first{stream}, last;
- parse(object, first, last);
- }
-}
-
-// json-cpp/parse.hpp end
-//----------------------------------------------------------------------
-
-//----------------------------------------------------------------------
-// json-cpp/std_types.hpp begin
-
-#include
-#include
-#include
-#include