From 41046f6ac02a1d947d656d033ecb53a249e7cc7e Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 20 Jul 2014 15:25:43 +0200 Subject: cleanup --- .gitignore | 5 + fast/.gitignore | 5 - fast/Makefile | 11 +- fast/hypergraph.cc | 16 +- fast/hypergraph.hh | 2 +- fast/json-cpp.hpp | 1231 ---------------------------------------------------- fast/main.cc | 7 - fast/make_paks.cc | 110 ----- fast/read_pak.cc | 27 -- util/Makefile | 14 + util/json-cpp.hpp | 1231 ++++++++++++++++++++++++++++++++++++++++++++++++++++ util/make_pak.cc | 108 +++++ util/read_pak.cc | 28 ++ 13 files changed, 1403 insertions(+), 1392 deletions(-) create mode 100644 .gitignore delete mode 100644 fast/.gitignore delete mode 100644 fast/json-cpp.hpp delete mode 100644 fast/make_paks.cc delete mode 100644 fast/read_pak.cc create mode 100644 util/Makefile create mode 100644 util/json-cpp.hpp create mode 100644 util/make_pak.cc create mode 100644 util/read_pak.cc diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..00b0e1a --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.o +fast/example/ +fast/fast_weaver +util/make_pak +util/read_pak diff --git a/fast/.gitignore b/fast/.gitignore deleted file mode 100644 index c37a566..0000000 --- a/fast/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -fast_weaver -*.o -data -make_paks -read_pak diff --git a/fast/Makefile b/fast/Makefile index 55c4df7..2d2ba68 100644 --- a/fast/Makefile +++ b/fast/Makefile @@ -1,20 +1,15 @@ COMPILER=clang + all: hypergraph.o main.cc $(COMPILER) -std=c++11 -lstdc++ -lm -lmsgpack hypergraph.o main.cc -o fast_weaver hypergraph.o: hypergraph.cc hypergraph.hh grammar.o semiring.hh - $(COMPILER) -g -std=c++11 -lmsgpack -c hypergraph.cc + $(COMPILER) -g -std=c++11 -c hypergraph.cc grammar.o: grammar.cc grammar.hh $(COMPILER) -g -std=c++11 -c grammar.cc -make_paks: make_paks.cc - $(COMPILER) -std=c++11 -lstdc++ -lm -lmsgpack make_paks.cc -o make_paks - -read_pak: read_pak.cc - $(COMPILER) -std=c++11 -lmsgpack read_pak.cc -o read_pak - clean: - rm -f fast_weaver hypergraph.o grammar.o make_paks read_pak + rm -f fast_weaver hypergraph.o grammar.o diff --git a/fast/hypergraph.cc b/fast/hypergraph.cc index c3c587c..a01fb3e 100644 --- a/fast/hypergraph.cc +++ b/fast/hypergraph.cc @@ -74,9 +74,11 @@ topological_sort(list& nodes, list::iterator root) auto to = nodes.begin(); while (to != nodes.end()) { if ((**p).is_marked()) { + cout << **p<< endl; // explore edges for (auto e = (**p).outgoing.begin(); e!=(**p).outgoing.end(); ++e) { (**e).mark++; + cout << " " << **e << endl; if ((**e).is_marked()) { (**e).head->mark++; } @@ -88,10 +90,17 @@ topological_sort(list& nodes, list::iterator root) p = to; } else { ++p; - if (p == nodes.end()) { - p = next(to); + /*if (p == nodes.end()) { + for (auto e = (**to).outgoing.begin(); e!=(**to).outgoing.end(); ++e) { + // explore edges + (**e).mark++; + if ((**e).is_marked()) { + (**e).head->mark++; + } + } to = next(to); - } + p = to; + }*/ } } cout << "---" << endl; @@ -151,6 +160,7 @@ read(Hypergraph& hg, string fn) e->head = hg.nodes_by_id[e->head_id_]; hg.edges.push_back(e); hg.nodes_by_id[e->head_id_]->incoming.push_back(e); + e->arity = 0; for (auto it = e->tails_ids_.begin(); it != e->tails_ids_.end(); ++it) { hg.nodes_by_id[*it]->outgoing.push_back(e); e->tails.push_back(hg.nodes_by_id[*it]); diff --git a/fast/hypergraph.hh b/fast/hypergraph.hh index 530fbe6..5a68742 100644 --- a/fast/hypergraph.hh +++ b/fast/hypergraph.hh @@ -32,7 +32,7 @@ struct Edge { score_t score; string rule; //FIXME DummyVector f; //FIXME - unsigned int arity; + unsigned int arity = 0; unsigned int mark = 0; inline bool is_marked() { return mark >= arity; } diff --git a/fast/json-cpp.hpp b/fast/json-cpp.hpp deleted file mode 100644 index 851a4f4..0000000 --- a/fast/json-cpp.hpp +++ /dev/null @@ -1,1231 +0,0 @@ -// -// DO NOT EDIT !!! This file was generated with a script. -// -// JSON for C++ -// https://github.com/ascheglov/json-cpp -// Version 0.1 alpha, rev. 170121e2dc099895064305e38bfb25d90a807ce3 -// Generated 2014-03-27 17:16:47.104492 UTC -// -// Belongs to the public domain - -#pragma once - -//---------------------------------------------------------------------- -// json-cpp.hpp begin - -//---------------------------------------------------------------------- -// json-cpp/parse.hpp begin - -#include -#include -#include -#include -#include - -//---------------------------------------------------------------------- -// json-cpp/ParserError.hpp begin - -#include -#include -#include -#include - -#if defined _MSC_VER -# define JSONCPP_INTERNAL_NOEXCEPT_ throw() -#else -# define JSONCPP_INTERNAL_NOEXCEPT_ noexcept -#endif - -namespace jsoncpp -{ - class ParserError : public std::exception - { - public: - enum Type - { - NoError, - Eof, UnexpectedCharacter, - InvalidEscapeSequence, NoTrailSurrogate, - UnexpectedType, UnknownField, - NumberIsOutOfRange, - }; - - ParserError(Type type, std::size_t line, std::size_t column) - : m_type{type}, m_line{line}, m_column{column} - { - assert(type != NoError); - } - - virtual const char* what() const JSONCPP_INTERNAL_NOEXCEPT_ override - { - if (m_what.empty()) - { - m_what = "JSON parser error at line "; - m_what += std::to_string(m_line); - m_what += ", column "; - m_what += std::to_string(m_column); - switch (m_type) - { - case Eof: m_what += ": unexpected end of file"; break; - case UnexpectedCharacter: m_what += ": unexpected character"; break; - case InvalidEscapeSequence: m_what += ": invalid escape sequence"; break; - case NoTrailSurrogate: m_what += ": no UTF-16 trail surrogate"; break; - case UnexpectedType: m_what += ": unexpected value type"; break; - case UnknownField: m_what += ": unknown field name"; break; - case NumberIsOutOfRange: m_what += ": number is out of range"; break; - case NoError: - default: - m_what += ": INTERNAL ERROR"; break; - } - } - - return m_what.c_str(); - } - - Type type() const { return m_type; } - std::size_t line() const { return m_line; } - std::size_t column() const { return m_column; } - - private: - Type m_type; - std::size_t m_line; - std::size_t m_column; - - mutable std::string m_what; - }; -} - -#undef JSONCPP_INTERNAL_NOEXCEPT_ - -// json-cpp/ParserError.hpp end -//---------------------------------------------------------------------- - -//---------------------------------------------------------------------- -// json-cpp/Stream.hpp begin - -namespace jsoncpp -{ - template - class Stream; - - namespace details - { - template - struct Traits2 {}; - - template - struct ParserTraits {}; - - template - struct GeneratorTraits {}; - } - - template - using Parser = Stream>; - - template - using Generator = Stream>; - - template - inline auto serialize(Stream& stream, T& value) -> decltype(value.serialize(stream), void()) - { - value.serialize(stream); - } -} -// json-cpp/Stream.hpp end -//---------------------------------------------------------------------- - -//---------------------------------------------------------------------- -// json-cpp/value_types.hpp begin - -namespace jsoncpp -{ - // Helper masks - const auto TypeIsNotFundamental = 0x40; - const auto TypeIsCollection = 0x80; - - enum class Type - { - Undefined = 0, // Helper type for debugging variant-like types - Null = 0x01, - Boolean = 0x02, - Number = 0x04, - String = 0x08 | TypeIsNotFundamental, - Array = 0x10 | TypeIsNotFundamental | TypeIsCollection, - Object = 0x20 | TypeIsNotFundamental | TypeIsCollection, - }; -} -// json-cpp/value_types.hpp end -//---------------------------------------------------------------------- - -//---------------------------------------------------------------------- -// json-cpp/details/parser_utility.hpp begin - -#include -#include -#include - -namespace jsoncpp { namespace details -{ - template - struct CStrIterator - { - using this_type = CStrIterator; - - CStrIterator() - { - static CharT null{0}; - m_ptr = &null; - } - - CStrIterator(const CharT* ptr) : m_ptr{ptr} {} - - const CharT& operator*() { return *m_ptr; } - const CharT* operator->() { return m_ptr; } - - this_type& operator++() - { - assert(!isEnd()); - ++m_ptr; - return *this; - } - - this_type operator++(int) { auto temp = *this; ++*this; return temp; } - - bool operator==(const this_type& rhs) const { return isEnd() == rhs.isEnd(); } - bool operator!=(const this_type& rhs) const { return !this->operator==(rhs); } - - private: - const CharT* m_ptr; - - bool isEnd() const { return *m_ptr == 0; } - }; - - class Diagnostics - { - public: - void nextColumn() { ++m_column; } - void newLine() { ++m_line; m_column = 0; } - - ParserError makeError(ParserError::Type type) const - { - return{type, m_line, m_column}; - } - - private: - std::size_t m_column{0}; - std::size_t m_line{1}; - }; - - template - struct Reader - { - using this_type = Reader; - - Reader(InputIterator first, InputIterator last) : m_iter(first), m_end(last) - { - checkEnd(); - } - - char operator*() { return *m_iter; } - this_type& operator++() - { - checkEnd(); - ++m_iter; - m_diag.nextColumn(); - return *this; - } - - void checkEnd() - { - if (m_iter == m_end) - throw m_diag.makeError(ParserError::Eof); - } - - char getNextChar() - { - auto prev = *m_iter; - ++*this; - return prev; - } - - Diagnostics m_diag; - InputIterator m_iter, m_end; - }; -}} - -// json-cpp/details/parser_utility.hpp end -//---------------------------------------------------------------------- - -//---------------------------------------------------------------------- -// json-cpp/details/number_parser.hpp begin - -#include - -namespace jsoncpp { namespace details -{ - inline bool isDigit(char c) { return c >= '0' && c <= '9'; } - - template - inline unsigned parseIntNumber(Iterator& iter) - { - auto intPart = 0U; // TBD: 0ULL ? - - do - { - intPart = intPart * 10 + (*iter - '0'); - - ++iter; - } - while (isDigit(*iter)); - - return intPart; - } - - template - inline double parseRealNumber(Iterator& iter) - { - double number = 0; - - if (*iter == '0') - { - ++iter; - } - else - { - number = parseIntNumber(iter); - } - - // here `ch` is a peeked character, need to call eat() - - if (*iter == '.') - { - ++iter; - - auto mul = 0.1; - while (isDigit(*iter)) - { - number += (*iter - '0') * mul; - mul /= 10; - ++iter; - } - } - - // here `ch` is a peeked character, need to call eat() - - if (*iter == 'e' || *iter == 'E') - { - ++iter; - - auto negate = *iter == '-'; - if (negate || *iter == '+') - ++iter; - // FIXME: check `ch` for non-digit - - auto e = parseIntNumber(iter); - - if (negate) - number /= std::pow(10, e); - else - number *= std::pow(10, e); - } - - return number; - } -}} -// json-cpp/details/number_parser.hpp end -//---------------------------------------------------------------------- - -//---------------------------------------------------------------------- -// json-cpp/details/string_parser.hpp begin - -#include - -namespace jsoncpp { namespace details -{ - inline char32_t utf16SurrogatePairToUtf32(char32_t lead, char32_t trail) - { - return 0x10000 | (lead - 0xD800) << 10 | (trail - 0xDC00); - } - - inline void utf32ToUtf8(char32_t c, std::string& str) - { - auto add = [&str](char32_t c){ str.push_back(static_cast(c)); }; - - if (c < 0x80) - { - add(c); - } - else if (c < 0x800) - { - add(0xC0 | c >> 6); - add(0x80 | (c & 0x3f)); - } - else if (c < 0x10000) - { - add(0xE0 | c >> 12); - add(0x80 | ((c >> 6) & 0x3f)); - add(0x80 | (c & 0x3f)); - } - else if (c < 0x200000) - { - add(0xF0 | c >> 18); - add(0x80 | ((c >> 12) & 0x3f)); - add(0x80 | ((c >> 6) & 0x3f)); - add(0x80 | (c & 0x3f)); - } - else if (c < 0x4000000) - { - add(0xF8 | c >> 24); - add(0x80 | ((c >> 18) & 0x3f)); - add(0x80 | ((c >> 12) & 0x3f)); - add(0x80 | ((c >> 6) & 0x3f)); - add(0x80 | (c & 0x3f)); - } - else - { - add(0xFC | c >> 30); - add(0x80 | ((c >> 24) & 0x3f)); - add(0x80 | ((c >> 18) & 0x3f)); - add(0x80 | ((c >> 12) & 0x3f)); - add(0x80 | ((c >> 6) & 0x3f)); - add(0x80 | (c & 0x3f)); - } - } - - enum class CharType { Raw, CodePoint, UTF16Pair }; - - template - inline void addToStr(std::basic_string& str, CharType type, char32_t c1, char32_t c2); - - template<> - inline void addToStr(std::basic_string& str, CharType type, char32_t c1, char32_t c2) - { - if (type == CharType::Raw) - { - str.push_back(static_cast(c1)); - } - else if (type == CharType::CodePoint) - { - utf32ToUtf8(c1, str); - } - else - { - auto c32 = utf16SurrogatePairToUtf32(c1, c2); - utf32ToUtf8(c32, str); - } - } - - template<> - inline void addToStr(std::basic_string& str, CharType type, char32_t c1, char32_t c2) - { - str.push_back(static_cast(c1)); - if (type == CharType::UTF16Pair) - str.push_back(static_cast(c2)); - } - - template<> - inline void addToStr(std::basic_string& str, CharType type, char32_t c1, char32_t c2) - { - auto c = (type == CharType::UTF16Pair) ? utf16SurrogatePairToUtf32(c1, c2) : c1; - str.push_back(static_cast(c)); - } - - template - inline int parseHexDigit(Iterator& iter, ParserError::Type& err) - { - auto ch = *iter; - ++iter; - if (ch >= '0' && ch <= '9') return ch - '0'; - if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10; - if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10; - - err = ParserError::InvalidEscapeSequence; - return 0; - } - - template - inline char32_t parseUTF16CodeUnit(Iterator& iter, ParserError::Type& err) - { - auto n = parseHexDigit(iter, err) << 12; - n |= parseHexDigit(iter, err) << 8; - n |= parseHexDigit(iter, err) << 4; - n |= parseHexDigit(iter, err); - return static_cast(n); - } - - template - inline ParserError::Type parseStringImpl(Iterator& iter, std::basic_string& str) - { - str.clear(); - auto add = [&str](CharType type, char32_t c1, char32_t c2) - { - addToStr(str, type, c1, c2); - }; - - for (;;) - { - auto ch = static_cast(*iter); - ++iter; - if (ch == '"') - return ParserError::NoError; - - if (ch == '\\') - { - ch = static_cast(*iter); - ++iter; - switch (ch) - { - case '\\': case '"': case '/': - break; - - case 'b': ch = '\b'; break; - case 'f': ch = '\f'; break; - case 'n': ch = '\n'; break; - case 'r': ch = '\r'; break; - case 't': ch = '\t'; break; - - case 'u': - { - ParserError::Type err{ParserError::NoError}; - auto codeUnit = parseUTF16CodeUnit(iter, err); - if (err != ParserError::NoError) - return err; - - if (codeUnit >= 0xD800 && codeUnit < 0xDC00) - { - if (*iter != '\\') return ParserError::NoTrailSurrogate; - ++iter; - if (*iter != 'u') return ParserError::NoTrailSurrogate; - ++iter; - - auto trailSurrogate = parseUTF16CodeUnit(iter, err); - if (err != ParserError::NoError) - return err; - - add(CharType::UTF16Pair, codeUnit, trailSurrogate); - } - else - { - add(CharType::CodePoint, codeUnit, 0); - } - } - continue; - - default: - return ParserError::InvalidEscapeSequence; - } - } - - add(CharType::Raw, ch, 0); - } - } -}} - -// json-cpp/details/string_parser.hpp end -//---------------------------------------------------------------------- - -namespace jsoncpp -{ - template - class Stream>> - { - public: - using this_type = Parser>; - - explicit Stream(InputIterator first, InputIterator last) - : m_reader{first, last} - { - nextValue(); - } - - Type getType() const { return m_type; } - bool getBoolean() const { return m_boolean; } - double getNumber() const { return m_number; } - const std::string& getFieldName() const { return m_fieldName; } - - void checkType(Type type) const - { - if (getType() != type) - throw makeError(ParserError::UnexpectedType); - } - - bool isListEnd(char terminator) - { - eatWhitespace(); - if (*m_reader != terminator) - return false; - - ++m_reader; - return true; - } - - void eatListSeparator() - { - eatWhitespace(); - check(','); - eatWhitespace(); - } - - void nextNameValuePair() - { - eatWhitespace(); - check('"'); - parseString(m_fieldName); - eatWhitespace(); - check(':'); - nextValue(); - } - - void nextValue() - { - eatWhitespace(); - m_type = nextValueImpl(); - } - - template - void parseString(std::basic_string& str) - { - auto err = parseStringImpl(m_reader, str); - if (err != ParserError::NoError) - throw m_reader.m_diag.makeError(err); - } - - ParserError makeError(ParserError::Type type) const - { - return m_reader.m_diag.makeError(type); - } - - private: - Type nextValueImpl() - { - switch (*m_reader) - { - case '{': ++m_reader; return Type::Object; - case '[': ++m_reader; return Type::Array; - case 't': ++m_reader; checkLiteral("true"); m_boolean = true; return Type::Boolean; - case 'f': ++m_reader; checkLiteral("false"); m_boolean = false; return Type::Boolean; - case 'n': ++m_reader; checkLiteral("null"); return Type::Null; - case '"': ++m_reader; return Type::String; - - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - m_number = parseRealNumber(m_reader); - return Type::Number; - - case '-': - ++m_reader; - m_number = -parseRealNumber(m_reader); - return Type::Number; - } - - throw unexpectedCharacter(); - } - - ParserError unexpectedCharacter() const - { - return makeError(ParserError::UnexpectedCharacter); - } - - void check(char expectedChar) - { - if (*m_reader != expectedChar) - throw unexpectedCharacter(); - - ++m_reader; - } - - template - void checkLiteral(const char(&literal)[N]) - { - static_assert(N > 2, ""); - for (auto i = 1; i != N - 1; ++i, ++m_reader) - if (*m_reader != literal[i]) - throw unexpectedCharacter(); - } - - void eatWhitespace() - { - for (;; ++m_reader) - { - switch (*m_reader) - { - case '/': - ++m_reader; - check('/'); - while (*m_reader != '\n') - ++m_reader; - - // no break here - case '\n': - m_reader.m_diag.newLine(); - break; - - case ' ': case '\t': case '\r': - break; - - default: - return; - } - } - } - - details::Reader m_reader; - - Type m_type; - double m_number; - bool m_boolean; - std::string m_fieldName; - }; - - template - inline void serialize(Parser& parser, bool& value) - { - parser.checkType(Type::Boolean); - value = parser.getBoolean(); - } - - template - inline typename std::enable_if::value>::type - serialize(Parser& parser, T& value) - { - parser.checkType(Type::Number); - auto number = parser.getNumber(); - value = static_cast(number); - if (value != number) - throw parser.makeError(ParserError::NumberIsOutOfRange); - } - - template - inline void serialize(Parser& parser, std::basic_string& value) - { - parser.checkType(Type::String); - parser.parseString(value); - } - - namespace details - { - template - inline void parseList(Parser& parser, Type type, char terminator, Callback&& callback) - { - parser.checkType(type); - - while (!parser.isListEnd(terminator)) - { - callback(); - - if (parser.isListEnd(terminator)) - return; - - parser.eatListSeparator(); - } - } - } - - template - inline void parseObject(Parser& parser, Callback&& callback) - { - details::parseList(parser, Type::Object, '}', [&] - { - parser.nextNameValuePair(); - callback(parser.getFieldName()); - }); - } - - template - void parseArray(Parser& parser, Callback&& callback) - { - details::parseList(parser, Type::Array, ']', [&] - { - parser.nextValue(); - callback(); - }); - } - - template - inline void parse(T& object, InputIterator first, InputIterator last) - { - Parser> stream{first, last}; - serialize(stream, object); - } - - template - inline void parse(T& object, const CharT* str) - { - details::CStrIterator first{str}, last; - parse(object, first, last); - } - - template - inline void parse(T& object, std::basic_string& str) - { - parse(object, std::begin(str), std::end(str)); - } - - template - inline void parse(T& object, std::basic_istream& stream) - { - std::istreambuf_iterator first{stream}, last; - parse(object, first, last); - } -} - -// json-cpp/parse.hpp end -//---------------------------------------------------------------------- - -//---------------------------------------------------------------------- -// json-cpp/std_types.hpp begin - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -//---------------------------------------------------------------------- -// json-cpp/generate.hpp begin - -#include -#include - -//---------------------------------------------------------------------- -// json-cpp/details/string_writer.hpp begin - -#include - -namespace jsoncpp { namespace details -{ - template - inline void writeString(const std::basic_string& str, Sink&& sink) - { - sink('"'); - for (auto iter = std::begin(str), last = std::end(str); iter != last; ++iter) - { - switch (char32_t ch = static_cast(*iter)) - { - case '"': sink('\\'); sink('"'); break; - case '\\': sink('\\'); sink('\\'); break; - case '\b': sink('\\'); sink('b'); break; - case '\f': sink('\\'); sink('f'); break; - case '\n': sink('\\'); sink('n'); break; - case '\r': sink('\\'); sink('r'); break; - case '\t': sink('\\'); sink('t'); break; - default: - if (ch < '\x20') - { - const auto table = "0123456789ABCDEF"; - unsigned n = static_cast(ch); - sink('\\'); - sink('u'); - sink('0'); - sink('0'); - sink(table[n >> 4]); - sink(table[n & 15]); - } - else - { - sink(static_cast(ch)); - } - } - } - sink('"'); - } -}} - -// json-cpp/details/string_writer.hpp end -//---------------------------------------------------------------------- - -namespace jsoncpp -{ - template - class Stream>> - { - public: - using this_type = Generator>; - - explicit Stream(Sink& sink) : m_sink(&sink) {} - - void objectBegin() - { - (*m_sink) << "{"; - } - - void fieldName(const char* name) - { - (*m_sink) << '"' << name << "\": "; - // TODO: use writeString (?) - } - - template - void fieldName(const std::basic_string& name) - { - (*m_sink) << '"' << name << "\": "; - // TODO: use writeString (?) - } - - void separator() - { - (*m_sink) << ", "; - } - - void objectEnd() - { - (*m_sink) << '}'; - } - - void arrayBegin() - { - (*m_sink) << '['; - } - - void arrayEnd() - { - (*m_sink) << ']'; - } - - friend void serialize(this_type& stream, std::nullptr_t) - { - (*stream.m_sink) << "null"; - } - - friend void serialize(this_type& stream, bool value) - { - (*stream.m_sink) << (value ? "true" : "false"); - } - - template - friend typename std::enable_if::value>::type serialize(this_type& stream, T& value) - { - (*stream.m_sink) << value; - } - - template - friend void serialize(this_type& stream, const std::basic_string& value) - { - details::writeString(value, [&stream](char c){ stream.m_sink->put(c); }); - } - - private: - Sink* m_sink; - }; - - template - inline void writePointer(Generator& generator, Pointer& ptr) - { - if (ptr) - { - serialize(generator, *ptr); - } - else - { - serialize(generator, nullptr); - } - } - - template - inline void writeRange(Generator& generator, Range& range) - { - generator.arrayBegin(); - - auto iter = std::begin(range); - const auto& last = std::end(range); - if (iter != last) - { - for (;;) - { - serialize(generator, *iter); - - ++iter; - if (iter == last) - break; - - generator.separator(); - } - } - - generator.arrayEnd(); - } - - template - inline std::string to_string(const T& object) - { - std::ostringstream rawStream; - Generator> stream{rawStream}; - serialize(stream, const_cast(object)); - return rawStream.str(); - } -} - -// json-cpp/generate.hpp end -//---------------------------------------------------------------------- - -namespace jsoncpp -{ - template - inline void serialize(Parser& parser, std::shared_ptr& obj) - { - if (parser.getType() != jsoncpp::Type::Null) - { - obj = std::make_shared(); - serialize(parser, *obj); - } - else - { - obj.reset(); - } - } - - template - inline void serialize(Generator& generator, std::shared_ptr& obj) - { - writePointer(generator, obj); - } - - template - inline void serialize(Parser& parser, std::unique_ptr& obj) - { - if (parser.getType() != jsoncpp::Type::Null) - { - obj->reset(new T()); - serialize(parser, *obj); - } - else - { - obj.reset(); - } - } - - template - inline void serialize(Generator& generator, std::unique_ptr& obj) - { - writePointer(generator, obj); - } - - namespace details - { - template - inline void serializeContainer(Parser& parser, C& c) - { - c.clear(); - - parseArray(parser, [&] - { - c.emplace_back(); - serialize(parser, c.back()); - }); - } - - template - inline void serializeContainer(Generator& generator, C& c) - { - writeRange(generator, c); - } - - template - inline void serializeSet(Parser& parser, C& c) - { - c.clear(); - - parseArray(parser, [&] - { - typename C::value_type value; - serialize(parser, value); - c.insert(value); - }); - } - - template - inline void serializeSet(Generator& generator, C& c) - { - writeRange(generator, c); - } - - template - inline void serializeStrMap(Parser& parser, C& c) - { - c.clear(); - - parseObject(parser, [&](const std::string& name) - { - serialize(parser, c[name]); - }); - } - - template - inline void serializeStrMap(Generator& generator, C& c) - { - generator.objectBegin(); - - auto iter = std::begin(c); - const auto& last = std::end(c); - if (iter != last) - { - for (;;) - { - generator.fieldName(iter->first); - serialize(generator, iter->second); - - ++iter; - if (iter == last) - break; - - generator.separator(); - } - } - - generator.objectEnd(); - } - } - - template - inline void serialize(Stream& stream, std::vector& arr) - { details::serializeContainer(stream, arr); } - - template - inline void serialize(Stream& stream, std::list& arr) - { details::serializeContainer(stream, arr); } - - template - inline void serialize(Stream& stream, std::forward_list& arr) - { details::serializeContainer(stream, arr); } - - template - inline void serialize(Stream& stream, std::deque& arr) - { details::serializeContainer(stream, arr); } - - template - inline void serialize(Stream& stream, std::set& arr) - { details::serializeSet(stream, arr); } - - template - inline void serialize(Stream& stream, std::unordered_set& arr) - { details::serializeSet(stream, arr); } - - template - inline void serialize(Stream& stream, std::map& t) - { details::serializeStrMap(stream, t); } - - template - inline void serialize(Stream& stream, std::unordered_map& t) - { details::serializeStrMap(stream, t); } -} -// json-cpp/std_types.hpp end -//---------------------------------------------------------------------- - -//---------------------------------------------------------------------- -// json-cpp/serialization_helpers.hpp begin - -#include -#include - -namespace jsoncpp -{ - namespace details - { - template - inline void writeField(Generator& generator, const char* name, T& value) - { - generator.fieldName(name); - serialize(generator, value); - } - - template - inline void writeField(Generator& generator, const char* name, T& value, F&&... fieldsDef) - { - writeField(generator, name, value); - generator.separator(); - writeField(generator, fieldsDef...); - } - - template - class FieldsTable - { - public: - template - FieldsTable(F&&... fieldsDef) - { - m_map.reserve(sizeof...(fieldsDef) / 2); - add(1, fieldsDef...); - } - - struct FieldInfo - { - template - FieldInfo(T&, std::size_t idx) - { - m_fieldIdx = idx; - m_parseFn = [](ParserT& parser, void* fieldPtr) - { - serialize(parser, static_cast(*reinterpret_cast(fieldPtr))); - }; - } - - std::size_t m_fieldIdx; - void(*m_parseFn)(ParserT& parser, void* fieldPtr); - }; - - const FieldInfo* find(const std::string& name) const - { - auto it = m_map.find(name); - return it == m_map.end() ? nullptr : &it->second; - } - - private: - template - void add(std::size_t idx, const char* name, T& value, F&&... otherFields) - { - m_map.emplace(name, FieldInfo(value, idx)); - add(idx + 2, otherFields...); - } - - void add(std::size_t /*idx*/) {} - - std::unordered_map m_map; - }; - - inline void* makePtrs(const char*) { return nullptr; } - - template - inline void* makePtrs(T& obj) { return &obj; } - } - - template - inline void fields(Cls&, Parser& parser, F&&... fieldsDef) - { - std::array ptrs{details::makePtrs(fieldsDef)...}; - - static const details::FieldsTable> table{fieldsDef...}; - - auto&& handler = [&](const std::string& fieldName) - { - auto fieldInfo = table.find(fieldName); - if (fieldInfo == nullptr) - throw parser.makeError(ParserError::UnknownField); - - auto fieldPtr = ptrs[fieldInfo->m_fieldIdx]; - fieldInfo->m_parseFn(parser, fieldPtr); - }; - - parseObject(parser, handler); - } - - template - inline void fields(Cls&, Generator& generator, F&&... fieldsDef) - { - generator.objectBegin(); - details::writeField(generator, fieldsDef...); - generator.objectEnd(); - } -} - -// json-cpp/serialization_helpers.hpp end -//---------------------------------------------------------------------- - -// json-cpp.hpp end -//---------------------------------------------------------------------- - diff --git a/fast/main.cc b/fast/main.cc index 9c64976..2a8676b 100644 --- a/fast/main.cc +++ b/fast/main.cc @@ -5,14 +5,7 @@ int main(int argc, char** argv) { Hg::Hypergraph hg; - //Hg::io::manual(hg); Hg::io::read(hg, argv[1]); - /*cout << "---" << endl; - for (auto it = hg.nodes.begin(); it!=hg.nodes.end(); it++) - cout << **it << endl; - for (auto it = hg.edges.begin(); it!=hg.edges.end(); it++) - cout << **it << endl; - cout << "---" << endl;*/ Hg::viterbi(hg); return 0; diff --git a/fast/make_paks.cc b/fast/make_paks.cc deleted file mode 100644 index c0fee90..0000000 --- a/fast/make_paks.cc +++ /dev/null @@ -1,110 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "json-cpp.hpp" -#include "dummyvector.h" -#include "hypergraph.hh" - -using namespace std; - - -struct DummyNode { - size_t id; - string cat; - vector span; -}; - -struct DummyEdge { - size_t head; - string rule; - vector tails; - DummyVector f; - score_t weight; -}; - -struct DummyHg { - vector nodes; - vector edges; - DummyVector weights; -}; - -template inline void -serialize(jsoncpp::Stream& stream, DummyNode& o) -{ - fields(o, stream, "id", o.id, "cat", o.cat, "span", o.span); -} - -template inline void -serialize(jsoncpp::Stream& stream, DummyEdge& o) -{ - fields(o, stream, "head", o.head, "rule", o.rule, "tails", o.tails, "f", o.f, "weight", o.weight); -} - -template inline void -serialize(jsoncpp::Stream& stream, DummyHg& o) -{ - fields(o, stream, "nodes", o.nodes, "edges", o.edges, "weights", o.weights); -} - -template inline void -serialize(jsoncpp::Stream& stream, DummyVector& o) -{ - fields(o, stream, "EgivenFCoherent", o.EgivenFCoherent, "SampleCountF", o.SampleCountF, "CountEF", o.CountEF, "MaxLexFgivenE", o.MaxLexFgivenE, "MaxLexEgivenF", o.MaxLexEgivenF, "IsSingletonF", o.IsSingletonF, "IsSingletonFE", o.IsSingletonFE, "LanguageModel", o.LanguageModel, "LanguageModel_OOV", o.LanguageModel_OOV, "PassThrough", o.PassThrough, "PassThrough_1", o.PassThrough_1, "PassThrough_2", o.PassThrough_2, "PassThrough_3", o.PassThrough_3, "PassThrough_4", o.PassThrough_4, "PassThrough_5", o.PassThrough_5, "PassThrough_6", o.PassThrough_6, "WordPenalty", o.WordPenalty, "Glue", o.Glue); -} - -int -main(int argc, char** argv) -{ - // read from json - ifstream ifs(argv[1]); - string json_str((istreambuf_iterator(ifs) ), - (istreambuf_iterator())); - DummyHg hg; - vector nodes; - hg.nodes = nodes; - vector edges; - hg.edges = edges; - DummyVector w; - hg.weights = w; - jsoncpp::parse(hg, json_str); - - // convert objects - vector nodes_; - for (auto it = hg.nodes.begin(); it != hg.nodes.end(); ++it) { - Hg::Node* n = new Hg::Node; - n->id = it->id; - n->symbol = it->cat; - n->left = it->span[0]; - n->right = it->span[1]; - nodes_.push_back(n); - } - vector edges_; - for (auto it = hg.edges.begin(); it != hg.edges.end(); ++it) { - Hg::Edge* e = new Hg::Edge; - e->head_id_ = it->head; - e->tails_ids_ = it->tails; - e->score = it->weight; - e->rule = it->rule; - e->f = it->f; - edges_.push_back(e); - } - - // write to msgpack - FILE* file = fopen(argv[2], "wb"); - msgpack::fbuffer fbuf(file); - msgpack::pack(fbuf, hg.nodes.size()); - msgpack::pack(fbuf, hg.edges.size()); - for (auto it = nodes_.begin(); it != nodes_.end(); ++it) - msgpack::pack(fbuf, **it); - for (auto it = edges_.begin(); it != edges_.end(); ++it) - msgpack::pack(fbuf, **it); - - fclose(file); - - return 0; -} - diff --git a/fast/read_pak.cc b/fast/read_pak.cc deleted file mode 100644 index c1cf761..0000000 --- a/fast/read_pak.cc +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include -#include - -using namespace std; - - -int -main(int argc, char** argv) -{ - ifstream ifs(argv[1]); - size_t i = 0, nn, ne; - msgpack::unpacker pac; - while(true) { - pac.reserve_buffer(32*1024); - size_t bytes = ifs.readsome(pac.buffer(), pac.buffer_capacity()); - pac.buffer_consumed(bytes); - msgpack::unpacked result; - while(pac.next(&result)) { - msgpack::object o = result.get(); - cout << o << endl; - } - if (!bytes) break; - } - - return 0; -} diff --git a/util/Makefile b/util/Makefile new file mode 100644 index 0000000..08ead26 --- /dev/null +++ b/util/Makefile @@ -0,0 +1,14 @@ +COMPILER=clang + + +all: make_pak read_pak + +make_pak: make_pak.cc + $(COMPILER) -std=c++11 -lstdc++ -lm -lmsgpack make_pak.cc -o make_pak + +read_pak: read_pak.cc + $(COMPILER) -std=c++11 -lstdc++ -lmsgpack read_pak.cc -o read_pak + +clean: + rm -f make_pak read_pak + diff --git a/util/json-cpp.hpp b/util/json-cpp.hpp new file mode 100644 index 0000000..851a4f4 --- /dev/null +++ b/util/json-cpp.hpp @@ -0,0 +1,1231 @@ +// +// DO NOT EDIT !!! This file was generated with a script. +// +// JSON for C++ +// https://github.com/ascheglov/json-cpp +// Version 0.1 alpha, rev. 170121e2dc099895064305e38bfb25d90a807ce3 +// Generated 2014-03-27 17:16:47.104492 UTC +// +// Belongs to the public domain + +#pragma once + +//---------------------------------------------------------------------- +// json-cpp.hpp begin + +//---------------------------------------------------------------------- +// json-cpp/parse.hpp begin + +#include +#include +#include +#include +#include + +//---------------------------------------------------------------------- +// json-cpp/ParserError.hpp begin + +#include +#include +#include +#include + +#if defined _MSC_VER +# define JSONCPP_INTERNAL_NOEXCEPT_ throw() +#else +# define JSONCPP_INTERNAL_NOEXCEPT_ noexcept +#endif + +namespace jsoncpp +{ + class ParserError : public std::exception + { + public: + enum Type + { + NoError, + Eof, UnexpectedCharacter, + InvalidEscapeSequence, NoTrailSurrogate, + UnexpectedType, UnknownField, + NumberIsOutOfRange, + }; + + ParserError(Type type, std::size_t line, std::size_t column) + : m_type{type}, m_line{line}, m_column{column} + { + assert(type != NoError); + } + + virtual const char* what() const JSONCPP_INTERNAL_NOEXCEPT_ override + { + if (m_what.empty()) + { + m_what = "JSON parser error at line "; + m_what += std::to_string(m_line); + m_what += ", column "; + m_what += std::to_string(m_column); + switch (m_type) + { + case Eof: m_what += ": unexpected end of file"; break; + case UnexpectedCharacter: m_what += ": unexpected character"; break; + case InvalidEscapeSequence: m_what += ": invalid escape sequence"; break; + case NoTrailSurrogate: m_what += ": no UTF-16 trail surrogate"; break; + case UnexpectedType: m_what += ": unexpected value type"; break; + case UnknownField: m_what += ": unknown field name"; break; + case NumberIsOutOfRange: m_what += ": number is out of range"; break; + case NoError: + default: + m_what += ": INTERNAL ERROR"; break; + } + } + + return m_what.c_str(); + } + + Type type() const { return m_type; } + std::size_t line() const { return m_line; } + std::size_t column() const { return m_column; } + + private: + Type m_type; + std::size_t m_line; + std::size_t m_column; + + mutable std::string m_what; + }; +} + +#undef JSONCPP_INTERNAL_NOEXCEPT_ + +// json-cpp/ParserError.hpp end +//---------------------------------------------------------------------- + +//---------------------------------------------------------------------- +// json-cpp/Stream.hpp begin + +namespace jsoncpp +{ + template + class Stream; + + namespace details + { + template + struct Traits2 {}; + + template + struct ParserTraits {}; + + template + struct GeneratorTraits {}; + } + + template + using Parser = Stream>; + + template + using Generator = Stream>; + + template + inline auto serialize(Stream& stream, T& value) -> decltype(value.serialize(stream), void()) + { + value.serialize(stream); + } +} +// json-cpp/Stream.hpp end +//---------------------------------------------------------------------- + +//---------------------------------------------------------------------- +// json-cpp/value_types.hpp begin + +namespace jsoncpp +{ + // Helper masks + const auto TypeIsNotFundamental = 0x40; + const auto TypeIsCollection = 0x80; + + enum class Type + { + Undefined = 0, // Helper type for debugging variant-like types + Null = 0x01, + Boolean = 0x02, + Number = 0x04, + String = 0x08 | TypeIsNotFundamental, + Array = 0x10 | TypeIsNotFundamental | TypeIsCollection, + Object = 0x20 | TypeIsNotFundamental | TypeIsCollection, + }; +} +// json-cpp/value_types.hpp end +//---------------------------------------------------------------------- + +//---------------------------------------------------------------------- +// json-cpp/details/parser_utility.hpp begin + +#include +#include +#include + +namespace jsoncpp { namespace details +{ + template + struct CStrIterator + { + using this_type = CStrIterator; + + CStrIterator() + { + static CharT null{0}; + m_ptr = &null; + } + + CStrIterator(const CharT* ptr) : m_ptr{ptr} {} + + const CharT& operator*() { return *m_ptr; } + const CharT* operator->() { return m_ptr; } + + this_type& operator++() + { + assert(!isEnd()); + ++m_ptr; + return *this; + } + + this_type operator++(int) { auto temp = *this; ++*this; return temp; } + + bool operator==(const this_type& rhs) const { return isEnd() == rhs.isEnd(); } + bool operator!=(const this_type& rhs) const { return !this->operator==(rhs); } + + private: + const CharT* m_ptr; + + bool isEnd() const { return *m_ptr == 0; } + }; + + class Diagnostics + { + public: + void nextColumn() { ++m_column; } + void newLine() { ++m_line; m_column = 0; } + + ParserError makeError(ParserError::Type type) const + { + return{type, m_line, m_column}; + } + + private: + std::size_t m_column{0}; + std::size_t m_line{1}; + }; + + template + struct Reader + { + using this_type = Reader; + + Reader(InputIterator first, InputIterator last) : m_iter(first), m_end(last) + { + checkEnd(); + } + + char operator*() { return *m_iter; } + this_type& operator++() + { + checkEnd(); + ++m_iter; + m_diag.nextColumn(); + return *this; + } + + void checkEnd() + { + if (m_iter == m_end) + throw m_diag.makeError(ParserError::Eof); + } + + char getNextChar() + { + auto prev = *m_iter; + ++*this; + return prev; + } + + Diagnostics m_diag; + InputIterator m_iter, m_end; + }; +}} + +// json-cpp/details/parser_utility.hpp end +//---------------------------------------------------------------------- + +//---------------------------------------------------------------------- +// json-cpp/details/number_parser.hpp begin + +#include + +namespace jsoncpp { namespace details +{ + inline bool isDigit(char c) { return c >= '0' && c <= '9'; } + + template + inline unsigned parseIntNumber(Iterator& iter) + { + auto intPart = 0U; // TBD: 0ULL ? + + do + { + intPart = intPart * 10 + (*iter - '0'); + + ++iter; + } + while (isDigit(*iter)); + + return intPart; + } + + template + inline double parseRealNumber(Iterator& iter) + { + double number = 0; + + if (*iter == '0') + { + ++iter; + } + else + { + number = parseIntNumber(iter); + } + + // here `ch` is a peeked character, need to call eat() + + if (*iter == '.') + { + ++iter; + + auto mul = 0.1; + while (isDigit(*iter)) + { + number += (*iter - '0') * mul; + mul /= 10; + ++iter; + } + } + + // here `ch` is a peeked character, need to call eat() + + if (*iter == 'e' || *iter == 'E') + { + ++iter; + + auto negate = *iter == '-'; + if (negate || *iter == '+') + ++iter; + // FIXME: check `ch` for non-digit + + auto e = parseIntNumber(iter); + + if (negate) + number /= std::pow(10, e); + else + number *= std::pow(10, e); + } + + return number; + } +}} +// json-cpp/details/number_parser.hpp end +//---------------------------------------------------------------------- + +//---------------------------------------------------------------------- +// json-cpp/details/string_parser.hpp begin + +#include + +namespace jsoncpp { namespace details +{ + inline char32_t utf16SurrogatePairToUtf32(char32_t lead, char32_t trail) + { + return 0x10000 | (lead - 0xD800) << 10 | (trail - 0xDC00); + } + + inline void utf32ToUtf8(char32_t c, std::string& str) + { + auto add = [&str](char32_t c){ str.push_back(static_cast(c)); }; + + if (c < 0x80) + { + add(c); + } + else if (c < 0x800) + { + add(0xC0 | c >> 6); + add(0x80 | (c & 0x3f)); + } + else if (c < 0x10000) + { + add(0xE0 | c >> 12); + add(0x80 | ((c >> 6) & 0x3f)); + add(0x80 | (c & 0x3f)); + } + else if (c < 0x200000) + { + add(0xF0 | c >> 18); + add(0x80 | ((c >> 12) & 0x3f)); + add(0x80 | ((c >> 6) & 0x3f)); + add(0x80 | (c & 0x3f)); + } + else if (c < 0x4000000) + { + add(0xF8 | c >> 24); + add(0x80 | ((c >> 18) & 0x3f)); + add(0x80 | ((c >> 12) & 0x3f)); + add(0x80 | ((c >> 6) & 0x3f)); + add(0x80 | (c & 0x3f)); + } + else + { + add(0xFC | c >> 30); + add(0x80 | ((c >> 24) & 0x3f)); + add(0x80 | ((c >> 18) & 0x3f)); + add(0x80 | ((c >> 12) & 0x3f)); + add(0x80 | ((c >> 6) & 0x3f)); + add(0x80 | (c & 0x3f)); + } + } + + enum class CharType { Raw, CodePoint, UTF16Pair }; + + template + inline void addToStr(std::basic_string& str, CharType type, char32_t c1, char32_t c2); + + template<> + inline void addToStr(std::basic_string& str, CharType type, char32_t c1, char32_t c2) + { + if (type == CharType::Raw) + { + str.push_back(static_cast(c1)); + } + else if (type == CharType::CodePoint) + { + utf32ToUtf8(c1, str); + } + else + { + auto c32 = utf16SurrogatePairToUtf32(c1, c2); + utf32ToUtf8(c32, str); + } + } + + template<> + inline void addToStr(std::basic_string& str, CharType type, char32_t c1, char32_t c2) + { + str.push_back(static_cast(c1)); + if (type == CharType::UTF16Pair) + str.push_back(static_cast(c2)); + } + + template<> + inline void addToStr(std::basic_string& str, CharType type, char32_t c1, char32_t c2) + { + auto c = (type == CharType::UTF16Pair) ? utf16SurrogatePairToUtf32(c1, c2) : c1; + str.push_back(static_cast(c)); + } + + template + inline int parseHexDigit(Iterator& iter, ParserError::Type& err) + { + auto ch = *iter; + ++iter; + if (ch >= '0' && ch <= '9') return ch - '0'; + if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10; + if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10; + + err = ParserError::InvalidEscapeSequence; + return 0; + } + + template + inline char32_t parseUTF16CodeUnit(Iterator& iter, ParserError::Type& err) + { + auto n = parseHexDigit(iter, err) << 12; + n |= parseHexDigit(iter, err) << 8; + n |= parseHexDigit(iter, err) << 4; + n |= parseHexDigit(iter, err); + return static_cast(n); + } + + template + inline ParserError::Type parseStringImpl(Iterator& iter, std::basic_string& str) + { + str.clear(); + auto add = [&str](CharType type, char32_t c1, char32_t c2) + { + addToStr(str, type, c1, c2); + }; + + for (;;) + { + auto ch = static_cast(*iter); + ++iter; + if (ch == '"') + return ParserError::NoError; + + if (ch == '\\') + { + ch = static_cast(*iter); + ++iter; + switch (ch) + { + case '\\': case '"': case '/': + break; + + case 'b': ch = '\b'; break; + case 'f': ch = '\f'; break; + case 'n': ch = '\n'; break; + case 'r': ch = '\r'; break; + case 't': ch = '\t'; break; + + case 'u': + { + ParserError::Type err{ParserError::NoError}; + auto codeUnit = parseUTF16CodeUnit(iter, err); + if (err != ParserError::NoError) + return err; + + if (codeUnit >= 0xD800 && codeUnit < 0xDC00) + { + if (*iter != '\\') return ParserError::NoTrailSurrogate; + ++iter; + if (*iter != 'u') return ParserError::NoTrailSurrogate; + ++iter; + + auto trailSurrogate = parseUTF16CodeUnit(iter, err); + if (err != ParserError::NoError) + return err; + + add(CharType::UTF16Pair, codeUnit, trailSurrogate); + } + else + { + add(CharType::CodePoint, codeUnit, 0); + } + } + continue; + + default: + return ParserError::InvalidEscapeSequence; + } + } + + add(CharType::Raw, ch, 0); + } + } +}} + +// json-cpp/details/string_parser.hpp end +//---------------------------------------------------------------------- + +namespace jsoncpp +{ + template + class Stream>> + { + public: + using this_type = Parser>; + + explicit Stream(InputIterator first, InputIterator last) + : m_reader{first, last} + { + nextValue(); + } + + Type getType() const { return m_type; } + bool getBoolean() const { return m_boolean; } + double getNumber() const { return m_number; } + const std::string& getFieldName() const { return m_fieldName; } + + void checkType(Type type) const + { + if (getType() != type) + throw makeError(ParserError::UnexpectedType); + } + + bool isListEnd(char terminator) + { + eatWhitespace(); + if (*m_reader != terminator) + return false; + + ++m_reader; + return true; + } + + void eatListSeparator() + { + eatWhitespace(); + check(','); + eatWhitespace(); + } + + void nextNameValuePair() + { + eatWhitespace(); + check('"'); + parseString(m_fieldName); + eatWhitespace(); + check(':'); + nextValue(); + } + + void nextValue() + { + eatWhitespace(); + m_type = nextValueImpl(); + } + + template + void parseString(std::basic_string& str) + { + auto err = parseStringImpl(m_reader, str); + if (err != ParserError::NoError) + throw m_reader.m_diag.makeError(err); + } + + ParserError makeError(ParserError::Type type) const + { + return m_reader.m_diag.makeError(type); + } + + private: + Type nextValueImpl() + { + switch (*m_reader) + { + case '{': ++m_reader; return Type::Object; + case '[': ++m_reader; return Type::Array; + case 't': ++m_reader; checkLiteral("true"); m_boolean = true; return Type::Boolean; + case 'f': ++m_reader; checkLiteral("false"); m_boolean = false; return Type::Boolean; + case 'n': ++m_reader; checkLiteral("null"); return Type::Null; + case '"': ++m_reader; return Type::String; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + m_number = parseRealNumber(m_reader); + return Type::Number; + + case '-': + ++m_reader; + m_number = -parseRealNumber(m_reader); + return Type::Number; + } + + throw unexpectedCharacter(); + } + + ParserError unexpectedCharacter() const + { + return makeError(ParserError::UnexpectedCharacter); + } + + void check(char expectedChar) + { + if (*m_reader != expectedChar) + throw unexpectedCharacter(); + + ++m_reader; + } + + template + void checkLiteral(const char(&literal)[N]) + { + static_assert(N > 2, ""); + for (auto i = 1; i != N - 1; ++i, ++m_reader) + if (*m_reader != literal[i]) + throw unexpectedCharacter(); + } + + void eatWhitespace() + { + for (;; ++m_reader) + { + switch (*m_reader) + { + case '/': + ++m_reader; + check('/'); + while (*m_reader != '\n') + ++m_reader; + + // no break here + case '\n': + m_reader.m_diag.newLine(); + break; + + case ' ': case '\t': case '\r': + break; + + default: + return; + } + } + } + + details::Reader m_reader; + + Type m_type; + double m_number; + bool m_boolean; + std::string m_fieldName; + }; + + template + inline void serialize(Parser& parser, bool& value) + { + parser.checkType(Type::Boolean); + value = parser.getBoolean(); + } + + template + inline typename std::enable_if::value>::type + serialize(Parser& parser, T& value) + { + parser.checkType(Type::Number); + auto number = parser.getNumber(); + value = static_cast(number); + if (value != number) + throw parser.makeError(ParserError::NumberIsOutOfRange); + } + + template + inline void serialize(Parser& parser, std::basic_string& value) + { + parser.checkType(Type::String); + parser.parseString(value); + } + + namespace details + { + template + inline void parseList(Parser& parser, Type type, char terminator, Callback&& callback) + { + parser.checkType(type); + + while (!parser.isListEnd(terminator)) + { + callback(); + + if (parser.isListEnd(terminator)) + return; + + parser.eatListSeparator(); + } + } + } + + template + inline void parseObject(Parser& parser, Callback&& callback) + { + details::parseList(parser, Type::Object, '}', [&] + { + parser.nextNameValuePair(); + callback(parser.getFieldName()); + }); + } + + template + void parseArray(Parser& parser, Callback&& callback) + { + details::parseList(parser, Type::Array, ']', [&] + { + parser.nextValue(); + callback(); + }); + } + + template + inline void parse(T& object, InputIterator first, InputIterator last) + { + Parser> stream{first, last}; + serialize(stream, object); + } + + template + inline void parse(T& object, const CharT* str) + { + details::CStrIterator first{str}, last; + parse(object, first, last); + } + + template + inline void parse(T& object, std::basic_string& str) + { + parse(object, std::begin(str), std::end(str)); + } + + template + inline void parse(T& object, std::basic_istream& stream) + { + std::istreambuf_iterator first{stream}, last; + parse(object, first, last); + } +} + +// json-cpp/parse.hpp end +//---------------------------------------------------------------------- + +//---------------------------------------------------------------------- +// json-cpp/std_types.hpp begin + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//---------------------------------------------------------------------- +// json-cpp/generate.hpp begin + +#include +#include + +//---------------------------------------------------------------------- +// json-cpp/details/string_writer.hpp begin + +#include + +namespace jsoncpp { namespace details +{ + template + inline void writeString(const std::basic_string& str, Sink&& sink) + { + sink('"'); + for (auto iter = std::begin(str), last = std::end(str); iter != last; ++iter) + { + switch (char32_t ch = static_cast(*iter)) + { + case '"': sink('\\'); sink('"'); break; + case '\\': sink('\\'); sink('\\'); break; + case '\b': sink('\\'); sink('b'); break; + case '\f': sink('\\'); sink('f'); break; + case '\n': sink('\\'); sink('n'); break; + case '\r': sink('\\'); sink('r'); break; + case '\t': sink('\\'); sink('t'); break; + default: + if (ch < '\x20') + { + const auto table = "0123456789ABCDEF"; + unsigned n = static_cast(ch); + sink('\\'); + sink('u'); + sink('0'); + sink('0'); + sink(table[n >> 4]); + sink(table[n & 15]); + } + else + { + sink(static_cast(ch)); + } + } + } + sink('"'); + } +}} + +// json-cpp/details/string_writer.hpp end +//---------------------------------------------------------------------- + +namespace jsoncpp +{ + template + class Stream>> + { + public: + using this_type = Generator>; + + explicit Stream(Sink& sink) : m_sink(&sink) {} + + void objectBegin() + { + (*m_sink) << "{"; + } + + void fieldName(const char* name) + { + (*m_sink) << '"' << name << "\": "; + // TODO: use writeString (?) + } + + template + void fieldName(const std::basic_string& name) + { + (*m_sink) << '"' << name << "\": "; + // TODO: use writeString (?) + } + + void separator() + { + (*m_sink) << ", "; + } + + void objectEnd() + { + (*m_sink) << '}'; + } + + void arrayBegin() + { + (*m_sink) << '['; + } + + void arrayEnd() + { + (*m_sink) << ']'; + } + + friend void serialize(this_type& stream, std::nullptr_t) + { + (*stream.m_sink) << "null"; + } + + friend void serialize(this_type& stream, bool value) + { + (*stream.m_sink) << (value ? "true" : "false"); + } + + template + friend typename std::enable_if::value>::type serialize(this_type& stream, T& value) + { + (*stream.m_sink) << value; + } + + template + friend void serialize(this_type& stream, const std::basic_string& value) + { + details::writeString(value, [&stream](char c){ stream.m_sink->put(c); }); + } + + private: + Sink* m_sink; + }; + + template + inline void writePointer(Generator& generator, Pointer& ptr) + { + if (ptr) + { + serialize(generator, *ptr); + } + else + { + serialize(generator, nullptr); + } + } + + template + inline void writeRange(Generator& generator, Range& range) + { + generator.arrayBegin(); + + auto iter = std::begin(range); + const auto& last = std::end(range); + if (iter != last) + { + for (;;) + { + serialize(generator, *iter); + + ++iter; + if (iter == last) + break; + + generator.separator(); + } + } + + generator.arrayEnd(); + } + + template + inline std::string to_string(const T& object) + { + std::ostringstream rawStream; + Generator> stream{rawStream}; + serialize(stream, const_cast(object)); + return rawStream.str(); + } +} + +// json-cpp/generate.hpp end +//---------------------------------------------------------------------- + +namespace jsoncpp +{ + template + inline void serialize(Parser& parser, std::shared_ptr& obj) + { + if (parser.getType() != jsoncpp::Type::Null) + { + obj = std::make_shared(); + serialize(parser, *obj); + } + else + { + obj.reset(); + } + } + + template + inline void serialize(Generator& generator, std::shared_ptr& obj) + { + writePointer(generator, obj); + } + + template + inline void serialize(Parser& parser, std::unique_ptr& obj) + { + if (parser.getType() != jsoncpp::Type::Null) + { + obj->reset(new T()); + serialize(parser, *obj); + } + else + { + obj.reset(); + } + } + + template + inline void serialize(Generator& generator, std::unique_ptr& obj) + { + writePointer(generator, obj); + } + + namespace details + { + template + inline void serializeContainer(Parser& parser, C& c) + { + c.clear(); + + parseArray(parser, [&] + { + c.emplace_back(); + serialize(parser, c.back()); + }); + } + + template + inline void serializeContainer(Generator& generator, C& c) + { + writeRange(generator, c); + } + + template + inline void serializeSet(Parser& parser, C& c) + { + c.clear(); + + parseArray(parser, [&] + { + typename C::value_type value; + serialize(parser, value); + c.insert(value); + }); + } + + template + inline void serializeSet(Generator& generator, C& c) + { + writeRange(generator, c); + } + + template + inline void serializeStrMap(Parser& parser, C& c) + { + c.clear(); + + parseObject(parser, [&](const std::string& name) + { + serialize(parser, c[name]); + }); + } + + template + inline void serializeStrMap(Generator& generator, C& c) + { + generator.objectBegin(); + + auto iter = std::begin(c); + const auto& last = std::end(c); + if (iter != last) + { + for (;;) + { + generator.fieldName(iter->first); + serialize(generator, iter->second); + + ++iter; + if (iter == last) + break; + + generator.separator(); + } + } + + generator.objectEnd(); + } + } + + template + inline void serialize(Stream& stream, std::vector& arr) + { details::serializeContainer(stream, arr); } + + template + inline void serialize(Stream& stream, std::list& arr) + { details::serializeContainer(stream, arr); } + + template + inline void serialize(Stream& stream, std::forward_list& arr) + { details::serializeContainer(stream, arr); } + + template + inline void serialize(Stream& stream, std::deque& arr) + { details::serializeContainer(stream, arr); } + + template + inline void serialize(Stream& stream, std::set& arr) + { details::serializeSet(stream, arr); } + + template + inline void serialize(Stream& stream, std::unordered_set& arr) + { details::serializeSet(stream, arr); } + + template + inline void serialize(Stream& stream, std::map& t) + { details::serializeStrMap(stream, t); } + + template + inline void serialize(Stream& stream, std::unordered_map& t) + { details::serializeStrMap(stream, t); } +} +// json-cpp/std_types.hpp end +//---------------------------------------------------------------------- + +//---------------------------------------------------------------------- +// json-cpp/serialization_helpers.hpp begin + +#include +#include + +namespace jsoncpp +{ + namespace details + { + template + inline void writeField(Generator& generator, const char* name, T& value) + { + generator.fieldName(name); + serialize(generator, value); + } + + template + inline void writeField(Generator& generator, const char* name, T& value, F&&... fieldsDef) + { + writeField(generator, name, value); + generator.separator(); + writeField(generator, fieldsDef...); + } + + template + class FieldsTable + { + public: + template + FieldsTable(F&&... fieldsDef) + { + m_map.reserve(sizeof...(fieldsDef) / 2); + add(1, fieldsDef...); + } + + struct FieldInfo + { + template + FieldInfo(T&, std::size_t idx) + { + m_fieldIdx = idx; + m_parseFn = [](ParserT& parser, void* fieldPtr) + { + serialize(parser, static_cast(*reinterpret_cast(fieldPtr))); + }; + } + + std::size_t m_fieldIdx; + void(*m_parseFn)(ParserT& parser, void* fieldPtr); + }; + + const FieldInfo* find(const std::string& name) const + { + auto it = m_map.find(name); + return it == m_map.end() ? nullptr : &it->second; + } + + private: + template + void add(std::size_t idx, const char* name, T& value, F&&... otherFields) + { + m_map.emplace(name, FieldInfo(value, idx)); + add(idx + 2, otherFields...); + } + + void add(std::size_t /*idx*/) {} + + std::unordered_map m_map; + }; + + inline void* makePtrs(const char*) { return nullptr; } + + template + inline void* makePtrs(T& obj) { return &obj; } + } + + template + inline void fields(Cls&, Parser& parser, F&&... fieldsDef) + { + std::array ptrs{details::makePtrs(fieldsDef)...}; + + static const details::FieldsTable> table{fieldsDef...}; + + auto&& handler = [&](const std::string& fieldName) + { + auto fieldInfo = table.find(fieldName); + if (fieldInfo == nullptr) + throw parser.makeError(ParserError::UnknownField); + + auto fieldPtr = ptrs[fieldInfo->m_fieldIdx]; + fieldInfo->m_parseFn(parser, fieldPtr); + }; + + parseObject(parser, handler); + } + + template + inline void fields(Cls&, Generator& generator, F&&... fieldsDef) + { + generator.objectBegin(); + details::writeField(generator, fieldsDef...); + generator.objectEnd(); + } +} + +// json-cpp/serialization_helpers.hpp end +//---------------------------------------------------------------------- + +// json-cpp.hpp end +//---------------------------------------------------------------------- + diff --git a/util/make_pak.cc b/util/make_pak.cc new file mode 100644 index 0000000..f09c17d --- /dev/null +++ b/util/make_pak.cc @@ -0,0 +1,108 @@ +#include +#include +#include +#include +#include + +#include "json-cpp.hpp" +#include "../fast/dummyvector.h" +#include "../fast/hypergraph.hh" + +using namespace std; + + +struct DummyNode { + size_t id; + string cat; + vector span; +}; + +struct DummyEdge { + size_t head; + string rule; + vector tails; + DummyVector f; + score_t weight; +}; + +struct DummyHg { + vector nodes; + vector edges; + DummyVector weights; +}; + +template inline void +serialize(jsoncpp::Stream& stream, DummyNode& o) +{ + fields(o, stream, "id", o.id, "cat", o.cat, "span", o.span); +} + +template inline void +serialize(jsoncpp::Stream& stream, DummyEdge& o) +{ + fields(o, stream, "head", o.head, "rule", o.rule, "tails", o.tails, "f", o.f, "weight", o.weight); +} + +template inline void +serialize(jsoncpp::Stream& stream, DummyHg& o) +{ + fields(o, stream, "nodes", o.nodes, "edges", o.edges, "weights", o.weights); +} + +template inline void +serialize(jsoncpp::Stream& stream, DummyVector& o) +{ + fields(o, stream, "EgivenFCoherent", o.EgivenFCoherent, "SampleCountF", o.SampleCountF, "CountEF", o.CountEF, "MaxLexFgivenE", o.MaxLexFgivenE, "MaxLexEgivenF", o.MaxLexEgivenF, "IsSingletonF", o.IsSingletonF, "IsSingletonFE", o.IsSingletonFE, "LanguageModel", o.LanguageModel, "LanguageModel_OOV", o.LanguageModel_OOV, "PassThrough", o.PassThrough, "PassThrough_1", o.PassThrough_1, "PassThrough_2", o.PassThrough_2, "PassThrough_3", o.PassThrough_3, "PassThrough_4", o.PassThrough_4, "PassThrough_5", o.PassThrough_5, "PassThrough_6", o.PassThrough_6, "WordPenalty", o.WordPenalty, "Glue", o.Glue); +} + +int +main(int argc, char** argv) +{ + // read from json + ifstream ifs(argv[1]); + string json_str((istreambuf_iterator(ifs) ), + (istreambuf_iterator())); + DummyHg hg; + vector nodes; + hg.nodes = nodes; + vector edges; + hg.edges = edges; + DummyVector w; + hg.weights = w; + jsoncpp::parse(hg, json_str); + + // convert objects + vector nodes_conv; + for (auto it = hg.nodes.begin(); it != hg.nodes.end(); ++it) { + Hg::Node* n = new Hg::Node; + n->id = it->id; + n->symbol = it->cat; + n->left = it->span[0]; + n->right = it->span[1]; + nodes_conv.push_back(n); + } + vector edges_conv; + for (auto it = hg.edges.begin(); it != hg.edges.end(); ++it) { + Hg::Edge* e = new Hg::Edge; + e->head_id_ = it->head; + e->tails_ids_ = it->tails; + e->score = it->weight; + e->rule = it->rule; + e->f = it->f; + edges_conv.push_back(e); + } + + // write to msgpack + FILE* file = fopen(argv[2], "wb"); + msgpack::fbuffer fbuf(file); + msgpack::pack(fbuf, hg.nodes.size()); + msgpack::pack(fbuf, hg.edges.size()); + for (auto it = nodes_conv.begin(); it != nodes_conv.end(); ++it) + msgpack::pack(fbuf, **it); + for (auto it = edges_conv.begin(); it != edges_conv.end(); ++it) + msgpack::pack(fbuf, **it); + fclose(file); + + return 0; +} + diff --git a/util/read_pak.cc b/util/read_pak.cc new file mode 100644 index 0000000..d4bff91 --- /dev/null +++ b/util/read_pak.cc @@ -0,0 +1,28 @@ +#include +#include +#include + +using namespace std; + + +int +main(int argc, char** argv) +{ + ifstream ifs(argv[1]); + size_t i = 0, nn, ne; + msgpack::unpacker pac; + while(true) { + pac.reserve_buffer(32*1024); + size_t bytes = ifs.readsome(pac.buffer(), pac.buffer_capacity()); + pac.buffer_consumed(bytes); + msgpack::unpacked result; + while(pac.next(&result)) { + msgpack::object o = result.get(); + cout << o << endl; + } + if (!bytes) break; + } + + return 0; +} + -- cgit v1.2.3