diff options
Diffstat (limited to 'fast')
-rw-r--r-- | fast/.gitignore | 6 | ||||
-rw-r--r-- | fast/Makefile | 11 | ||||
-rw-r--r-- | fast/README.md | 5 | ||||
-rw-r--r-- | fast/dummyvector.h | 27 | ||||
-rw-r--r-- | fast/grammar.hh | 5 | ||||
-rw-r--r-- | fast/hypergraph.cc | 216 | ||||
-rw-r--r-- | fast/hypergraph.hh | 110 | ||||
-rw-r--r-- | fast/json-cpp.hpp | 1231 | ||||
-rw-r--r-- | fast/main.cc | 137 | ||||
-rw-r--r-- | fast/make_paks.cc | 112 | ||||
-rw-r--r-- | fast/read_pak.cc | 26 | ||||
-rw-r--r-- | fast/semiring.hh | 29 |
12 files changed, 1683 insertions, 232 deletions
diff --git a/fast/.gitignore b/fast/.gitignore index 80d28d5..c37a566 100644 --- a/fast/.gitignore +++ b/fast/.gitignore @@ -1,3 +1,5 @@ fast_weaver -hypergraph.o -msgpack-c/ +*.o +data +make_paks +read_pak diff --git a/fast/Makefile b/fast/Makefile index f09ab21..1d88446 100644 --- a/fast/Makefile +++ b/fast/Makefile @@ -1,12 +1,19 @@ all: hypergraph.o main.cc - clang -std=c++11 -lstdc++ -lm hypergraph.o -I./msgpack-c/include/ main.cc -o fast_weaver + clang -std=c++11 hypergraph.o main.cc -lstdc++ -lm -lmsgpack -o fast_weaver hypergraph.o: hypergraph.cc hypergraph.hh grammar.o semiring.hh - clang -std=c++11 -I./msgpack-c/include/ -c hypergraph.cc + clang -std=c++11 -c hypergraph.cc grammar.o: grammar.cc grammar.hh clang -std=c++11 -c grammar.cc +make_paks: make_paks.cc + g++ -std=c++11 make_paks.cc -lmsgpack -o make_paks + +read_pak: read_pak.cc + g++ -std=c++11 read_pak.cc -lmsgpack -o read_pak + + clean: rm -f fast_weaver hypergraph.o grammar.o diff --git a/fast/README.md b/fast/README.md index 3087bab..5bcc962 100644 --- a/fast/README.md +++ b/fast/README.md @@ -7,3 +7,8 @@ TODO * hg: json input (jsoncpp?) 
* language model: kenlm +depends on msgpack [1] +http://jscheiny.github.io/Streams/ + +[1] http://msgpack.org + diff --git a/fast/dummyvector.h b/fast/dummyvector.h new file mode 100644 index 0000000..09cf3f7 --- /dev/null +++ b/fast/dummyvector.h @@ -0,0 +1,27 @@ +#pragma once +#include <msgpack.hpp> + + +struct DummyVector { + double CountEF; + double EgivenFCoherent; + double Glue; + double IsSingletonF; + double IsSingletonFE; + double LanguageModel; + double LanguageModel_OOV; + double MaxLexFgivenE; + double MaxLexEgivenF; + double PassThrough; + double PassThrough_1; + double PassThrough_2; + double PassThrough_3; + double PassThrough_4; + double PassThrough_5; + double PassThrough_6; + double SampleCountF; + double WordPenalty; + + MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty); +}; + diff --git a/fast/grammar.hh b/fast/grammar.hh index 5625b85..c4ef3ad 100644 --- a/fast/grammar.hh +++ b/fast/grammar.hh @@ -1,5 +1,4 @@ -#ifndef GRAMMAR_HH -#define GRAMMAR_HH +#pragma once #include <string> #include <sstream> @@ -29,5 +28,3 @@ class Rule { } // namespace -#endif - diff --git a/fast/hypergraph.cc b/fast/hypergraph.cc index 44e060e..4e6601f 100644 --- a/fast/hypergraph.cc +++ b/fast/hypergraph.cc @@ -41,7 +41,7 @@ operator<<(std::ostream& os, const Edge& e) { ostringstream _; for (auto it = e.tails.begin(); it != e.tails.end(); ++it) { - _ << (*it)->id; if (*it != e.tails.back()) _ << ","; + _ << (**it).id; if (*it != e.tails.back()) _ << ","; } os << \ "Edge<head=" << e.head->id << \ @@ -55,19 +55,26 @@ operator<<(std::ostream& os, const Edge& e) } /* - * Hypergraph - * methods + * functions * */ void -Hypergraph::reset() +reset(list<Node*> nodes, vector<Edge*> edges) { + for (auto it = nodes.begin(); it != nodes.end(); ++it) + (**it).mark 
= 0; + for (auto it = edges.begin(); it != edges.end(); ++it) + (**it).mark = 0; +} + +template<class Semiring> void +init(list<Node*>& nodes, list<Node*>::iterator root, Semiring& semiring) +{ + for (auto it = nodes.begin(); it != nodes.end(); ++it) + (**it).score = semiring.null; + (**root).score = semiring.one; } -/* - * functions - * - */ void topological_sort(list<Node*>& nodes, list<Node*>::iterator root) { @@ -94,37 +101,186 @@ topological_sort(list<Node*>& nodes, list<Node*>::iterator root) } } -/*void -init(vector<Node*>& nodes, ViterbiSemiring<double>& semiring, Node* root) -{ - for (auto it = nodes.begin(); it != nodes.end(); ++it) - (*it)->score = semiring.null; - root->score = semiring.one; -} - void -viterbi(vector<Node*>& nodes, map<unsigned int, Hg::Node*> nodes_by_id, Node* root) +viterbi(Hypergraph& hg) { - vector<Node*> sorted = topological_sort(nodes); - ViterbiSemiring<double> semiring; - - init(sorted, semiring, root); + list<Node*>::iterator root = hg.nodes.begin(); // FIXME? 
+ Hg::topological_sort(hg.nodes, root); + Semiring::Viterbi<double> semiring; + Hg::init(hg.nodes, root, semiring); - for (auto n_it = sorted.begin(); n_it != sorted.end(); ++n_it) { - for (auto e_it = (*n_it)->incoming.begin(); e_it != (*n_it)->incoming.end(); ++e_it) { - cout << (*e_it)->s() << endl; + for (auto n = hg.nodes.begin(); n != hg.nodes.end(); ++n) { + for (auto e = (**n).incoming.begin(); e != (**n).incoming.end(); ++e) { + cout << **e << endl; double s = semiring.one; - for (auto m_it = (*e_it)->tails.begin(); m_it != (*e_it)->tails.end(); m_it++) { - s = semiring.multiply(s, (*m_it)->score); + for (auto m = (**e).tails.begin(); m != (**e).tails.end(); ++m) { + s = semiring.multiply(s, (**m).score); } - (*n_it)->score = semiring.add((*n_it)->score, semiring.multiply(s, (*e_it)->score)); + (**n).score = semiring.add((**n).score, semiring.multiply(s, (**e).score)); } } - for (auto it = sorted.begin(); it != sorted.end(); ++it) { - cout << (*it)->id << " " << (*it)->score << endl; + for (auto it = hg.nodes.begin(); it != hg.nodes.end(); ++it) { + cout << (**it).id << " " << (**it).score << endl; } -}*/ +} + +namespace io { + +void +read(Hypergraph& hg, string fn) +{ + ifstream ifs(fn); + size_t i = 0, nn, ne; + msgpack::unpacker pac; + while(true) { + pac.reserve_buffer(32*1024); + size_t bytes = ifs.readsome(pac.buffer(), pac.buffer_capacity()); + pac.buffer_consumed(bytes); + msgpack::unpacked result; + while(pac.next(&result)) { + msgpack::object o = result.get(); + if (i == 0) { + o.convert(&nn); + nn += 1; + } else if (i == 1) { + o.convert(&ne); + ne += 1; + } else if (i > 1 && i <= nn) { + //cout << "N " << o << endl; + Node* n = new Node; + o.convert(n); + } else if (i > nn && i <= nn+ne+1) { + //cout << "E " << o << endl; + Edge* e = new Edge; + o.convert(e); + } + i++; + } + if (!bytes) break; + } +} + +void +write(Hypergraph& hg, string fn) +{ + /*FILE* file = fopen(argv[2], "wb"); + msgpack::fbuffer fbuf(file); + msgpack::pack(fbuf, 
hg.nodes.size()); + msgpack::pack(fbuf, hg.edges.size()); + msgpack::pack(fbuf, hg.weights); + for (auto it = hg.nodes.begin(); it != hg.nodes.end(); it++) + msgpack::pack(fbuf, *it); + for (auto it = hg.edges.begin(); it != hg.edges.end(); it++) + msgpack::pack(fbuf, *it); + + fclose(file);*/ +} + +void +manual(Hypergraph& hg) +{ + // nodes + Node* a = new Node; a->id = 0; a->symbol = "root"; a->left = false; a->right = false; a->mark = 0; + Node* b = new Node; b->id = 1; b->symbol = "NP"; b->left = 0; b->right = 1; b->mark = 0; + Node* c = new Node; c->id = 2; c->symbol = "V"; c->left = 1; c->right = 2; c->mark = 0; + Node* d = new Node; d->id = 3; d->symbol = "JJ"; d->left = 3; d->right = 4; d->mark = 0; + Node* e = new Node; e->id = 4; e->symbol = "NN"; e->left = 3; e->right = 5; e->mark = 0; + Node* f = new Node; f->id = 5; f->symbol = "NP"; f->left = 2; f->right = 5; f->mark = 0; + Node* g = new Node; g->id = 6; g->symbol = "NP"; g->left = 1; g->right = 5; g->mark = 0; + Node* h = new Node; h->id = 7; h->symbol = "S"; h->left = 0; h->right = 6; h->mark = 0; + + hg.add_node(a); + hg.add_node(h); + hg.add_node(g); + hg.add_node(c); + hg.add_node(d); + hg.add_node(f); + hg.add_node(b); + hg.add_node(e); + + // edges + Edge* q = new Edge; q->head = hg.nodes_by_id[1]; q->tails.push_back(hg.nodes_by_id[0]); q->score = 0.367879441171; + hg.nodes_by_id[1]->incoming.push_back(q); + hg.nodes_by_id[0]->outgoing.push_back(q); + q->arity = 1; + q->mark = 0; + hg.edges.push_back(q); + + Edge* p = new Edge; p->head = hg.nodes_by_id[2]; p->tails.push_back(hg.nodes_by_id[0]); p->score = 0.606530659713; + hg.nodes_by_id[2]->incoming.push_back(p); + hg.nodes_by_id[0]->outgoing.push_back(p); + p->arity = 1; + p->mark = 0; + hg.edges.push_back(p); + + Edge* r = new Edge; r->head = hg.nodes_by_id[3]; r->tails.push_back(hg.nodes_by_id[0]); r->score = 1.0; + hg.nodes_by_id[3]->incoming.push_back(r); + hg.nodes_by_id[0]->outgoing.push_back(r); + r->arity = 1; + r->mark = 0; + 
hg.edges.push_back(r); + + Edge* s = new Edge; s->head = hg.nodes_by_id[3]; s->tails.push_back(hg.nodes_by_id[0]); s->score = 1.0; + hg.nodes_by_id[3]->incoming.push_back(s); + hg.nodes_by_id[0]->outgoing.push_back(s); + s->arity = 1; + s->mark = 0; + hg.edges.push_back(s); + + Edge* t = new Edge; t->head = hg.nodes_by_id[4]; t->tails.push_back(hg.nodes_by_id[0]); t->score = 1.0; + hg.nodes_by_id[4]->incoming.push_back(t); + hg.nodes_by_id[0]->outgoing.push_back(t); + t->arity = 1; + t->mark = 0; + hg.edges.push_back(t); + + Edge* u = new Edge; u->head = hg.nodes_by_id[4]; u->tails.push_back(hg.nodes_by_id[0]); u->score = 1.0; + hg.nodes_by_id[4]->incoming.push_back(u); + hg.nodes_by_id[0]->outgoing.push_back(u); + u->arity = 1; + u->mark = 0; + hg.edges.push_back(u); + + Edge* v = new Edge; v->head = hg.nodes_by_id[4]; v->tails.push_back(hg.nodes_by_id[3]); v->score = 1.0; + hg.nodes_by_id[4]->incoming.push_back(v); + hg.nodes_by_id[3]->outgoing.push_back(v); + v->arity = 1; + v->mark = 0; + hg.edges.push_back(v); + + Edge* w = new Edge; w->head = hg.nodes_by_id[4]; w->tails.push_back(hg.nodes_by_id[3]); w->score = 2.71828182846; + hg.nodes_by_id[4]->incoming.push_back(w); + hg.nodes_by_id[3]->outgoing.push_back(w); + w->arity = 1; + w->mark = 0; + hg.edges.push_back(w); + + Edge* x = new Edge; x->head = hg.nodes_by_id[5]; x->tails.push_back(hg.nodes_by_id[4]); x->score = 1.0; + hg.nodes_by_id[5]->incoming.push_back(x); + hg.nodes_by_id[4]->outgoing.push_back(x); + x->arity = 1; + x->mark = 0; + hg.edges.push_back(x); + + Edge* y = new Edge; y->head = hg.nodes_by_id[6]; y->tails.push_back(hg.nodes_by_id[2]); y->tails.push_back(hg.nodes_by_id[5]); y->score = 1.0; + hg.nodes_by_id[6]->incoming.push_back(y); + hg.nodes_by_id[2]->outgoing.push_back(y); + hg.nodes_by_id[5]->outgoing.push_back(y); + y->arity = 2; + y->mark = 0; + hg.edges.push_back(y); + + Edge* z = new Edge; z->head = hg.nodes_by_id[7]; z->tails.push_back(hg.nodes_by_id[1]); 
z->tails.push_back(hg.nodes_by_id[6]); z->score = 1.0; + hg.nodes_by_id[7]->incoming.push_back(z); + hg.nodes_by_id[1]->outgoing.push_back(z); + hg.nodes_by_id[6]->outgoing.push_back(z); + z->arity = 2; + z->mark = 0; + hg.edges.push_back(z); +} + +} // namespace } // namespace diff --git a/fast/hypergraph.hh b/fast/hypergraph.hh index 68cca19..2e30911 100644 --- a/fast/hypergraph.hh +++ b/fast/hypergraph.hh @@ -1,5 +1,4 @@ -#ifndef HYPERGRAPH_HH -#define HYPERGRAPH_HH +#pragma once #include "grammar.hh" #include "semiring.hh" @@ -12,8 +11,10 @@ #include <functional> #include <algorithm> #include <iterator> +#include <fstream> -#include "msgpack-c/include/msgpack.hpp" +#include "dummyvector.h" +#include <msgpack.hpp> using namespace std; @@ -23,61 +24,78 @@ typedef double weight_t; namespace Hg { -class Node; +struct Node; -class Edge { - public: - Node* head; - vector<Node*> tails; - score_t score; - //Grammar::Rule rule; FIXME - vector<weight_t> f; - unsigned int arity; - unsigned int mark; +struct Edge { + Node* head; + vector<Node*> tails; + score_t score; + string rule; //FIXME + DummyVector f; //FIXME + unsigned int arity; + unsigned int mark; - bool is_marked(); - friend std::ostream& operator<<(std::ostream& os, const Edge& s); + bool is_marked(); + friend std::ostream& operator<<(std::ostream& os, const Edge& s); - size_t head_id_; - vector<size_t> tails_ids_; // node ids - MSGPACK_DEFINE(head_id_, tails_ids_, score, f, arity); + size_t head_id_; + vector<size_t> tails_ids_; // node ids + + MSGPACK_DEFINE(head_id_, tails_ids_, score, f, arity); }; -class Node { - public: - size_t id; - string symbol; - unsigned short left; - unsigned short right; - score_t score; - vector<Edge*> incoming; - vector<Edge*> outgoing; - unsigned int mark; - - bool is_marked(); - friend std::ostream& operator<<(std::ostream& os, const Node& n); - - vector<size_t> incoming_ids_; // edge ids - vector<size_t> outgoing_ids_; // edge ids - MSGPACK_DEFINE(id, symbol, left, right, 
score, incoming_ids_, outgoing_ids_); +struct Node { + size_t id; + string symbol; + unsigned short left; + unsigned short right; + score_t score; + vector<Edge*> incoming; + vector<Edge*> outgoing; + unsigned int mark; + + bool is_marked(); + friend std::ostream& operator<<(std::ostream& os, const Node& n); + + vector<size_t> incoming_ids_; // edge ids + vector<size_t> outgoing_ids_; // edge ids + MSGPACK_DEFINE(id, symbol, left, right, score, incoming_ids_, outgoing_ids_); }; -class Hypergraph { - public: - list<Node*> nodes; - vector<Edge*> edges; - unordered_map<size_t, Node*> nodes_by_id; - unsigned int arity; +struct Hypergraph { + list<Node*> nodes; + vector<Edge*> edges; + unordered_map<size_t, Node*> nodes_by_id; + unsigned int arity; - void reset(); - void add_node(Node* n) { nodes.push_back(n); nodes_by_id[n->id] = n; } + void add_node(Node* n) { nodes.push_back(n); nodes_by_id[n->id] = n; } }; -void topological_sort(list<Node*>& nodes, list<Node*>::iterator root); -void viterbi(Hypergraph& hg); +void +reset(list<Node*> nodes, vector<Edge*> edges); // clears the mark on every node and edge; signature matches the definition in hypergraph.cc + +template<typename Semiring> void +init(list<Node*>& nodes, list<Node*>::iterator root, Semiring& semiring); + +void +topological_sort(list<Node*>& nodes, list<Node*>::iterator root); + +void +viterbi(Hypergraph& hg); + +namespace io { +void +read(Hypergraph& hg, string fn); + +void +write(Hypergraph& hg, string fn); + +void +manual(Hypergraph& hg); } // namespace -#endif + +} // namespace diff --git a/fast/json-cpp.hpp b/fast/json-cpp.hpp new file mode 100644 index 0000000..851a4f4 --- /dev/null +++ b/fast/json-cpp.hpp @@ -0,0 +1,1231 @@ +// +// DO NOT EDIT !!! This file was generated with a script. +// +// JSON for C++ +// https://github.com/ascheglov/json-cpp +// Version 0.1 alpha, rev. 
170121e2dc099895064305e38bfb25d90a807ce3 +// Generated 2014-03-27 17:16:47.104492 UTC +// +// Belongs to the public domain + +#pragma once + +//---------------------------------------------------------------------- +// json-cpp.hpp begin + +//---------------------------------------------------------------------- +// json-cpp/parse.hpp begin + +#include <memory> +#include <istream> +#include <iterator> +#include <string> +#include <type_traits> + +//---------------------------------------------------------------------- +// json-cpp/ParserError.hpp begin + +#include <cassert> +#include <cstddef> +#include <exception> +#include <string> + +#if defined _MSC_VER +# define JSONCPP_INTERNAL_NOEXCEPT_ throw() +#else +# define JSONCPP_INTERNAL_NOEXCEPT_ noexcept +#endif + +namespace jsoncpp +{ + class ParserError : public std::exception + { + public: + enum Type + { + NoError, + Eof, UnexpectedCharacter, + InvalidEscapeSequence, NoTrailSurrogate, + UnexpectedType, UnknownField, + NumberIsOutOfRange, + }; + + ParserError(Type type, std::size_t line, std::size_t column) + : m_type{type}, m_line{line}, m_column{column} + { + assert(type != NoError); + } + + virtual const char* what() const JSONCPP_INTERNAL_NOEXCEPT_ override + { + if (m_what.empty()) + { + m_what = "JSON parser error at line "; + m_what += std::to_string(m_line); + m_what += ", column "; + m_what += std::to_string(m_column); + switch (m_type) + { + case Eof: m_what += ": unexpected end of file"; break; + case UnexpectedCharacter: m_what += ": unexpected character"; break; + case InvalidEscapeSequence: m_what += ": invalid escape sequence"; break; + case NoTrailSurrogate: m_what += ": no UTF-16 trail surrogate"; break; + case UnexpectedType: m_what += ": unexpected value type"; break; + case UnknownField: m_what += ": unknown field name"; break; + case NumberIsOutOfRange: m_what += ": number is out of range"; break; + case NoError: + default: + m_what += ": INTERNAL ERROR"; break; + } + } + + return 
m_what.c_str(); + } + + Type type() const { return m_type; } + std::size_t line() const { return m_line; } + std::size_t column() const { return m_column; } + + private: + Type m_type; + std::size_t m_line; + std::size_t m_column; + + mutable std::string m_what; + }; +} + +#undef JSONCPP_INTERNAL_NOEXCEPT_ + +// json-cpp/ParserError.hpp end +//---------------------------------------------------------------------- + +//---------------------------------------------------------------------- +// json-cpp/Stream.hpp begin + +namespace jsoncpp +{ + template<class Traits> + class Stream; + + namespace details + { + template<typename CharT, class X> + struct Traits2 {}; + + template<class Traits> + struct ParserTraits {}; + + template<class Traits> + struct GeneratorTraits {}; + } + + template<class X> + using Parser = Stream<details::ParserTraits<X>>; + + template<class X> + using Generator = Stream<details::GeneratorTraits<X>>; + + template<typename X, typename T> + inline auto serialize(Stream<X>& stream, T& value) -> decltype(value.serialize(stream), void()) + { + value.serialize(stream); + } +} +// json-cpp/Stream.hpp end +//---------------------------------------------------------------------- + +//---------------------------------------------------------------------- +// json-cpp/value_types.hpp begin + +namespace jsoncpp +{ + // Helper masks + const auto TypeIsNotFundamental = 0x40; + const auto TypeIsCollection = 0x80; + + enum class Type + { + Undefined = 0, // Helper type for debugging variant-like types + Null = 0x01, + Boolean = 0x02, + Number = 0x04, + String = 0x08 | TypeIsNotFundamental, + Array = 0x10 | TypeIsNotFundamental | TypeIsCollection, + Object = 0x20 | TypeIsNotFundamental | TypeIsCollection, + }; +} +// json-cpp/value_types.hpp end +//---------------------------------------------------------------------- + +//---------------------------------------------------------------------- +// json-cpp/details/parser_utility.hpp begin + +#include <cassert> 
+#include <cstddef> +#include <utility> + +namespace jsoncpp { namespace details +{ + template<typename CharT> + struct CStrIterator + { + using this_type = CStrIterator<CharT>; + + CStrIterator() + { + static CharT null{0}; + m_ptr = &null; + } + + CStrIterator(const CharT* ptr) : m_ptr{ptr} {} + + const CharT& operator*() { return *m_ptr; } + const CharT* operator->() { return m_ptr; } + + this_type& operator++() + { + assert(!isEnd()); + ++m_ptr; + return *this; + } + + this_type operator++(int) { auto temp = *this; ++*this; return temp; } + + bool operator==(const this_type& rhs) const { return isEnd() == rhs.isEnd(); } + bool operator!=(const this_type& rhs) const { return !this->operator==(rhs); } + + private: + const CharT* m_ptr; + + bool isEnd() const { return *m_ptr == 0; } + }; + + class Diagnostics + { + public: + void nextColumn() { ++m_column; } + void newLine() { ++m_line; m_column = 0; } + + ParserError makeError(ParserError::Type type) const + { + return{type, m_line, m_column}; + } + + private: + std::size_t m_column{0}; + std::size_t m_line{1}; + }; + + template<typename InputIterator> + struct Reader + { + using this_type = Reader<InputIterator>; + + Reader(InputIterator first, InputIterator last) : m_iter(first), m_end(last) + { + checkEnd(); + } + + char operator*() { return *m_iter; } + this_type& operator++() + { + checkEnd(); + ++m_iter; + m_diag.nextColumn(); + return *this; + } + + void checkEnd() + { + if (m_iter == m_end) + throw m_diag.makeError(ParserError::Eof); + } + + char getNextChar() + { + auto prev = *m_iter; + ++*this; + return prev; + } + + Diagnostics m_diag; + InputIterator m_iter, m_end; + }; +}} + +// json-cpp/details/parser_utility.hpp end +//---------------------------------------------------------------------- + +//---------------------------------------------------------------------- +// json-cpp/details/number_parser.hpp begin + +#include <cmath> + +namespace jsoncpp { namespace details +{ + inline bool isDigit(char 
c) { return c >= '0' && c <= '9'; } + + template<typename Iterator> + inline unsigned parseIntNumber(Iterator& iter) + { + auto intPart = 0U; // TBD: 0ULL ? + + do + { + intPart = intPart * 10 + (*iter - '0'); + + ++iter; + } + while (isDigit(*iter)); + + return intPart; + } + + template<typename Iterator> + inline double parseRealNumber(Iterator& iter) + { + double number = 0; + + if (*iter == '0') + { + ++iter; + } + else + { + number = parseIntNumber(iter); + } + + // here `ch` is a peeked character, need to call eat() + + if (*iter == '.') + { + ++iter; + + auto mul = 0.1; + while (isDigit(*iter)) + { + number += (*iter - '0') * mul; + mul /= 10; + ++iter; + } + } + + // here `ch` is a peeked character, need to call eat() + + if (*iter == 'e' || *iter == 'E') + { + ++iter; + + auto negate = *iter == '-'; + if (negate || *iter == '+') + ++iter; + // FIXME: check `ch` for non-digit + + auto e = parseIntNumber(iter); + + if (negate) + number /= std::pow(10, e); + else + number *= std::pow(10, e); + } + + return number; + } +}} +// json-cpp/details/number_parser.hpp end +//---------------------------------------------------------------------- + +//---------------------------------------------------------------------- +// json-cpp/details/string_parser.hpp begin + +#include <string> + +namespace jsoncpp { namespace details +{ + inline char32_t utf16SurrogatePairToUtf32(char32_t lead, char32_t trail) + { + return 0x10000 + ((lead - 0xD800) << 10) + (trail - 0xDC00); // must add, not OR: the shifted lead bits can overlap 0x10000 (e.g. D840 DC00 is U+20000) + } + + inline void utf32ToUtf8(char32_t c, std::string& str) + { + auto add = [&str](char32_t c){ str.push_back(static_cast<char>(c)); }; + + if (c < 0x80) + { + add(c); + } + else if (c < 0x800) + { + add(0xC0 | c >> 6); + add(0x80 | (c & 0x3f)); + } + else if (c < 0x10000) + { + add(0xE0 | c >> 12); + add(0x80 | ((c >> 6) & 0x3f)); + add(0x80 | (c & 0x3f)); + } + else if (c < 0x200000) + { + add(0xF0 | c >> 18); + add(0x80 | ((c >> 12) & 0x3f)); + add(0x80 | ((c >> 6) & 0x3f)); + add(0x80 | (c & 0x3f)); 
+ } + else if (c < 0x4000000) + { + add(0xF8 | c >> 24); + add(0x80 | ((c >> 18) & 0x3f)); + add(0x80 | ((c >> 12) & 0x3f)); + add(0x80 | ((c >> 6) & 0x3f)); + add(0x80 | (c & 0x3f)); + } + else + { + add(0xFC | c >> 30); + add(0x80 | ((c >> 24) & 0x3f)); + add(0x80 | ((c >> 18) & 0x3f)); + add(0x80 | ((c >> 12) & 0x3f)); + add(0x80 | ((c >> 6) & 0x3f)); + add(0x80 | (c & 0x3f)); + } + } + + enum class CharType { Raw, CodePoint, UTF16Pair }; + + template<typename CharT, std::size_t CharSize> + inline void addToStr(std::basic_string<CharT>& str, CharType type, char32_t c1, char32_t c2); + + template<> + inline void addToStr<char, 1>(std::basic_string<char>& str, CharType type, char32_t c1, char32_t c2) + { + if (type == CharType::Raw) + { + str.push_back(static_cast<char>(c1)); + } + else if (type == CharType::CodePoint) + { + utf32ToUtf8(c1, str); + } + else + { + auto c32 = utf16SurrogatePairToUtf32(c1, c2); + utf32ToUtf8(c32, str); + } + } + + template<> + inline void addToStr<wchar_t, 2>(std::basic_string<wchar_t>& str, CharType type, char32_t c1, char32_t c2) + { + str.push_back(static_cast<wchar_t>(c1)); + if (type == CharType::UTF16Pair) + str.push_back(static_cast<wchar_t>(c2)); + } + + template<> + inline void addToStr<wchar_t, 4>(std::basic_string<wchar_t>& str, CharType type, char32_t c1, char32_t c2) + { + auto c = (type == CharType::UTF16Pair) ? 
utf16SurrogatePairToUtf32(c1, c2) : c1; + str.push_back(static_cast<wchar_t>(c)); + } + + template<typename Iterator> + inline int parseHexDigit(Iterator& iter, ParserError::Type& err) + { + auto ch = *iter; + ++iter; + if (ch >= '0' && ch <= '9') return ch - '0'; + if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10; + if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10; + + err = ParserError::InvalidEscapeSequence; + return 0; + } + + template<typename Iterator> + inline char32_t parseUTF16CodeUnit(Iterator& iter, ParserError::Type& err) + { + auto n = parseHexDigit(iter, err) << 12; + n |= parseHexDigit(iter, err) << 8; + n |= parseHexDigit(iter, err) << 4; + n |= parseHexDigit(iter, err); + return static_cast<char32_t>(n); + } + + template<typename Iterator, typename CharT> + inline ParserError::Type parseStringImpl(Iterator& iter, std::basic_string<CharT>& str) + { + str.clear(); + auto add = [&str](CharType type, char32_t c1, char32_t c2) + { + addToStr<CharT, sizeof(CharT)>(str, type, c1, c2); + }; + + for (;;) + { + auto ch = static_cast<char32_t>(*iter); + ++iter; + if (ch == '"') + return ParserError::NoError; + + if (ch == '\\') + { + ch = static_cast<char32_t>(*iter); + ++iter; + switch (ch) + { + case '\\': case '"': case '/': + break; + + case 'b': ch = '\b'; break; + case 'f': ch = '\f'; break; + case 'n': ch = '\n'; break; + case 'r': ch = '\r'; break; + case 't': ch = '\t'; break; + + case 'u': + { + ParserError::Type err{ParserError::NoError}; + auto codeUnit = parseUTF16CodeUnit(iter, err); + if (err != ParserError::NoError) + return err; + + if (codeUnit >= 0xD800 && codeUnit < 0xDC00) + { + if (*iter != '\\') return ParserError::NoTrailSurrogate; + ++iter; + if (*iter != 'u') return ParserError::NoTrailSurrogate; + ++iter; + + auto trailSurrogate = parseUTF16CodeUnit(iter, err); + if (err != ParserError::NoError) + return err; + + add(CharType::UTF16Pair, codeUnit, trailSurrogate); + } + else + { + add(CharType::CodePoint, codeUnit, 0); + } + } + 
continue; + + default: + return ParserError::InvalidEscapeSequence; + } + } + + add(CharType::Raw, ch, 0); + } + } +}} + +// json-cpp/details/string_parser.hpp end +//---------------------------------------------------------------------- + +namespace jsoncpp +{ + template<typename CharT, typename InputIterator> + class Stream<details::ParserTraits<details::Traits2<CharT, InputIterator>>> + { + public: + using this_type = Parser<details::Traits2<CharT, InputIterator>>; + + explicit Stream(InputIterator first, InputIterator last) + : m_reader{first, last} + { + nextValue(); + } + + Type getType() const { return m_type; } + bool getBoolean() const { return m_boolean; } + double getNumber() const { return m_number; } + const std::string& getFieldName() const { return m_fieldName; } + + void checkType(Type type) const + { + if (getType() != type) + throw makeError(ParserError::UnexpectedType); + } + + bool isListEnd(char terminator) + { + eatWhitespace(); + if (*m_reader != terminator) + return false; + + ++m_reader; + return true; + } + + void eatListSeparator() + { + eatWhitespace(); + check(','); + eatWhitespace(); + } + + void nextNameValuePair() + { + eatWhitespace(); + check('"'); + parseString(m_fieldName); + eatWhitespace(); + check(':'); + nextValue(); + } + + void nextValue() + { + eatWhitespace(); + m_type = nextValueImpl(); + } + + template<typename DstCharT> + void parseString(std::basic_string<DstCharT>& str) + { + auto err = parseStringImpl(m_reader, str); + if (err != ParserError::NoError) + throw m_reader.m_diag.makeError(err); + } + + ParserError makeError(ParserError::Type type) const + { + return m_reader.m_diag.makeError(type); + } + + private: + Type nextValueImpl() + { + switch (*m_reader) + { + case '{': ++m_reader; return Type::Object; + case '[': ++m_reader; return Type::Array; + case 't': ++m_reader; checkLiteral("true"); m_boolean = true; return Type::Boolean; + case 'f': ++m_reader; checkLiteral("false"); m_boolean = false; return 
Type::Boolean; + case 'n': ++m_reader; checkLiteral("null"); return Type::Null; + case '"': ++m_reader; return Type::String; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + m_number = parseRealNumber(m_reader); + return Type::Number; + + case '-': + ++m_reader; + m_number = -parseRealNumber(m_reader); + return Type::Number; + } + + throw unexpectedCharacter(); + } + + ParserError unexpectedCharacter() const + { + return makeError(ParserError::UnexpectedCharacter); + } + + void check(char expectedChar) + { + if (*m_reader != expectedChar) + throw unexpectedCharacter(); + + ++m_reader; + } + + template<std::size_t N> + void checkLiteral(const char(&literal)[N]) + { + static_assert(N > 2, ""); + for (auto i = 1; i != N - 1; ++i, ++m_reader) + if (*m_reader != literal[i]) + throw unexpectedCharacter(); + } + + void eatWhitespace() + { + for (;; ++m_reader) + { + switch (*m_reader) + { + case '/': + ++m_reader; + check('/'); + while (*m_reader != '\n') + ++m_reader; + + // no break here + case '\n': + m_reader.m_diag.newLine(); + break; + + case ' ': case '\t': case '\r': + break; + + default: + return; + } + } + } + + details::Reader<InputIterator> m_reader; + + Type m_type; + double m_number; + bool m_boolean; + std::string m_fieldName; + }; + + template<class X> + inline void serialize(Parser<X>& parser, bool& value) + { + parser.checkType(Type::Boolean); + value = parser.getBoolean(); + } + + template<class X, typename T> + inline typename std::enable_if<std::is_arithmetic<T>::value>::type + serialize(Parser<X>& parser, T& value) + { + parser.checkType(Type::Number); + auto number = parser.getNumber(); + value = static_cast<T>(number); + if (value != number) + throw parser.makeError(ParserError::NumberIsOutOfRange); + } + + template<class X, typename DstCharT> + inline void serialize(Parser<X>& parser, std::basic_string<DstCharT>& value) + { + parser.checkType(Type::String); + parser.parseString(value); + } 
+ + namespace details + { + template<class X, typename Callback> + inline void parseList(Parser<X>& parser, Type type, char terminator, Callback&& callback) + { + parser.checkType(type); + + while (!parser.isListEnd(terminator)) + { + callback(); + + if (parser.isListEnd(terminator)) + return; + + parser.eatListSeparator(); + } + } + } + + template<class X, typename Callback> + inline void parseObject(Parser<X>& parser, Callback&& callback) + { + details::parseList(parser, Type::Object, '}', [&] + { + parser.nextNameValuePair(); + callback(parser.getFieldName()); + }); + } + + template<class X, typename Callback> + void parseArray(Parser<X>& parser, Callback&& callback) + { + details::parseList(parser, Type::Array, ']', [&] + { + parser.nextValue(); + callback(); + }); + } + + template<typename CharT, class T, typename InputIterator> + inline void parse(T& object, InputIterator first, InputIterator last) + { + Parser<details::Traits2<CharT, InputIterator>> stream{first, last}; + serialize(stream, object); + } + + template<typename T, typename CharT> + inline void parse(T& object, const CharT* str) + { + details::CStrIterator<CharT> first{str}, last; + parse<CharT>(object, first, last); + } + + template<typename T, typename CharT> + inline void parse(T& object, std::basic_string<CharT>& str) + { + parse<CharT>(object, std::begin(str), std::end(str)); + } + + template<typename T, typename CharT> + inline void parse(T& object, std::basic_istream<CharT>& stream) + { + std::istreambuf_iterator<CharT> first{stream}, last; + parse<CharT>(object, first, last); + } +} + +// json-cpp/parse.hpp end +//---------------------------------------------------------------------- + +//---------------------------------------------------------------------- +// json-cpp/std_types.hpp begin + +#include <deque> +#include <forward_list> +#include <list> +#include <map> +#include <memory> +#include <set> +#include <type_traits> +#include <unordered_map> +#include <unordered_set> +#include 
<vector> + +//---------------------------------------------------------------------- +// json-cpp/generate.hpp begin + +#include <sstream> +#include <string> + +//---------------------------------------------------------------------- +// json-cpp/details/string_writer.hpp begin + +#include <string> + +namespace jsoncpp { namespace details +{ + template<typename SrcCharT, typename Sink> + inline void writeString(const std::basic_string<SrcCharT>& str, Sink&& sink) + { + sink('"'); + for (auto iter = std::begin(str), last = std::end(str); iter != last; ++iter) + { + switch (char32_t ch = static_cast<unsigned char>(*iter)) + { + case '"': sink('\\'); sink('"'); break; + case '\\': sink('\\'); sink('\\'); break; + case '\b': sink('\\'); sink('b'); break; + case '\f': sink('\\'); sink('f'); break; + case '\n': sink('\\'); sink('n'); break; + case '\r': sink('\\'); sink('r'); break; + case '\t': sink('\\'); sink('t'); break; + default: + if (ch < '\x20') + { + const auto table = "0123456789ABCDEF"; + unsigned n = static_cast<unsigned char>(ch); + sink('\\'); + sink('u'); + sink('0'); + sink('0'); + sink(table[n >> 4]); + sink(table[n & 15]); + } + else + { + sink(static_cast<char>(ch)); + } + } + } + sink('"'); + } +}} + +// json-cpp/details/string_writer.hpp end +//---------------------------------------------------------------------- + +namespace jsoncpp +{ + template<typename CharT, typename Sink> + class Stream<details::GeneratorTraits<details::Traits2<CharT, Sink>>> + { + public: + using this_type = Generator<details::Traits2<CharT, Sink>>; + + explicit Stream(Sink& sink) : m_sink(&sink) {} + + void objectBegin() + { + (*m_sink) << "{"; + } + + void fieldName(const char* name) + { + (*m_sink) << '"' << name << "\": "; + // TODO: use writeString (?) + } + + template<typename StrCharT> + void fieldName(const std::basic_string<StrCharT>& name) + { + (*m_sink) << '"' << name << "\": "; + // TODO: use writeString (?) 
+        }
+
+        void separator()
+        {
+            (*m_sink) << ", ";
+        }
+
+        void objectEnd()
+        {
+            (*m_sink) << '}';
+        }
+
+        void arrayBegin()
+        {
+            (*m_sink) << '[';
+        }
+
+        void arrayEnd()
+        {
+            (*m_sink) << ']';
+        }
+        // serialize() overloads for JSON scalars; declared as friends so they can reach the private m_sink.
+        friend void serialize(this_type& stream, std::nullptr_t)
+        {
+            (*stream.m_sink) << "null";
+        }
+
+        friend void serialize(this_type& stream, bool value)
+        {
+            (*stream.m_sink) << (value ? "true" : "false");
+        }
+        // Any arithmetic type is written with the sink's own operator<<.
+        template<typename T>
+        friend typename std::enable_if<std::is_arithmetic<T>::value>::type serialize(this_type& stream, T& value)
+        {
+            (*stream.m_sink) << value;
+        }
+        // Strings are escaped by details::writeString and pushed into the sink with put().
+        template<typename SrcCharT>
+        friend void serialize(this_type& stream, const std::basic_string<SrcCharT>& value)
+        {
+            details::writeString(value, [&stream](char c){ stream.m_sink->put(c); });
+        }
+
+    private:
+        Sink* m_sink;
+    };
+    // Serializes *ptr when ptr tests true, otherwise writes JSON null.
+    template<class X, typename Pointer>
+    inline void writePointer(Generator<X>& generator, Pointer& ptr)
+    {
+        if (ptr)
+        {
+            serialize(generator, *ptr);
+        }
+        else
+        {
+            serialize(generator, nullptr);
+        }
+    }
+    // Writes the elements of `range` as a JSON array with separator() between consecutive elements.
+    template<class X, typename Range>
+    inline void writeRange(Generator<X>& generator, Range& range)
+    {
+        generator.arrayBegin();
+
+        auto iter = std::begin(range);
+        const auto& last = std::end(range);
+        if (iter != last)
+        {
+            for (;;)
+            {
+                serialize(generator, *iter);
+
+                ++iter;
+                if (iter == last)  // no separator after the final element
+                    break;
+
+                generator.separator();
+            }
+        }
+
+        generator.arrayEnd();
+    }
+    // Serializes `object` into a std::string; const is cast away because serialize() takes a non-const reference.
+    template<class T>
+    inline std::string to_string(const T& object)
+    {
+        std::ostringstream rawStream;
+        Generator<details::Traits2<char, std::ostream>> stream{rawStream};
+        serialize(stream, const_cast<T&>(object));
+        return rawStream.str();
+    }
+}
+
+// json-cpp/generate.hpp end
+//----------------------------------------------------------------------
+
+namespace jsoncpp
+{
+    template<class X, typename T>  // parser side for shared_ptr: a non-null value gets a fresh T, null resets the pointer
+    inline void serialize(Parser<X>& parser, std::shared_ptr<T>& obj)
+    {
+        if (parser.getType() != jsoncpp::Type::Null)
+        {
+            obj = std::make_shared<T>();
+
serialize(parser, *obj);
+        }
+        else
+        {
+            obj.reset();
+        }
+    }
+    // Generator side for shared_ptr: delegates to writePointer (value or JSON null).
+    template<class X, typename T>
+    inline void serialize(Generator<X>& generator, std::shared_ptr<T>& obj)
+    {
+        writePointer(generator, obj);
+    }
+    // Parser side for unique_ptr: a non-null value is parsed into a freshly allocated T, null clears the pointer.
+    template<class X, typename T>
+    inline void serialize(Parser<X>& parser, std::unique_ptr<T>& obj)
+    {
+        if (parser.getType() != jsoncpp::Type::Null)
+        {
+            obj.reset(new T());  // reset the unique_ptr itself; `obj->reset` would dereference a possibly null pointer
+            serialize(parser, *obj);
+        }
+        else
+        {
+            obj.reset();
+        }
+    }
+    // Generator side for unique_ptr: delegates to writePointer (value or JSON null).
+    template<class X, typename T>
+    inline void serialize(Generator<X>& generator, std::unique_ptr<T>& obj)
+    {
+        writePointer(generator, obj);
+    }
+    // Container helpers; each has a Parser overload (fills the container) and a Generator overload (writes it).
+    namespace details
+    {
+        template<class X, typename C>
+        inline void serializeContainer(Parser<X>& parser, C& c)
+        {
+            c.clear();
+            // Append a default-constructed element, then parse straight into it.
+            parseArray(parser, [&]
+            {
+                c.emplace_back();
+                serialize(parser, c.back());
+            });
+        }
+
+        template<class X, typename C>
+        inline void serializeContainer(Generator<X>& generator, C& c)
+        {
+            writeRange(generator, c);
+        }
+
+        template<class X, typename C>
+        inline void serializeSet(Parser<X>& parser, C& c)
+        {
+            c.clear();
+            // Set elements cannot be modified in place, so parse into a temporary and insert it.
+            parseArray(parser, [&]
+            {
+                typename C::value_type value;
+                serialize(parser, value);
+                c.insert(value);
+            });
+        }
+
+        template<class X, typename C>
+        inline void serializeSet(Generator<X>& generator, C& c)
+        {
+            writeRange(generator, c);
+        }
+
+        template<class X, typename C>
+        inline void serializeStrMap(Parser<X>& parser, C& c)
+        {
+            c.clear();
+            // c[name] creates the entry on first use; the value is parsed into it.
+            parseObject(parser, [&](const std::string& name)
+            {
+                serialize(parser, c[name]);
+            });
+        }
+        // Writes a string-keyed map as a JSON object: key as field name, mapped value as field value.
+        template<class X, typename C>
+        inline void serializeStrMap(Generator<X>& generator, C& c)
+        {
+            generator.objectBegin();
+
+            auto iter = std::begin(c);
+            const auto& last = std::end(c);
+            if (iter != last)
+            {
+                for (;;)
+                {
+                    generator.fieldName(iter->first);
+                    serialize(generator, iter->second);
+
+                    ++iter;
+                    if (iter == last)  // no separator after the final entry
+                        break;
+
+                    generator.separator();
+                }
+            }
+
+            generator.objectEnd();
+        }
+    }
+
+    template<class X, typename T>
+    inline void
serialize(Stream<X>& stream, std::vector<T>& arr)
+    { details::serializeContainer(stream, arr); }
+    // Sequence containers share serializeContainer, sets share serializeSet, string-keyed maps share serializeStrMap.
+    template<class X, typename T>
+    inline void serialize(Stream<X>& stream, std::list<T>& arr)
+    { details::serializeContainer(stream, arr); }
+    // NOTE(review): the parser-side serializeContainer calls emplace_back()/back(), which std::forward_list lacks - confirm parsing into one compiles.
+    template<class X, typename T>
+    inline void serialize(Stream<X>& stream, std::forward_list<T>& arr)
+    { details::serializeContainer(stream, arr); }
+
+    template<class X, typename T>
+    inline void serialize(Stream<X>& stream, std::deque<T>& arr)
+    { details::serializeContainer(stream, arr); }
+
+    template<class X, typename T>
+    inline void serialize(Stream<X>& stream, std::set<T>& arr)
+    { details::serializeSet(stream, arr); }
+
+    template<class X, typename T>
+    inline void serialize(Stream<X>& stream, std::unordered_set<T>& arr)
+    { details::serializeSet(stream, arr); }
+
+    template<class X, typename T>
+    inline void serialize(Stream<X>& stream, std::map<std::string, T>& t)
+    { details::serializeStrMap(stream, t); }
+
+    template<class X, typename T>
+    inline void serialize(Stream<X>& stream, std::unordered_map<std::string, T>& t)
+    { details::serializeStrMap(stream, t); }
+}
+// json-cpp/std_types.hpp end
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// json-cpp/serialization_helpers.hpp begin
+
+#include <array>
+#include <unordered_map>
+
+namespace jsoncpp
+{
+    namespace details
+    {
+        template<class X, typename T>  // base case: writes a single "name": value pair
+        inline void writeField(Generator<X>& generator, const char* name, T& value)
+        {
+            generator.fieldName(name);
+            serialize(generator, value);
+        }
+        // Recursive case: writes the first pair, a separator, then the remaining (name, value) pairs.
+        template<class X, typename T, typename... F>
+        inline void writeField(Generator<X>& generator, const char* name, T& value, F&&... fieldsDef)
+        {
+            writeField(generator, name, value);
+            generator.separator();
+            writeField(generator, fieldsDef...);
+        }
+        // Lookup table from field name to FieldInfo, filled from a (name, value, name, value, ...) argument list.
+        template<typename ParserT>
+        class FieldsTable
+        {
+        public:
+            template<typename... F>
+            FieldsTable(F&&...
fieldsDef)
+            {
+                m_map.reserve(sizeof...(fieldsDef) / 2);  // the arguments come in (name, value) pairs
+                add(1, fieldsDef...);                     // 1 is the position of the first value in the pack
+            }
+            // One entry per field: the value's position in the argument pack plus a type-erased parse function.
+            struct FieldInfo
+            {
+                template<typename T>
+                FieldInfo(T&, std::size_t idx)
+                {
+                    m_fieldIdx = idx;
+                    m_parseFn = [](ParserT& parser, void* fieldPtr)  // captureless lambda, stored as a plain function pointer
+                    {
+                        serialize(parser, static_cast<T&>(*reinterpret_cast<T*>(fieldPtr)));
+                    };
+                }
+
+                std::size_t m_fieldIdx;
+                void(*m_parseFn)(ParserT& parser, void* fieldPtr);
+            };
+            // Returns the entry registered under `name`, or nullptr when the name is unknown.
+            const FieldInfo* find(const std::string& name) const
+            {
+                auto it = m_map.find(name);
+                return it == m_map.end() ? nullptr : &it->second;
+            }
+
+        private:
+            template<typename T, typename... F>
+            void add(std::size_t idx, const char* name, T& value, F&&... otherFields)
+            {
+                m_map.emplace(name, FieldInfo(value, idx));
+                add(idx + 2, otherFields...);  // step over the next name to the next value
+            }
+
+            void add(std::size_t /*idx*/) {}  // end of recursion
+
+            std::unordered_map<std::string, FieldInfo> m_map;
+        };
+        // makePtrs: names map to nullptr and values to their address, so the array built in fields() matches the table indices.
+        inline void* makePtrs(const char*) { return nullptr; }
+
+        template<typename T>
+        inline void* makePtrs(T& obj) { return &obj; }
+    }
+    // Parser side: reads a JSON object into the listed fields; an unlisted name throws parser.makeError(ParserError::UnknownField).
+    template<class Cls, class X, typename... F>
+    inline void fields(Cls&, Parser<X>& parser, F&&... fieldsDef)
+    {
+        std::array<void*, sizeof...(fieldsDef)> ptrs{details::makePtrs(fieldsDef)...};
+
+        static const details::FieldsTable<Parser<X>> table{fieldsDef...};  // static: built on the first call of this instantiation
+
+        auto&& handler = [&](const std::string& fieldName)
+        {
+            auto fieldInfo = table.find(fieldName);
+            if (fieldInfo == nullptr)
+                throw parser.makeError(ParserError::UnknownField);
+
+            auto fieldPtr = ptrs[fieldInfo->m_fieldIdx];  // address of this call's field object
+            fieldInfo->m_parseFn(parser, fieldPtr);
+        };
+
+        parseObject(parser, handler);
+    }
+    // Generator side: writes the listed fields as one JSON object.
+    template<class Cls, class X, typename... F>
+    inline void fields(Cls&, Generator<X>& generator, F&&...
fieldsDef)
+    {
+        generator.objectBegin();
+        details::writeField(generator, fieldsDef...);  // writes every (name, value) pair, separated by separator()
+        generator.objectEnd();
+    }
+}
+
+// json-cpp/serialization_helpers.hpp end
+//----------------------------------------------------------------------
+
+// json-cpp.hpp end
+//----------------------------------------------------------------------
+
diff --git a/fast/main.cc b/fast/main.cc
index 372f0f1..a7b5837 100644
--- a/fast/main.cc
+++ b/fast/main.cc
@@ -2,140 +2,11 @@
 int
-main(void)
+main(int argc, char** argv)  // argv[1] is handed to Hg::io::read below
 {
-/*
-{
-"weights":{"logp":2.0,"use_house":0.0,"use_shell":1.0},
-"nodes":
-[
-{ "id":0, "cat":"root", "span":[0,0] },
-{ "id":1, "cat":"NP", "span":[1,2] },
-{ "id":2, "cat":"V", "span":[2,3] },
-{ "id":3, "cat":"JJ", "span":[4,5] },
-{ "id":4, "cat":"NN", "span":[4,6] },
-{ "id":5, "cat":"NP", "span":[3,6] },
-{ "id":6, "cat":"VP", "span":[2,6] },
-{ "id":7, "cat":"S", "span":[1,6] }
-],
-"edges":
-[
-{ "head":1, "rule":"[NP] ||| ich ||| i", "tails":[0], "f":{"logp":-0.5,"use_i":1.0} },
-{ "head":2, "rule":"[V] ||| sah ||| saw", "tails":[0], "f":{"logp":-0.25,"use_saw":1.0} },
-{ "head":3, "rule":"[JJ] ||| kleines ||| small", "tails":[0], "f":{"logp":0.0,"use_small":1.0} },
-{ "head":3, "rule":"[JJ] ||| kleines ||| little", "tails":[0], "f":{"logp":0.0,"use_little":1.0} },
-{ "head":4, "rule":"[NN] ||| kleines haus ||| small house", "tails":[0], "f":{"logp":0.0,"use_house":1.0} },
-{ "head":4, "rule":"[NN] ||| kleines haus ||| little house", "tails":[0], "f":{"logp":0.0,"use_house":1.0} },
-{ "head":4, "rule":"[NN] ||| [JJ,1] haus ||| [JJ,1] shell", "tails":[3], "f":{"logp":0.0,"use_shell":1.0} },
-{ "head":4, "rule":"[NN] ||| [JJ,1] haus ||| [JJ,1] house", "tails":[3], "f":{"logp":0.0,"use_house":1.0} },
-{ "head":5, "rule":"[NP] ||| ein [NN,1] ||| a [NN,1]", "tails":[4], "f":{"logp":0.0,"use_a":1.0} },
-{ "head":6, "rule":"[VP] ||| [V,1] [NP,2] ||| [V,1] [NP,2]", "tails":[2, 5], "f":{"logp":0.0} },
-{ "head":7, "rule":"[S] ||| [NP,1] [VP,2] ||| [NP,1] [VP,2]", "tails":[1, 6], "f":{"logp":0.0} }
-]
-}
-*/
   Hg::Hypergraph hg;
-
-  // nodes
-  Hg::Node a; a.id = 0; a.symbol = "root"; a.left = false; a.right = false; a.mark = 0;
-  Hg::Node b; b.id = 1; b.symbol = "NP"; b.left = 0; b.right = 1; b.mark = 0;
-  Hg::Node c; c.id = 2; c.symbol = "V"; c.left = 1; c.right = 2; c.mark = 0;
-  Hg::Node d; d.id = 3; d.symbol = "JJ"; d.left = 3; d.right = 4; d.mark = 0;
-  Hg::Node e; e.id = 4; e.symbol = "NN"; e.left = 3; e.right = 5; e.mark = 0;
-  Hg::Node f; f.id = 5; f.symbol = "NP"; f.left = 2; f.right = 5; f.mark = 0;
-  Hg::Node g; g.id = 6; g.symbol = "NP"; g.left = 1; g.right = 5; g.mark = 0;
-  Hg::Node h; h.id = 7; h.symbol = "S"; h.left = 0; h.right = 6; h.mark = 0;
-
-  hg.add_node(&a);
-  hg.add_node(&h);
-  hg.add_node(&g);
-  hg.add_node(&c);
-  hg.add_node(&d);
-  hg.add_node(&f);
-  hg.add_node(&b);
-  hg.add_node(&e);
-
-  // edges
-  Hg::Edge q; q.head = hg.nodes_by_id[1]; q.tails.push_back(hg.nodes_by_id[0]); q.score = 0.367879441171;
-  hg.nodes_by_id[1]->incoming.push_back(&q);
-  hg.nodes_by_id[0]->outgoing.push_back(&q);
-  q.arity = 1;
-  q.mark = 0;
-  hg.edges.push_back(&q);
-
-  Hg::Edge p; p.head = hg.nodes_by_id[2]; p.tails.push_back(hg.nodes_by_id[0]); p.score = 0.606530659713;
-  hg.nodes_by_id[2]->incoming.push_back(&p);
-  hg.nodes_by_id[0]->outgoing.push_back(&p);
-  p.arity = 1;
-  p.mark = 0;
-  hg.edges.push_back(&p);
-
-  Hg::Edge r; r.head = hg.nodes_by_id[3]; r.tails.push_back(hg.nodes_by_id[0]); r.score = 1.0;
-  hg.nodes_by_id[3]->incoming.push_back(&r);
-  hg.nodes_by_id[0]->outgoing.push_back(&r);
-  r.arity = 1;
-  r.mark = 0;
-  hg.edges.push_back(&r);
-
-  Hg::Edge s; s.head = hg.nodes_by_id[3]; s.tails.push_back(hg.nodes_by_id[0]); s.score = 1.0;
-  hg.nodes_by_id[3]->incoming.push_back(&s);
-  hg.nodes_by_id[0]->outgoing.push_back(&s);
-  s.arity = 1;
-  s.mark = 0;
-  hg.edges.push_back(&s);
-
-  Hg::Edge t; t.head = hg.nodes_by_id[4]; t.tails.push_back(hg.nodes_by_id[0]); t.score = 1.0;
-  hg.nodes_by_id[4]->incoming.push_back(&t);
-  hg.nodes_by_id[0]->outgoing.push_back(&t);
-  t.arity = 1;
-  t.mark = 0;
-  hg.edges.push_back(&t);
-
-  Hg::Edge u; u.head = hg.nodes_by_id[4]; u.tails.push_back(hg.nodes_by_id[0]); u.score = 1.0;
-  hg.nodes_by_id[4]->incoming.push_back(&u);
-  hg.nodes_by_id[0]->outgoing.push_back(&u);
-  u.arity = 1;
-  u.mark = 0;
-  hg.edges.push_back(&u);
-
-  Hg::Edge v; v.head = hg.nodes_by_id[4]; v.tails.push_back(hg.nodes_by_id[3]); v.score = 1.0;
-  hg.nodes_by_id[4]->incoming.push_back(&v);
-  hg.nodes_by_id[3]->outgoing.push_back(&v);
-  v.arity = 1;
-  v.mark = 0;
-  hg.edges.push_back(&v);
-
-  Hg::Edge w; w.head = hg.nodes_by_id[4]; w.tails.push_back(hg.nodes_by_id[3]); w.score = 2.71828182846;
-  hg.nodes_by_id[4]->incoming.push_back(&w);
-  hg.nodes_by_id[3]->outgoing.push_back(&w);
-  w.arity = 1;
-  w.mark = 0;
-  hg.edges.push_back(&w);
-
-  Hg::Edge x; x.head = hg.nodes_by_id[5]; x.tails.push_back(hg.nodes_by_id[4]); x.score = 1.0;
-  hg.nodes_by_id[5]->incoming.push_back(&x);
-  hg.nodes_by_id[4]->outgoing.push_back(&x);
-  x.arity = 1;
-  x.mark = 0;
-  hg.edges.push_back(&x);
-
-  Hg::Edge y; y.head = hg.nodes_by_id[6]; y.tails.push_back(hg.nodes_by_id[2]); y.tails.push_back(hg.nodes_by_id[5]); y.score = 1.0;
-  hg.nodes_by_id[6]->incoming.push_back(&y);
-  hg.nodes_by_id[2]->outgoing.push_back(&y);
-  hg.nodes_by_id[5]->outgoing.push_back(&y);
-  y.arity = 2;
-  y.mark = 0;
-  hg.edges.push_back(&y);
-
-  Hg::Edge z; z.head = hg.nodes_by_id[7]; z.tails.push_back(hg.nodes_by_id[1]); z.tails.push_back(hg.nodes_by_id[6]); z.score = 1.0;
-  hg.nodes_by_id[7]->incoming.push_back(&z);
-  hg.nodes_by_id[1]->outgoing.push_back(&z);
-  hg.nodes_by_id[6]->outgoing.push_back(&z);
-  z.arity = 2;
-  z.mark = 0;
-  hg.edges.push_back(&z);
-
-  Hg::topological_sort(hg.nodes, hg.nodes.begin());
-  //Hg::viterbi(nodes, hg.nodes, hg.nodes_by_id(0]);
+  //Hg::io::manual(hg);
+  Hg::io::read(hg, argv[1]);  // NOTE(review): argv[1] is used without checking argc - confirm callers always pass a file
+  //Hg::viterbi(hg);
 }
diff --git a/fast/make_paks.cc
b/fast/make_paks.cc
new file mode 100644
index 0000000..6fe7fae
--- /dev/null
+++ b/fast/make_paks.cc
@@ -0,0 +1,112 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <msgpack.hpp>
+#include <msgpack/fbuffer.h>
+#include <msgpack/fbuffer.hpp>
+#include <unordered_map>
+
+#include "json-cpp.hpp"
+#include "hypergraph.hh"
+#include "dummyvector.h"
+
+using namespace std;
+
+// Plain structs mirroring the JSON input; main() copies them into Hg::Node / Hg::Edge objects.
+struct DummyNode {
+  int id;
+  string cat;
+  vector<int> span;  // main() reads span[0] as left and span[1] as right
+};
+
+struct DummyEdge {
+  int head;
+  string rule;
+  vector<size_t> tails;
+  DummyVector f;
+  score_t weight;
+};
+
+struct DummyHg {
+  vector<DummyNode> nodes;
+  vector<DummyEdge> edges;
+  DummyVector weights;
+};
+// jsoncpp field mappings: each JSON key is paired with the member it is parsed into or written from.
+template<typename X> inline void
+serialize(jsoncpp::Stream<X>& stream, DummyNode& o)
+{
+  fields(o, stream, "id", o.id, "cat", o.cat, "span", o.span);
+}
+
+template<typename X> inline void
+serialize(jsoncpp::Stream<X>& stream, DummyEdge& o)
+{
+  fields(o, stream, "head", o.head, "rule", o.rule, "tails", o.tails, "f", o.f, "weight", o.weight);
+}
+
+template<typename X> inline void
+serialize(jsoncpp::Stream<X>& stream, DummyHg& o)
+{
+  fields(o, stream, "nodes", o.nodes, "edges", o.edges, "weights", o.weights);
+}
+
+template<typename X> inline void
+serialize(jsoncpp::Stream<X>& stream, DummyVector& o)
+{
+  fields(o, stream, "EgivenFCoherent", o.EgivenFCoherent, "SampleCountF", o.SampleCountF, "CountEF", o.CountEF, "MaxLexFgivenE", o.MaxLexFgivenE, "MaxLexEgivenF", o.MaxLexEgivenF, "IsSingletonF", o.IsSingletonF, "IsSingletonFE", o.IsSingletonFE, "LanguageModel", o.LanguageModel, "LanguageModel_OOV", o.LanguageModel_OOV, "PassThrough", o.PassThrough, "PassThrough_1", o.PassThrough_1, "PassThrough_2", o.PassThrough_2, "PassThrough_3", o.PassThrough_3, "PassThrough_4", o.PassThrough_4, "PassThrough_5", o.PassThrough_5, "PassThrough_6", o.PassThrough_6, "WordPenalty", o.WordPenalty, "Glue", o.Glue);
+}
+
+
+// Reads the JSON hypergraph named by argv[1] and writes it as msgpack to the file named by argv[2].
+int
+main(int argc, char** argv)
+{
+  ifstream ifs(argv[1]);  // NOTE(review): argv[1] is not checked against argc
+  string
json_str((istreambuf_iterator<char>(ifs) ),
+                  (istreambuf_iterator<char>()));
+
+  // Value-initialise the target: empty node/edge lists and zeroed weights,
+  // so nothing indeterminate is copied or read if the input omits a field.
+  DummyHg hg{};
+  jsoncpp::parse(hg, json_str);
+
+  // Convert the parsed nodes; at() makes a too-short "span" array throw
+  // std::out_of_range instead of reading past the end of the vector.
+  vector<Hg::Node*> nodes_;
+  for (auto it = hg.nodes.begin(); it != hg.nodes.end(); ++it) {
+    Hg::Node* n = new Hg::Node;
+    n->id = it->id;
+    n->symbol = it->cat;
+    n->left = it->span.at(0);
+    n->right = it->span.at(1);
+    nodes_.push_back(n);
+  }
+
+  vector<Hg::Edge*> edges_;
+  for (auto it = hg.edges.begin(); it != hg.edges.end(); ++it) {
+    Hg::Edge* e = new Hg::Edge;
+    e->head_id_ = it->head;
+    e->tails_ids_ = it->tails;
+    e->score = it->weight;
+    e->rule = it->rule;
+    e->f = it->f;
+    edges_.push_back(e);
+  }
+
+  FILE* file = argc > 2 ? fopen(argv[2], "wb") : nullptr;
+  if (!file) {  // missing argument or unwritable path: stop before packing into a null FILE*
+    cerr << "make_paks: cannot open output file" << endl;
+    return 1;
+  }
+  msgpack::fbuffer fbuf(file);
+  msgpack::pack(fbuf, hg.nodes.size());  // header: node count, then edge count
+  msgpack::pack(fbuf, hg.edges.size());
+  for (auto it = nodes_.begin(); it != nodes_.end(); ++it)
+    msgpack::pack(fbuf, **it);
+  for (auto it = edges_.begin(); it != edges_.end(); ++it)
+    msgpack::pack(fbuf, **it);
+
+  fclose(file);
+
+  return 0;
+}
+
diff --git a/fast/read_pak.cc b/fast/read_pak.cc
new file mode 100644
index 0000000..81eed5d
--- /dev/null
+++ b/fast/read_pak.cc
@@ -0,0 +1,26 @@
+#include <msgpack.hpp>
+#include <iostream>
+#include <fstream>
+
+using namespace std;
+
+// Prints every msgpack object stored in the file named by argv[1], one per line.
+int
+main(int argc, char** argv)
+{
+  if (argc < 2) return 1;  // no input file given
+  ifstream ifs(argv[1], ios::binary);  // msgpack data is binary
+  msgpack::unpacker pac;
+  while(true) {
+    pac.reserve_buffer(32*1024);
+    ifs.read(pac.buffer(), pac.buffer_capacity());  // read() + gcount(): readsome() may return 0 on a file stream
+    size_t bytes = static_cast<size_t>(ifs.gcount());
+    pac.buffer_consumed(bytes);
+    msgpack::unpacked result;
+    while(pac.next(&result)) {
+      msgpack::object o = result.get();
+      cout << o << endl;
+    }
+    if (!bytes) break;  // nothing more could be read
+  }
+}
diff --git a/fast/semiring.hh b/fast/semiring.hh
index 2be19ea..5874e88 100644
--- a/fast/semiring.hh
+++ b/fast/semiring.hh
@@ -1,37 +1,36 @@
-#ifndef SEMIRING_HH
-#define SEMIRING_HH
-//#pragma once
+#pragma once
+
+namespace Semiring {  // wraps the semiring types; closed at the end of the header
 template<typename T>
-class ViterbiSemiring {
- public:
-  T one = 1.0;
-  T null = 0.0;
-
-  T add(T x, T y);
-  T multiply(T x, T y);
-  T convert(T x);
+struct Viterbi {  // add() takes the maximum and multiply() the product, per the definitions below
+  T one = 1.0;  // presumably the identity for multiply() - confirm
+  T null = 0.0;  // presumably the identity for add() on non-negative scores - confirm
+
+  T add(T x, T y);
+  T multiply(T x, T y);
+  T convert(T x);
 };
 template<typename T> T
-ViterbiSemiring<T>::add(T x, T y)
+Viterbi<T>::add(T x, T y)  // returns max(x, y); `max` is unqualified, so it must be visible where the header is included
 {
   return max(x, y);
 }
 template<typename T> T
-ViterbiSemiring<T>::multiply(T x, T y)
+Viterbi<T>::multiply(T x, T y)  // returns x * y
 {
   return x * y;
 }
 template<typename T> T
-ViterbiSemiring<T>::convert(T x)
+Viterbi<T>::convert(T x)  // returns x cast to T
 {
   return (T)x;
 }
-#endif
+} // namespace
|