diff options
Diffstat (limited to 'util')
-rw-r--r-- | util/Makefile | 14 | ||||
-rwxr-xr-x | util/cdec2json.py | 96 | ||||
m--------- | util/json-cpp | 0 | ||||
-rw-r--r-- | util/make_pak.cc | 104 | ||||
-rw-r--r-- | util/read_pak.cc | 27 |
5 files changed, 0 insertions, 241 deletions
diff --git a/util/Makefile b/util/Makefile deleted file mode 100644 index 30564fe..0000000 --- a/util/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -COMPILER=clang - - -all: make_pak read_pak - -make_pak: make_pak.cc json-cpp/single_include/json-cpp.hpp ../fast/hypergraph.hh ../fast/weaver.hh - $(COMPILER) -std=c++11 -lstdc++ -lm -lmsgpack make_pak.cc -o make_pak - -read_pak: read_pak.cc - $(COMPILER) -std=c++11 -lstdc++ -lmsgpack read_pak.cc -o read_pak - -clean: - rm -f make_pak read_pak - diff --git a/util/cdec2json.py b/util/cdec2json.py deleted file mode 100755 index e7c8e93..0000000 --- a/util/cdec2json.py +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/env python2 - -import cdec -import sys, argparse -import json -import gzip - - -#FIXME new format -# strings? -# map? -def hg2json(hg, weights): - """ - output a JSON representation of a cdec hypegraph - """ - res = '' - res += "{\n" - res += '"rules":[\n' - rules = [] - for i in hg.edges: - s = json.dumps(str(i.trule)) - try: - rules.index(s) - except: - rules.append(s) - res += ",\n".join(rules) - res += "\n],\n" - res += '"nodes":'+"\n" - res += "[\n" - a = [] - a.append( '{ "id":0, "symbol":"root", "span":[-1,-1] }' ) - for i in hg.nodes: - a.append('{ "id":%d, "symbol":"%s", "span":[%d,%d] }'%(i.id+1, i.cat, i.span[0], i.span[1])) - res += ",\n".join(a)+"\n" - res += "],\n" - res += '"edges":'+"\n" - res += "[\n" - a = [] - for i in hg.edges: - s = "{" - s += '"head":%d'%(i.head_node.id+1) - s += ', "rule":%s'%(rules.index(json.dumps(str(i.trule)))) - # f - #xs = ' "f":{' - #b = [] - #for j in i.feature_values: - # b.append( '"%s":%s'%(j[0], j[1]) ) - #xs += ", ".join(b) - #xs += "}," - # tails - if len(list(i.tail_nodes)) > 0: - s += ', "tails":[ %s ],'%(",".join([str(n.id+1) for n in i.tail_nodes])) - else: - s += ', "tails":[ 0 ],' - #s += xs - s += ' "score":%s }'%(i.prob) - a.append(s) - res += ",\n".join(a)+"\n" - res += "]\n" - res += "}\n" - return res - -def main(): - parser = argparse.ArgumentParser(description='get a proper json representation of cdec hypergraphs') - parser.add_argument('-c', '--config', required=True, help='decoder configuration') - parser.add_argument('-w', '--weights', required=True, help='feature weights') - parser.add_argument('-g', '--grammar', required=False, help='grammar') - args = parser.parse_args() - with open(args.config) as config: - config = config.read() - decoder = cdec.Decoder(config) - decoder.read_weights(args.weights) - ins = sys.stdin.readline().strip() - if args.grammar: - with gzip.open(args.grammar) as grammar: - grammar = grammar.read() - hg = decoder.translate(ins, grammar=grammar) - else: - hg = decoder.translate(ins) - - sys.stderr.write( "input:\n '%s'\n"%(ins) ) - sys.stderr.write( "viterbi translation:\n '%s'\n"%(hg.viterbi()) ) - num_nodes = 0 - for i in hg.nodes: num_nodes+=1 - sys.stderr.write( "# nodes = %s\n"%(num_nodes) ) - num_edges = 0 - for i in hg.edges: num_edges+=1 - sys.stderr.write( "# edges = %s\n"%(num_edges) ) - sys.stderr.write( "viterbi score = %s\n"%(round(hg.viterbi_features().dot(decoder.weights), 2)) ) - - print hg2json(hg, decoder.weights) - - -if __name__=="__main__": - main() - diff --git a/util/json-cpp b/util/json-cpp deleted file mode 160000 -Subproject 4eb4b47cf4d622bc7bf34071d6b68fc5beb3705 diff --git a/util/make_pak.cc b/util/make_pak.cc deleted file mode 100644 index e858155..0000000 --- a/util/make_pak.cc +++ /dev/null @@ -1,104 +0,0 @@ -#include <iostream> -#include <fstream> -#include <msgpack.hpp> -#include <msgpack/fbuffer.hpp> -#include <string> - -#include "json-cpp/single_include/json-cpp.hpp" -#include "../fast/hypergraph.hh" -#include "../fast/weaver.hh" - -using namespace std; - - -struct DummyNode { - size_t id; - string symbol; - vector<short> span; -}; - -struct DummyEdge { - size_t head_id; - size_t rule_id; - vector<size_t> tails_ids; - string f; - score_t score; -}; - -struct DummyHg { - vector<string> rules; - vector<DummyNode> nodes; - vector<DummyEdge> edges; -}; - -template<typename X> inline void -serialize(jsoncpp::Stream<X>& stream, DummyNode& o) -{ - fields(o, stream, "id", o.id, "symbol", o.symbol, "span", o.span); -} - -template<typename X> inline void -serialize(jsoncpp::Stream<X>& stream, DummyEdge& o) -{ - fields(o, stream, "head", o.head_id, "rule", o.rule_id, "tails", o.tails_ids, "score", o.score); -} - -template<typename X> inline void -serialize(jsoncpp::Stream<X>& stream, DummyHg& o) -{ - fields(o, stream, "rules", o.rules, "nodes", o.nodes, "edges", o.edges); -} - -int -main(int argc, char** argv) -{ - // read from json - ifstream ifs(argv[1]); - string json_str((istreambuf_iterator<char>(ifs) ), - (istreambuf_iterator<char>())); - DummyHg hg; - vector<string> rules; - hg.rules = rules; - vector<DummyNode> nodes; - hg.nodes = nodes; - vector<DummyEdge> edges; - hg.edges = edges; - jsoncpp::parse(hg, json_str); - - // convert to proper objects - vector<Hg::Node*> nodes_conv; - for (const auto it: hg.nodes) { - Hg::Node* n = new Hg::Node; - n->id = it.id; - n->symbol = it.symbol; - n->left = it.span[0]; - n->right = it.span[1]; - nodes_conv.push_back(n); - } - vector<Hg::Edge*> edges_conv; - for (const auto it: hg.edges) { - Hg::Edge* e = new Hg::Edge; - e->head_id_ = it.head_id; - e->tails_ids_ = it.tails_ids; - e->score = it.score; - e->rule_id_ = it.rule_id; - edges_conv.push_back(e); - } - - // write to msgpack - FILE* file = fopen(argv[2], "wb"); - msgpack::fbuffer fbuf(file); - msgpack::pack(fbuf, hg.rules.size()); - msgpack::pack(fbuf, hg.nodes.size()); - msgpack::pack(fbuf, hg.edges.size()); - for (const auto it: hg.rules) - msgpack::pack(fbuf, it); - for (const auto it: nodes_conv) - msgpack::pack(fbuf, *it); - for (const auto it: edges_conv) - msgpack::pack(fbuf, *it); - fclose(file); - - return 0; -} - diff --git a/util/read_pak.cc b/util/read_pak.cc deleted file mode 100644 index afd6e6a..0000000 --- a/util/read_pak.cc +++ /dev/null @@ -1,27 +0,0 @@ -#include <iostream> -#include <fstream> -#include <msgpack.hpp> - -using namespace std; - - -int -main(int argc, char** argv) -{ - ifstream ifs(argv[1]); - msgpack::unpacker pac; - while(true) { - pac.reserve_buffer(32*1024); - size_t bytes = ifs.readsome(pac.buffer(), pac.buffer_capacity()); - pac.buffer_consumed(bytes); - msgpack::unpacked result; - while(pac.next(&result)) { - msgpack::object o = result.get(); - cout << o << endl; - } - if (!bytes) break; - } - - return 0; -} - |