summaryrefslogtreecommitdiff
path: root/util
diff options
context:
space:
mode:
Diffstat (limited to 'util')
-rw-r--r--util/Makefile14
-rwxr-xr-xutil/cdec2json.py96
m---------util/json-cpp0
-rw-r--r--util/make_pak.cc104
-rw-r--r--util/read_pak.cc27
5 files changed, 0 insertions, 241 deletions
diff --git a/util/Makefile b/util/Makefile
deleted file mode 100644
index 30564fe..0000000
--- a/util/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-COMPILER=clang
-
-
-all: make_pak read_pak
-
-make_pak: make_pak.cc json-cpp/single_include/json-cpp.hpp ../fast/hypergraph.hh ../fast/weaver.hh
- $(COMPILER) -std=c++11 -lstdc++ -lm -lmsgpack make_pak.cc -o make_pak
-
-read_pak: read_pak.cc
- $(COMPILER) -std=c++11 -lstdc++ -lmsgpack read_pak.cc -o read_pak
-
-clean:
- rm -f make_pak read_pak
-
diff --git a/util/cdec2json.py b/util/cdec2json.py
deleted file mode 100755
index e7c8e93..0000000
--- a/util/cdec2json.py
+++ /dev/null
@@ -1,96 +0,0 @@
-#!/usr/bin/env python2
-
-import cdec
-import sys, argparse
-import json
-import gzip
-
-
-#FIXME new format
-# strings?
-# map?
-def hg2json(hg, weights):
- """
- output a JSON representation of a cdec hypegraph
- """
- res = ''
- res += "{\n"
- res += '"rules":[\n'
- rules = []
- for i in hg.edges:
- s = json.dumps(str(i.trule))
- try:
- rules.index(s)
- except:
- rules.append(s)
- res += ",\n".join(rules)
- res += "\n],\n"
- res += '"nodes":'+"\n"
- res += "[\n"
- a = []
- a.append( '{ "id":0, "symbol":"root", "span":[-1,-1] }' )
- for i in hg.nodes:
- a.append('{ "id":%d, "symbol":"%s", "span":[%d,%d] }'%(i.id+1, i.cat, i.span[0], i.span[1]))
- res += ",\n".join(a)+"\n"
- res += "],\n"
- res += '"edges":'+"\n"
- res += "[\n"
- a = []
- for i in hg.edges:
- s = "{"
- s += '"head":%d'%(i.head_node.id+1)
- s += ', "rule":%s'%(rules.index(json.dumps(str(i.trule))))
- # f
- #xs = ' "f":{'
- #b = []
- #for j in i.feature_values:
- # b.append( '"%s":%s'%(j[0], j[1]) )
- #xs += ", ".join(b)
- #xs += "},"
- # tails
- if len(list(i.tail_nodes)) > 0:
- s += ', "tails":[ %s ],'%(",".join([str(n.id+1) for n in i.tail_nodes]))
- else:
- s += ', "tails":[ 0 ],'
- #s += xs
- s += ' "score":%s }'%(i.prob)
- a.append(s)
- res += ",\n".join(a)+"\n"
- res += "]\n"
- res += "}\n"
- return res
-
-def main():
- parser = argparse.ArgumentParser(description='get a proper json representation of cdec hypergraphs')
- parser.add_argument('-c', '--config', required=True, help='decoder configuration')
- parser.add_argument('-w', '--weights', required=True, help='feature weights')
- parser.add_argument('-g', '--grammar', required=False, help='grammar')
- args = parser.parse_args()
- with open(args.config) as config:
- config = config.read()
- decoder = cdec.Decoder(config)
- decoder.read_weights(args.weights)
- ins = sys.stdin.readline().strip()
- if args.grammar:
- with gzip.open(args.grammar) as grammar:
- grammar = grammar.read()
- hg = decoder.translate(ins, grammar=grammar)
- else:
- hg = decoder.translate(ins)
-
- sys.stderr.write( "input:\n '%s'\n"%(ins) )
- sys.stderr.write( "viterbi translation:\n '%s'\n"%(hg.viterbi()) )
- num_nodes = 0
- for i in hg.nodes: num_nodes+=1
- sys.stderr.write( "# nodes = %s\n"%(num_nodes) )
- num_edges = 0
- for i in hg.edges: num_edges+=1
- sys.stderr.write( "# edges = %s\n"%(num_edges) )
- sys.stderr.write( "viterbi score = %s\n"%(round(hg.viterbi_features().dot(decoder.weights), 2)) )
-
- print hg2json(hg, decoder.weights)
-
-
-if __name__=="__main__":
- main()
-
diff --git a/util/json-cpp b/util/json-cpp
deleted file mode 160000
-Subproject 4eb4b47cf4d622bc7bf34071d6b68fc5beb3705
diff --git a/util/make_pak.cc b/util/make_pak.cc
deleted file mode 100644
index e858155..0000000
--- a/util/make_pak.cc
+++ /dev/null
@@ -1,104 +0,0 @@
-#include <iostream>
-#include <fstream>
-#include <msgpack.hpp>
-#include <msgpack/fbuffer.hpp>
-#include <string>
-
-#include "json-cpp/single_include/json-cpp.hpp"
-#include "../fast/hypergraph.hh"
-#include "../fast/weaver.hh"
-
-using namespace std;
-
-
-struct DummyNode {
- size_t id;
- string symbol;
- vector<short> span;
-};
-
-struct DummyEdge {
- size_t head_id;
- size_t rule_id;
- vector<size_t> tails_ids;
- string f;
- score_t score;
-};
-
-struct DummyHg {
- vector<string> rules;
- vector<DummyNode> nodes;
- vector<DummyEdge> edges;
-};
-
-template<typename X> inline void
-serialize(jsoncpp::Stream<X>& stream, DummyNode& o)
-{
- fields(o, stream, "id", o.id, "symbol", o.symbol, "span", o.span);
-}
-
-template<typename X> inline void
-serialize(jsoncpp::Stream<X>& stream, DummyEdge& o)
-{
- fields(o, stream, "head", o.head_id, "rule", o.rule_id, "tails", o.tails_ids, "score", o.score);
-}
-
-template<typename X> inline void
-serialize(jsoncpp::Stream<X>& stream, DummyHg& o)
-{
- fields(o, stream, "rules", o.rules, "nodes", o.nodes, "edges", o.edges);
-}
-
-int
-main(int argc, char** argv)
-{
- // read from json
- ifstream ifs(argv[1]);
- string json_str((istreambuf_iterator<char>(ifs) ),
- (istreambuf_iterator<char>()));
- DummyHg hg;
- vector<string> rules;
- hg.rules = rules;
- vector<DummyNode> nodes;
- hg.nodes = nodes;
- vector<DummyEdge> edges;
- hg.edges = edges;
- jsoncpp::parse(hg, json_str);
-
- // convert to proper objects
- vector<Hg::Node*> nodes_conv;
- for (const auto it: hg.nodes) {
- Hg::Node* n = new Hg::Node;
- n->id = it.id;
- n->symbol = it.symbol;
- n->left = it.span[0];
- n->right = it.span[1];
- nodes_conv.push_back(n);
- }
- vector<Hg::Edge*> edges_conv;
- for (const auto it: hg.edges) {
- Hg::Edge* e = new Hg::Edge;
- e->head_id_ = it.head_id;
- e->tails_ids_ = it.tails_ids;
- e->score = it.score;
- e->rule_id_ = it.rule_id;
- edges_conv.push_back(e);
- }
-
- // write to msgpack
- FILE* file = fopen(argv[2], "wb");
- msgpack::fbuffer fbuf(file);
- msgpack::pack(fbuf, hg.rules.size());
- msgpack::pack(fbuf, hg.nodes.size());
- msgpack::pack(fbuf, hg.edges.size());
- for (const auto it: hg.rules)
- msgpack::pack(fbuf, it);
- for (const auto it: nodes_conv)
- msgpack::pack(fbuf, *it);
- for (const auto it: edges_conv)
- msgpack::pack(fbuf, *it);
- fclose(file);
-
- return 0;
-}
-
diff --git a/util/read_pak.cc b/util/read_pak.cc
deleted file mode 100644
index afd6e6a..0000000
--- a/util/read_pak.cc
+++ /dev/null
@@ -1,27 +0,0 @@
-#include <iostream>
-#include <fstream>
-#include <msgpack.hpp>
-
-using namespace std;
-
-
-int
-main(int argc, char** argv)
-{
- ifstream ifs(argv[1]);
- msgpack::unpacker pac;
- while(true) {
- pac.reserve_buffer(32*1024);
- size_t bytes = ifs.readsome(pac.buffer(), pac.buffer_capacity());
- pac.buffer_consumed(bytes);
- msgpack::unpacked result;
- while(pac.next(&result)) {
- msgpack::object o = result.get();
- cout << o << endl;
- }
- if (!bytes) break;
- }
-
- return 0;
-}
-