diff options
Diffstat (limited to 'data')
-rw-r--r-- | data/Makefile | 9 | ||||
-rw-r--r-- | data/cdec.ini | 4 | ||||
-rwxr-xr-x | data/make.sh | 8 | ||||
-rw-r--r-- | data/make_paks.cc | 125 | ||||
-rw-r--r-- | data/make_paks2.cc | 121 | ||||
-rwxr-xr-x | data/to_ascii.rb | 13 | ||||
-rw-r--r-- | data/weights.init | 12 |
7 files changed, 0 insertions, 292 deletions
diff --git a/data/Makefile b/data/Makefile deleted file mode 100644 index 24d85a3..0000000 --- a/data/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -make_paks: make_paks.cc - g++ -std=c++11 make_paks.cc -I../msgpack-c/include/ ../msgpack-c/lib/libmsgpack.a -o make_paks - -make_paks2: make_paks2.cc - g++ -std=c++11 make_paks2.cc -I../msgpack-c/include/ ../msgpack-c/lib/libmsgpack.a -o make_paks2 - -clean: - rm -f make_paks - diff --git a/data/cdec.ini b/data/cdec.ini deleted file mode 100644 index ddbe54c..0000000 --- a/data/cdec.ini +++ /dev/null @@ -1,4 +0,0 @@ -formalism=scfg -intersection_strategy=full -add_pass_through_rules=true - diff --git a/data/make.sh b/data/make.sh deleted file mode 100755 index 5e0c31b..0000000 --- a/data/make.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/zsh - - -# wmt/14/newstest2008 data -for i in 1020 1391 1495 1570 1889 2002 429 748; do - ~/src/weaver/util/cdec2json.py -c cdec.ini -w weights.init -g grammar.$i.gz < $i.in | ./to_ascii.rb > $i.json -done - diff --git a/data/make_paks.cc b/data/make_paks.cc deleted file mode 100644 index ca6c9b2..0000000 --- a/data/make_paks.cc +++ /dev/null @@ -1,125 +0,0 @@ -#include <iostream> -#include <fstream> -#include <string> -#include <msgpack.hpp> -#include <msgpack/fbuffer.h> -#include <msgpack/fbuffer.hpp> - - -/* - * https://github.com/ascheglov/json-cpp - * - */ -#include "../json-cpp.hpp" - -using namespace std; - - -struct Node { - int id; - string cat; - vector<int> span; - - MSGPACK_DEFINE(id, cat, span); -}; - -struct Vector { - double CountEF; - double EgivenFCoherent; - double Glue; - double IsSingletonF; - double IsSingletonFE; - double LanguageModel; - double LanguageModel_OOV; - double MaxLexFgivenE; - double MaxLexEgivenF; - double PassThrough; - double PassThrough_1; - double PassThrough_2; - double PassThrough_3; - double PassThrough_4; - double PassThrough_5; - double PassThrough_6; - double SampleCountF; - double WordPenalty; - - MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty); -}; - -struct Edge { - int head; - string rule; - vector<int> tails; - Vector f; - double weight; - - MSGPACK_DEFINE(head, rule, tails, f, weight); -}; - -struct Hg { - Vector weights; - vector<Node> nodes; - vector<Edge> edges; - - MSGPACK_DEFINE(weights, nodes, edges); -}; - -template<typename X> inline void -serialize(jsoncpp::Stream<X>& stream, Hg& o) -{ - fields(o, stream, "weights", o.weights, "nodes", o.nodes, "edges", o.edges); -} - -template<typename X> inline void -serialize(jsoncpp::Stream<X>& stream, Edge& o) -{ - fields(o, stream, "head", o.head, "rule", o.rule, "tails", o.tails, "f", o.f, "weight", o.weight); -} - -template<typename X> inline void -serialize(jsoncpp::Stream<X>& stream, Vector& o) -{ - fields(o, stream, "EgivenFCoherent", o.EgivenFCoherent, "SampleCountF", o.SampleCountF, "CountEF", o.CountEF, "MaxLexFgivenE", o.MaxLexFgivenE, "MaxLexEgivenF", o.MaxLexEgivenF, "IsSingletonF", o.IsSingletonF, "IsSingletonFE", o.IsSingletonFE, "LanguageModel", o.LanguageModel, "LanguageModel_OOV", o.LanguageModel_OOV, "PassThrough", o.PassThrough, "PassThrough_1", o.PassThrough_1, "PassThrough_2", o.PassThrough_2, "PassThrough_3", o.PassThrough_3, "PassThrough_4", o.PassThrough_4, "PassThrough_5", o.PassThrough_5, "PassThrough_6", o.PassThrough_6, "WordPenalty", o.WordPenalty, "Glue", o.Glue); -} - -template<typename X> inline void -serialize(jsoncpp::Stream<X>& stream, Node& o) -{ - fields(o, stream, "id", o.id, "cat", o.cat, "span", o.span); -} - -int -main(int argc, char** argv) -{ - ifstream ifs(argv[1]); - string json_str((istreambuf_iterator<char>(ifs) ), - (istreambuf_iterator<char>())); - - Hg hg; - Vector w; - hg.weights = w; - vector<Node> nodes; - hg.nodes = nodes; - vector<Edge> edges; - hg.edges = edges; - jsoncpp::parse(hg, json_str); - - FILE* file = fopen(argv[2], "wb"); - msgpack::fbuffer fbuf(file); - msgpack::pack(fbuf, hg); - fclose(file); - - /*ifstream ifs1(argv[2]); - string str1((istreambuf_iterator<char>(jfs1)), - (istreambuf_iterator<char>())); - - msgpack::zone zone; - msgpack::object obj; - msgpack::unpack(str1.data(), str1.size(), NULL, &zone, &obj); - - Hg hg; - obj.convert(&hg);*/ - - return 0; -} - diff --git a/data/make_paks2.cc b/data/make_paks2.cc deleted file mode 100644 index 1b5895b..0000000 --- a/data/make_paks2.cc +++ /dev/null @@ -1,121 +0,0 @@ -#include <iostream> -#include <fstream> -#include <string> -#include <msgpack.hpp> -#include <msgpack/fbuffer.h> -#include <msgpack/fbuffer.hpp> - - -/* - * https://github.com/ascheglov/json-cpp - * - */ -#include "../json-cpp.hpp" - -using namespace std; - - -struct Node { - int id; - string cat; - vector<int> span; - - MSGPACK_DEFINE(id, cat, span); -}; - -struct Vector { - double CountEF; - double EgivenFCoherent; - double Glue; - double IsSingletonF; - double IsSingletonFE; - double LanguageModel; - double LanguageModel_OOV; - double MaxLexFgivenE; - double MaxLexEgivenF; - double PassThrough; - double PassThrough_1; - double PassThrough_2; - double PassThrough_3; - double PassThrough_4; - double PassThrough_5; - double PassThrough_6; - double SampleCountF; - double WordPenalty; - - MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty); -}; - -struct Edge { - int head; - string rule; - vector<int> tails; - Vector f; - double weight; - - MSGPACK_DEFINE(head, rule, tails, f, weight); -}; - -struct Hg { - Vector weights; - vector<Node> nodes; - vector<Edge> edges; - - MSGPACK_DEFINE(weights, nodes, edges); -}; - -template<typename X> inline void -serialize(jsoncpp::Stream<X>& stream, Hg& o) -{ - fields(o, stream, "weights", o.weights, "nodes", o.nodes, "edges", o.edges); -} - -template<typename X> inline void -serialize(jsoncpp::Stream<X>& stream, Edge& o) -{ - fields(o, stream, "head", o.head, "rule", o.rule, "tails", o.tails, "f", o.f, "weight", o.weight); -} - -template<typename X> inline void -serialize(jsoncpp::Stream<X>& stream, Vector& o) -{ - fields(o, stream, "EgivenFCoherent", o.EgivenFCoherent, "SampleCountF", o.SampleCountF, "CountEF", o.CountEF, "MaxLexFgivenE", o.MaxLexFgivenE, "MaxLexEgivenF", o.MaxLexEgivenF, "IsSingletonF", o.IsSingletonF, "IsSingletonFE", o.IsSingletonFE, "LanguageModel", o.LanguageModel, "LanguageModel_OOV", o.LanguageModel_OOV, "PassThrough", o.PassThrough, "PassThrough_1", o.PassThrough_1, "PassThrough_2", o.PassThrough_2, "PassThrough_3", o.PassThrough_3, "PassThrough_4", o.PassThrough_4, "PassThrough_5", o.PassThrough_5, "PassThrough_6", o.PassThrough_6, "WordPenalty", o.WordPenalty, "Glue", o.Glue); -} - -template<typename X> inline void -serialize(jsoncpp::Stream<X>& stream, Node& o) -{ - fields(o, stream, "id", o.id, "cat", o.cat, "span", o.span); -} - -int -main(int argc, char** argv) -{ - ifstream ifs(argv[1]); - string json_str((istreambuf_iterator<char>(ifs) ), - (istreambuf_iterator<char>())); - - Hg hg; - Vector w; - hg.weights = w; - vector<Node> nodes; - hg.nodes = nodes; - vector<Edge> edges; - hg.edges = edges; - jsoncpp::parse(hg, json_str); - - FILE* file = fopen(argv[2], "wb"); - msgpack::fbuffer fbuf(file); - msgpack::pack(fbuf, hg.nodes.size()); - msgpack::pack(fbuf, hg.edges.size()); - msgpack::pack(fbuf, hg.weights); - for (auto it = hg.nodes.begin(); it != hg.nodes.end(); it++) - msgpack::pack(fbuf, *it); - for (auto it = hg.edges.begin(); it != hg.edges.end(); it++) - msgpack::pack(fbuf, *it); - - fclose(file); - - return 0; -} - diff --git a/data/to_ascii.rb b/data/to_ascii.rb deleted file mode 100755 index 6c1d23e..0000000 --- a/data/to_ascii.rb +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env ruby - - -while line = STDIN.gets - encoding_options = { - :invalid => :replace, - :undef => :replace, - :replace => '?', - :universal_newline => true - } - puts line.encode 'ASCII', encoding_options -end - diff --git a/data/weights.init b/data/weights.init deleted file mode 100644 index 0d09f9f..0000000 --- a/data/weights.init +++ /dev/null @@ -1,12 +0,0 @@ -CountEF 0.1 -EgivenFCoherent -0.1 -Glue 0.01 -IsSingletonF -0.01 -IsSingletonFE -0.01 -LanguageModel 0.1 -LanguageModel_OOV -1 -MaxLexFgivenE -0.1 -MaxLexEgivenF -0.1 -PassThrough -0.1 -SampleCountF -0.1 -WordPenalty -0.1 |