summary refs log tree commit diff
path: root/data
diff options
context:
space:
mode:
Diffstat (limited to 'data')
-rw-r--r-- data/Makefile 9
-rw-r--r-- data/cdec.ini 4
-rwxr-xr-x data/make.sh 8
-rw-r--r-- data/make_paks.cc 125
-rw-r--r-- data/make_paks2.cc 121
-rwxr-xr-x data/to_ascii.rb 13
-rw-r--r-- data/weights.init 12
7 files changed, 0 insertions, 292 deletions
diff --git a/data/Makefile b/data/Makefile
deleted file mode 100644
index 24d85a3..0000000
--- a/data/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-make_paks: make_paks.cc
- g++ -std=c++11 make_paks.cc -I../msgpack-c/include/ ../msgpack-c/lib/libmsgpack.a -o make_paks
-
-make_paks2: make_paks2.cc
- g++ -std=c++11 make_paks2.cc -I../msgpack-c/include/ ../msgpack-c/lib/libmsgpack.a -o make_paks2
-
-clean:
- rm -f make_paks
-
diff --git a/data/cdec.ini b/data/cdec.ini
deleted file mode 100644
index ddbe54c..0000000
--- a/data/cdec.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-formalism=scfg
-intersection_strategy=full
-add_pass_through_rules=true
-
diff --git a/data/make.sh b/data/make.sh
deleted file mode 100755
index 5e0c31b..0000000
--- a/data/make.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/zsh
-
-
-# wmt/14/newstest2008 data
-for i in 1020 1391 1495 1570 1889 2002 429 748; do
- ~/src/weaver/util/cdec2json.py -c cdec.ini -w weights.init -g grammar.$i.gz < $i.in | ./to_ascii.rb > $i.json
-done
-
diff --git a/data/make_paks.cc b/data/make_paks.cc
deleted file mode 100644
index ca6c9b2..0000000
--- a/data/make_paks.cc
+++ /dev/null
@@ -1,125 +0,0 @@
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <msgpack.hpp>
-#include <msgpack/fbuffer.h>
-#include <msgpack/fbuffer.hpp>
-
-
-/*
- * https://github.com/ascheglov/json-cpp
- *
- */
-#include "../json-cpp.hpp"
-
-using namespace std;
-
-
-struct Node {
- int id;
- string cat;
- vector<int> span;
-
- MSGPACK_DEFINE(id, cat, span);
-};
-
-struct Vector {
- double CountEF;
- double EgivenFCoherent;
- double Glue;
- double IsSingletonF;
- double IsSingletonFE;
- double LanguageModel;
- double LanguageModel_OOV;
- double MaxLexFgivenE;
- double MaxLexEgivenF;
- double PassThrough;
- double PassThrough_1;
- double PassThrough_2;
- double PassThrough_3;
- double PassThrough_4;
- double PassThrough_5;
- double PassThrough_6;
- double SampleCountF;
- double WordPenalty;
-
- MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty);
-};
-
-struct Edge {
- int head;
- string rule;
- vector<int> tails;
- Vector f;
- double weight;
-
- MSGPACK_DEFINE(head, rule, tails, f, weight);
-};
-
-struct Hg {
- Vector weights;
- vector<Node> nodes;
- vector<Edge> edges;
-
- MSGPACK_DEFINE(weights, nodes, edges);
-};
-
-template<typename X> inline void
-serialize(jsoncpp::Stream<X>& stream, Hg& o)
-{
- fields(o, stream, "weights", o.weights, "nodes", o.nodes, "edges", o.edges);
-}
-
-template<typename X> inline void
-serialize(jsoncpp::Stream<X>& stream, Edge& o)
-{
- fields(o, stream, "head", o.head, "rule", o.rule, "tails", o.tails, "f", o.f, "weight", o.weight);
-}
-
-template<typename X> inline void
-serialize(jsoncpp::Stream<X>& stream, Vector& o)
-{
- fields(o, stream, "EgivenFCoherent", o.EgivenFCoherent, "SampleCountF", o.SampleCountF, "CountEF", o.CountEF, "MaxLexFgivenE", o.MaxLexFgivenE, "MaxLexEgivenF", o.MaxLexEgivenF, "IsSingletonF", o.IsSingletonF, "IsSingletonFE", o.IsSingletonFE, "LanguageModel", o.LanguageModel, "LanguageModel_OOV", o.LanguageModel_OOV, "PassThrough", o.PassThrough, "PassThrough_1", o.PassThrough_1, "PassThrough_2", o.PassThrough_2, "PassThrough_3", o.PassThrough_3, "PassThrough_4", o.PassThrough_4, "PassThrough_5", o.PassThrough_5, "PassThrough_6", o.PassThrough_6, "WordPenalty", o.WordPenalty, "Glue", o.Glue);
-}
-
-template<typename X> inline void
-serialize(jsoncpp::Stream<X>& stream, Node& o)
-{
- fields(o, stream, "id", o.id, "cat", o.cat, "span", o.span);
-}
-
-int
-main(int argc, char** argv)
-{
- ifstream ifs(argv[1]);
- string json_str((istreambuf_iterator<char>(ifs) ),
- (istreambuf_iterator<char>()));
-
- Hg hg;
- Vector w;
- hg.weights = w;
- vector<Node> nodes;
- hg.nodes = nodes;
- vector<Edge> edges;
- hg.edges = edges;
- jsoncpp::parse(hg, json_str);
-
- FILE* file = fopen(argv[2], "wb");
- msgpack::fbuffer fbuf(file);
- msgpack::pack(fbuf, hg);
- fclose(file);
-
- /*ifstream ifs1(argv[2]);
- string str1((istreambuf_iterator<char>(jfs1)),
- (istreambuf_iterator<char>()));
-
- msgpack::zone zone;
- msgpack::object obj;
- msgpack::unpack(str1.data(), str1.size(), NULL, &zone, &obj);
-
- Hg hg;
- obj.convert(&hg);*/
-
- return 0;
-}
-
diff --git a/data/make_paks2.cc b/data/make_paks2.cc
deleted file mode 100644
index 1b5895b..0000000
--- a/data/make_paks2.cc
+++ /dev/null
@@ -1,121 +0,0 @@
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <msgpack.hpp>
-#include <msgpack/fbuffer.h>
-#include <msgpack/fbuffer.hpp>
-
-
-/*
- * https://github.com/ascheglov/json-cpp
- *
- */
-#include "../json-cpp.hpp"
-
-using namespace std;
-
-
-struct Node {
- int id;
- string cat;
- vector<int> span;
-
- MSGPACK_DEFINE(id, cat, span);
-};
-
-struct Vector {
- double CountEF;
- double EgivenFCoherent;
- double Glue;
- double IsSingletonF;
- double IsSingletonFE;
- double LanguageModel;
- double LanguageModel_OOV;
- double MaxLexFgivenE;
- double MaxLexEgivenF;
- double PassThrough;
- double PassThrough_1;
- double PassThrough_2;
- double PassThrough_3;
- double PassThrough_4;
- double PassThrough_5;
- double PassThrough_6;
- double SampleCountF;
- double WordPenalty;
-
- MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty);
-};
-
-struct Edge {
- int head;
- string rule;
- vector<int> tails;
- Vector f;
- double weight;
-
- MSGPACK_DEFINE(head, rule, tails, f, weight);
-};
-
-struct Hg {
- Vector weights;
- vector<Node> nodes;
- vector<Edge> edges;
-
- MSGPACK_DEFINE(weights, nodes, edges);
-};
-
-template<typename X> inline void
-serialize(jsoncpp::Stream<X>& stream, Hg& o)
-{
- fields(o, stream, "weights", o.weights, "nodes", o.nodes, "edges", o.edges);
-}
-
-template<typename X> inline void
-serialize(jsoncpp::Stream<X>& stream, Edge& o)
-{
- fields(o, stream, "head", o.head, "rule", o.rule, "tails", o.tails, "f", o.f, "weight", o.weight);
-}
-
-template<typename X> inline void
-serialize(jsoncpp::Stream<X>& stream, Vector& o)
-{
- fields(o, stream, "EgivenFCoherent", o.EgivenFCoherent, "SampleCountF", o.SampleCountF, "CountEF", o.CountEF, "MaxLexFgivenE", o.MaxLexFgivenE, "MaxLexEgivenF", o.MaxLexEgivenF, "IsSingletonF", o.IsSingletonF, "IsSingletonFE", o.IsSingletonFE, "LanguageModel", o.LanguageModel, "LanguageModel_OOV", o.LanguageModel_OOV, "PassThrough", o.PassThrough, "PassThrough_1", o.PassThrough_1, "PassThrough_2", o.PassThrough_2, "PassThrough_3", o.PassThrough_3, "PassThrough_4", o.PassThrough_4, "PassThrough_5", o.PassThrough_5, "PassThrough_6", o.PassThrough_6, "WordPenalty", o.WordPenalty, "Glue", o.Glue);
-}
-
-template<typename X> inline void
-serialize(jsoncpp::Stream<X>& stream, Node& o)
-{
- fields(o, stream, "id", o.id, "cat", o.cat, "span", o.span);
-}
-
-int
-main(int argc, char** argv)
-{
- ifstream ifs(argv[1]);
- string json_str((istreambuf_iterator<char>(ifs) ),
- (istreambuf_iterator<char>()));
-
- Hg hg;
- Vector w;
- hg.weights = w;
- vector<Node> nodes;
- hg.nodes = nodes;
- vector<Edge> edges;
- hg.edges = edges;
- jsoncpp::parse(hg, json_str);
-
- FILE* file = fopen(argv[2], "wb");
- msgpack::fbuffer fbuf(file);
- msgpack::pack(fbuf, hg.nodes.size());
- msgpack::pack(fbuf, hg.edges.size());
- msgpack::pack(fbuf, hg.weights);
- for (auto it = hg.nodes.begin(); it != hg.nodes.end(); it++)
- msgpack::pack(fbuf, *it);
- for (auto it = hg.edges.begin(); it != hg.edges.end(); it++)
- msgpack::pack(fbuf, *it);
-
- fclose(file);
-
- return 0;
-}
-
diff --git a/data/to_ascii.rb b/data/to_ascii.rb
deleted file mode 100755
index 6c1d23e..0000000
--- a/data/to_ascii.rb
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env ruby
-
-
-while line = STDIN.gets
- encoding_options = {
- :invalid => :replace,
- :undef => :replace,
- :replace => '?',
- :universal_newline => true
- }
- puts line.encode 'ASCII', encoding_options
-end
-
diff --git a/data/weights.init b/data/weights.init
deleted file mode 100644
index 0d09f9f..0000000
--- a/data/weights.init
+++ /dev/null
@@ -1,12 +0,0 @@
-CountEF 0.1
-EgivenFCoherent -0.1
-Glue 0.01
-IsSingletonF -0.01
-IsSingletonFE -0.01
-LanguageModel 0.1
-LanguageModel_OOV -1
-MaxLexFgivenE -0.1
-MaxLexEgivenF -0.1
-PassThrough -0.1
-SampleCountF -0.1
-WordPenalty -0.1