summary | refs | log | tree | commit | diff
path: root/training
diff options
context:
space:
mode:
author: Wu, Ke <wuke@cs.umd.edu> 2014-12-17 16:11:38 -0500
committer: Wu, Ke <wuke@cs.umd.edu> 2014-12-17 16:11:38 -0500
commit: 7468e8d85e99b4619442c7afaf4a0d92870111bb (patch)
tree: a6f17da7c69048c8900260b5490bb9d8611be3bb /training
parent: b6dd5a683db9dda2d634dd2fdb76606819594901 (diff)
parent: 1a79175f9a101d46cf27ca921213d5dd9300518f (diff)
Merge with upstream
Diffstat (limited to 'training')
-rw-r--r-- training/dpmert/lo_test.cc | 22
-rw-r--r-- training/dpmert/mr_dpmert_generate_mapper_input.cc | 2
-rw-r--r-- training/dpmert/mr_dpmert_map.cc | 4
-rw-r--r-- training/dpmert/test_data/0.bin.gz | bin 0 -> 24904 bytes
-rw-r--r-- training/dpmert/test_data/0.json.gz | bin 13709 -> 0 bytes
-rw-r--r-- training/dpmert/test_data/1.bin.gz | bin 0 -> 339220 bytes
-rw-r--r-- training/dpmert/test_data/1.json.gz | bin 204803 -> 0 bytes
-rw-r--r-- training/dpmert/test_data/test-ch-inside.bin.gz | bin 0 -> 340 bytes
-rw-r--r-- training/dpmert/test_data/test-zero-origin.bin.gz | bin 0 -> 923 bytes
-rw-r--r-- training/minrisk/minrisk_optimize.cc | 2
-rw-r--r-- training/pro/mr_pro_map.cc | 2
-rw-r--r-- training/rampion/rampion_cccp.cc | 2
-rw-r--r-- training/utils/Makefile.am | 6
-rw-r--r-- training/utils/grammar_convert.cc | 9
14 files changed, 27 insertions, 22 deletions
diff --git a/training/dpmert/lo_test.cc b/training/dpmert/lo_test.cc
index b8776169..69e5aa3f 100644
--- a/training/dpmert/lo_test.cc
+++ b/training/dpmert/lo_test.cc
@@ -56,10 +56,11 @@ BOOST_AUTO_TEST_CASE(TestConvexHull) {
}
BOOST_AUTO_TEST_CASE(TestConvexHullInside) {
- const string json = "{\"rules\":[1,\"[X] ||| a ||| a\",2,\"[X] ||| A [X] ||| A [1]\",3,\"[X] ||| c ||| c\",4,\"[X] ||| C [X] ||| C [1]\",5,\"[X] ||| [X] B [X] ||| [1] B [2]\",6,\"[X] ||| [X] b [X] ||| [1] b [2]\",7,\"[X] ||| X [X] ||| X [1]\",8,\"[X] ||| Z [X] ||| Z [1]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.8,1,-0.1],\"rule\":2}],\"node\":{\"in_edges\":[1]},\"edges\":[{\"tail\":[],\"feats\":[1,-1],\"rule\":3}],\"node\":{\"in_edges\":[2]},\"edges\":[{\"tail\":[2],\"feats\":[0,-0.2,1,-0.1],\"rule\":4}],\"node\":{\"in_edges\":[3]},\"edges\":[{\"tail\":[1,3],\"feats\":[0,-1.2,1,-0.2],\"rule\":5},{\"tail\":[1,3],\"feats\":[0,-0.5,1,-1.3],\"rule\":6}],\"node\":{\"in_edges\":[4,5]},\"edges\":[{\"tail\":[4],\"feats\":[0,-0.5,1,-0.8],\"rule\":7},{\"tail\":[4],\"feats\":[0,-0.7,1,-0.9],\"rule\":8}],\"node\":{\"in_edges\":[6,7]}}";
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
Hypergraph hg;
- istringstream instr(json);
- HypergraphIO::ReadFromJSON(&instr, &hg);
+ ReadFile rf(path + "/test-ch-inside.bin.gz");
+ assert(rf);
+ HypergraphIO::ReadFromBinary(rf.stream(), &hg);
SparseVector<double> wts;
wts.set_value(FD::Convert("f1"), 0.4);
wts.set_value(FD::Convert("f2"), 1.0);
@@ -121,13 +122,13 @@ BOOST_AUTO_TEST_CASE( TestS1) {
std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
Hypergraph hg;
- ReadFile rf(path + "/0.json.gz");
- HypergraphIO::ReadFromJSON(rf.stream(), &hg);
+ ReadFile rf(path + "/0.bin.gz");
+ HypergraphIO::ReadFromBinary(rf.stream(), &hg);
hg.Reweight(wts);
Hypergraph hg2;
- ReadFile rf2(path + "/1.json.gz");
- HypergraphIO::ReadFromJSON(rf2.stream(), &hg2);
+ ReadFile rf2(path + "/1.bin.gz");
+ HypergraphIO::ReadFromBinary(rf2.stream(), &hg2);
hg2.Reweight(wts);
vector<vector<WordID> > refs1(4);
@@ -193,10 +194,11 @@ BOOST_AUTO_TEST_CASE( TestS1) {
}
BOOST_AUTO_TEST_CASE(TestZeroOrigin) {
- const string json = "{\"rules\":[1,\"[X7] ||| blA ||| without ||| LHSProb=3.92173 LexE2F=2.90799 LexF2E=1.85003 GenerativeProb=10.5381 RulePenalty=1 XFE=2.77259 XEF=0.441833 LabelledEF=2.63906 LabelledFE=4.96981 LogRuleCount=0.693147\",2,\"[X7] ||| blA ||| except ||| LHSProb=4.92173 LexE2F=3.90799 LexF2E=1.85003 GenerativeProb=11.5381 RulePenalty=1 XFE=2.77259 XEF=1.44183 LabelledEF=2.63906 LabelledFE=4.96981 LogRuleCount=1.69315\",3,\"[S] ||| [X7,1] ||| [1] ||| GlueTop=1\",4,\"[X28] ||| EnwAn ||| title ||| LHSProb=3.96802 LexE2F=2.22462 LexF2E=1.83258 GenerativeProb=10.0863 RulePenalty=1 XFE=0 XEF=1.20397 LabelledEF=1.20397 LabelledFE=-1.98341e-08 LogRuleCount=1.09861\",5,\"[X0] ||| EnwAn ||| funny ||| LHSProb=3.98479 LexE2F=1.79176 LexF2E=3.21888 GenerativeProb=11.1681 RulePenalty=1 XFE=0 XEF=2.30259 LabelledEF=2.30259 LabelledFE=0 LogRuleCount=0 SingletonRule=1\",6,\"[X8] ||| [X7,1] EnwAn ||| entitled [1] ||| LHSProb=3.82533 LexE2F=3.21888 LexF2E=2.52573 GenerativeProb=11.3276 RulePenalty=1 XFE=1.20397 XEF=1.20397 LabelledEF=2.30259 LabelledFE=2.30259 LogRuleCount=0 SingletonRule=1\",7,\"[S] ||| [S,1] [X28,2] ||| [1] [2] ||| Glue=1\",8,\"[S] ||| [S,1] [X0,2] ||| [1] [2] ||| Glue=1\",9,\"[S] ||| [X8,1] ||| [1] ||| GlueTop=1\",10,\"[Goal] ||| [S,1] ||| 
[1]\"],\"features\":[\"PassThrough\",\"Glue\",\"GlueTop\",\"LanguageModel\",\"WordPenalty\",\"LHSProb\",\"LexE2F\",\"LexF2E\",\"GenerativeProb\",\"RulePenalty\",\"XFE\",\"XEF\",\"LabelledEF\",\"LabelledFE\",\"LogRuleCount\",\"SingletonRule\"],\"edges\":[{\"tail\":[],\"spans\":[0,1,-1,-1],\"feats\":[5,3.92173,6,2.90799,7,1.85003,8,10.5381,9,1,10,2.77259,11,0.441833,12,2.63906,13,4.96981,14,0.693147],\"rule\":1},{\"tail\":[],\"spans\":[0,1,-1,-1],\"feats\":[5,4.92173,6,3.90799,7,1.85003,8,11.5381,9,1,10,2.77259,11,1.44183,12,2.63906,13,4.96981,14,1.69315],\"rule\":2}],\"node\":{\"in_edges\":[0,1],\"cat\":\"X7\"},\"edges\":[{\"tail\":[0],\"spans\":[0,1,-1,-1],\"feats\":[2,1],\"rule\":3}],\"node\":{\"in_edges\":[2],\"cat\":\"S\"},\"edges\":[{\"tail\":[],\"spans\":[1,2,-1,-1],\"feats\":[5,3.96802,6,2.22462,7,1.83258,8,10.0863,9,1,11,1.20397,12,1.20397,13,-1.98341e-08,14,1.09861],\"rule\":4}],\"node\":{\"in_edges\":[3],\"cat\":\"X28\"},\"edges\":[{\"tail\":[],\"spans\":[1,2,-1,-1],\"feats\":[5,3.98479,6,1.79176,7,3.21888,8,11.1681,9,1,11,2.30259,12,2.30259,15,1],\"rule\":5}],\"node\":{\"in_edges\":[4],\"cat\":\"X0\"},\"edges\":[{\"tail\":[0],\"spans\":[0,2,-1,-1],\"feats\":[5,3.82533,6,3.21888,7,2.52573,8,11.3276,9,1,10,1.20397,11,1.20397,12,2.30259,13,2.30259,15,1],\"rule\":6}],\"node\":{\"in_edges\":[5],\"cat\":\"X8\"},\"edges\":[{\"tail\":[1,2],\"spans\":[0,2,-1,-1],\"feats\":[1,1],\"rule\":7},{\"tail\":[1,3],\"spans\":[0,2,-1,-1],\"feats\":[1,1],\"rule\":8},{\"tail\":[4],\"spans\":[0,2,-1,-1],\"feats\":[2,1],\"rule\":9}],\"node\":{\"in_edges\":[6,7,8],\"cat\":\"S\"},\"edges\":[{\"tail\":[5],\"spans\":[0,2,-1,-1],\"feats\":[],\"rule\":10}],\"node\":{\"in_edges\":[9],\"cat\":\"Goal\"}}";
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
+ ReadFile rf(path + "/test-zero-origin.bin.gz");
+ assert(rf);
Hypergraph hg;
- istringstream instr(json);
- HypergraphIO::ReadFromJSON(&instr, &hg);
+ HypergraphIO::ReadFromBinary(rf.stream(), &hg);
SparseVector<double> wts;
wts.set_value(FD::Convert("PassThrough"), -0.929201533002898);
hg.Reweight(wts);
diff --git a/training/dpmert/mr_dpmert_generate_mapper_input.cc b/training/dpmert/mr_dpmert_generate_mapper_input.cc
index 199cd23a..3fa2f476 100644
--- a/training/dpmert/mr_dpmert_generate_mapper_input.cc
+++ b/training/dpmert/mr_dpmert_generate_mapper_input.cc
@@ -70,7 +70,7 @@ int main(int argc, char** argv) {
unsigned dev_set_size = conf["dev_set_size"].as<unsigned>();
for (unsigned i = 0; i < dev_set_size; ++i) {
for (unsigned j = 0; j < directions.size(); ++j) {
- cout << forest_repository << '/' << i << ".json.gz " << i << ' ';
+ cout << forest_repository << '/' << i << ".bin.gz " << i << ' ';
print(cout, origin, "=", ";");
cout << ' ';
print(cout, directions[j], "=", ";");
diff --git a/training/dpmert/mr_dpmert_map.cc b/training/dpmert/mr_dpmert_map.cc
index d1efcf96..2bf3f8fc 100644
--- a/training/dpmert/mr_dpmert_map.cc
+++ b/training/dpmert/mr_dpmert_map.cc
@@ -83,7 +83,7 @@ int main(int argc, char** argv) {
istringstream is(line);
int sent_id;
string file, s_origin, s_direction;
- // path-to-file (JSON) sent_ed starting-point search-direction
+ // path-to-file sent_ed starting-point search-direction
is >> file >> sent_id >> s_origin >> s_direction;
SparseVector<double> origin;
ReadSparseVectorString(s_origin, &origin);
@@ -93,7 +93,7 @@ int main(int argc, char** argv) {
if (last_file != file) {
last_file = file;
ReadFile rf(file);
- HypergraphIO::ReadFromJSON(rf.stream(), &hg);
+ HypergraphIO::ReadFromBinary(rf.stream(), &hg);
}
const ConvexHullWeightFunction wf(origin, direction);
const ConvexHull hull = Inside<ConvexHull, ConvexHullWeightFunction>(hg, NULL, wf);
diff --git a/training/dpmert/test_data/0.bin.gz b/training/dpmert/test_data/0.bin.gz
new file mode 100644
index 00000000..388298e9
--- /dev/null
+++ b/training/dpmert/test_data/0.bin.gz
Binary files differ
diff --git a/training/dpmert/test_data/0.json.gz b/training/dpmert/test_data/0.json.gz
deleted file mode 100644
index 30f8dd77..00000000
--- a/training/dpmert/test_data/0.json.gz
+++ /dev/null
Binary files differ
diff --git a/training/dpmert/test_data/1.bin.gz b/training/dpmert/test_data/1.bin.gz
new file mode 100644
index 00000000..44f9e0ff
--- /dev/null
+++ b/training/dpmert/test_data/1.bin.gz
Binary files differ
diff --git a/training/dpmert/test_data/1.json.gz b/training/dpmert/test_data/1.json.gz
deleted file mode 100644
index c82cc179..00000000
--- a/training/dpmert/test_data/1.json.gz
+++ /dev/null
Binary files differ
diff --git a/training/dpmert/test_data/test-ch-inside.bin.gz b/training/dpmert/test_data/test-ch-inside.bin.gz
new file mode 100644
index 00000000..392f08c6
--- /dev/null
+++ b/training/dpmert/test_data/test-ch-inside.bin.gz
Binary files differ
diff --git a/training/dpmert/test_data/test-zero-origin.bin.gz b/training/dpmert/test_data/test-zero-origin.bin.gz
new file mode 100644
index 00000000..c641faaf
--- /dev/null
+++ b/training/dpmert/test_data/test-zero-origin.bin.gz
Binary files differ
diff --git a/training/minrisk/minrisk_optimize.cc b/training/minrisk/minrisk_optimize.cc
index da8b5260..a2938fb0 100644
--- a/training/minrisk/minrisk_optimize.cc
+++ b/training/minrisk/minrisk_optimize.cc
@@ -178,7 +178,7 @@ int main(int argc, char** argv) {
ReadFile rf(file);
if (kis.size() % 5 == 0) { cerr << '.'; }
if (kis.size() % 200 == 0) { cerr << " [" << kis.size() << "]\n"; }
- HypergraphIO::ReadFromJSON(rf.stream(), &hg);
+ HypergraphIO::ReadFromBinary(rf.stream(), &hg);
hg.Reweight(weights);
curkbest.AddKBestCandidates(hg, kbest_size, ds[sent_id]);
if (kbest_file.size())
diff --git a/training/pro/mr_pro_map.cc b/training/pro/mr_pro_map.cc
index da58cd24..b142fd05 100644
--- a/training/pro/mr_pro_map.cc
+++ b/training/pro/mr_pro_map.cc
@@ -203,7 +203,7 @@ int main(int argc, char** argv) {
const string kbest_file = os.str();
if (FileExists(kbest_file))
J_i.ReadFromFile(kbest_file);
- HypergraphIO::ReadFromJSON(rf.stream(), &hg);
+ HypergraphIO::ReadFromBinary(rf.stream(), &hg);
hg.Reweight(weights);
J_i.AddKBestCandidates(hg, kbest_size, ds[sent_id]);
J_i.WriteToFile(kbest_file);
diff --git a/training/rampion/rampion_cccp.cc b/training/rampion/rampion_cccp.cc
index 1e36dc51..1c45bac5 100644
--- a/training/rampion/rampion_cccp.cc
+++ b/training/rampion/rampion_cccp.cc
@@ -136,7 +136,7 @@ int main(int argc, char** argv) {
ReadFile rf(file);
if (kis.size() % 5 == 0) { cerr << '.'; }
if (kis.size() % 200 == 0) { cerr << " [" << kis.size() << "]\n"; }
- HypergraphIO::ReadFromJSON(rf.stream(), &hg);
+ HypergraphIO::ReadFromBinary(rf.stream(), &hg);
hg.Reweight(weights);
curkbest.AddKBestCandidates(hg, kbest_size, ds[sent_id]);
if (kbest_file.size())
diff --git a/training/utils/Makefile.am b/training/utils/Makefile.am
index 27c6e344..edaaf3d4 100644
--- a/training/utils/Makefile.am
+++ b/training/utils/Makefile.am
@@ -12,10 +12,12 @@ noinst_PROGRAMS = \
EXTRA_DIST = decode-and-evaluate.pl libcall.pl parallelize.pl
sentserver_SOURCES = sentserver.cc
-sentserver_LDFLAGS = -pthread
+sentserver_LDFLAGS = $(PTHREAD_LIBS)
+sentserver_CXXFLAGS = $(PTHREAD_CFLAGS)
sentclient_SOURCES = sentclient.cc
-sentclient_LDFLAGS = -pthread
+sentclient_LDFLAGS = $(PTHREAD_LIBS)
+sentclient_CXXFLAGS = $(PTHREAD_CFLAGS)
TESTS = lbfgs_test optimize_test
diff --git a/training/utils/grammar_convert.cc b/training/utils/grammar_convert.cc
index 5c1b4d4a..04f1eb77 100644
--- a/training/utils/grammar_convert.cc
+++ b/training/utils/grammar_convert.cc
@@ -43,7 +43,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
po::notify(*conf);
if (conf->count("help") || conf->count("input") == 0) {
- cerr << "\nUsage: grammar_convert [-options]\n\nConverts a grammar file (in Hiero format) into JSON hypergraph.\n";
+ cerr << "\nUsage: grammar_convert [-options]\n\nConverts a grammar file (in Hiero format) into serialized hypergraph.\n";
cerr << dcmdline_options << endl;
exit(1);
}
@@ -254,7 +254,8 @@ void ProcessHypergraph(const vector<double>& w, const po::variables_map& conf, c
if (w.size() > 0) { hg->Reweight(w); }
if (conf.count("collapse_weights")) CollapseWeights(hg);
if (conf["output"].as<string>() == "json") {
- HypergraphIO::WriteToJSON(*hg, false, &cout);
+ cerr << "NOT IMPLEMENTED ... talk to cdyer if you need this functionality\n";
+ // HypergraphIO::WriteToBinary(*hg, &cout);
if (!ref.empty()) { cerr << "REF: " << ref << endl; }
} else {
vector<WordID> onebest;
@@ -315,11 +316,11 @@ int main(int argc, char **argv) {
line = line.substr(0, pos + 2);
}
istringstream is(line);
- if (HypergraphIO::ReadFromJSON(&is, &hg)) {
+ if (HypergraphIO::ReadFromBinary(&is, &hg)) {
ProcessHypergraph(w, conf, ref, &hg);
hg.clear();
} else {
- cerr << "Error reading grammar from JSON: line " << lc << endl;
+ cerr << "Error reading grammar line " << lc << endl;
exit(1);
}
} else {