summary refs log tree commit diff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2017-08-04 21:17:06 +0200
committer Patrick Simianer <p@simianer.de> 2017-08-04 21:17:06 +0200
commit c3f3891a9c146483b63f3d903f9971438850bbad (patch)
tree b1188d32e2628fd84b923e47373363174bf07086
parent b88176dc4fd53480e77d601ff63bf5300cf8fc7f (diff)
json serialization, minimal decoder json_serial
-rw-r--r-- .gitignore 1
-rw-r--r-- 0001-minimal-decoder.patch 132
-rw-r--r-- b9be57140b1e9cab50ff8eb0c6cd90815ab6da0e.patch 95
-rw-r--r-- decoder/Makefile.am 5
-rw-r--r-- decoder/minimal_decoder.cc 79
5 files changed, 311 insertions, 1 deletions
diff --git a/.gitignore b/.gitignore
index 5463d667..3b864dfa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,6 +42,7 @@ decoder/ff_test
decoder/grammar_test
decoder/hg_test
decoder/logval_test
+decoder/minimal_decoder
decoder/parser_test
decoder/rule_lexer.cc
decoder/small_vector_test
diff --git a/0001-minimal-decoder.patch b/0001-minimal-decoder.patch
new file mode 100644
index 00000000..35798564
--- /dev/null
+++ b/0001-minimal-decoder.patch
@@ -0,0 +1,132 @@
+From 7969b35bfdf68d41b228de6d0d3d829bdc1ed529 Mon Sep 17 00:00:00 2001
+From: Patrick Simianer <p@simianer.de>
+Date: Fri, 4 Aug 2017 21:03:54 +0200
+Subject: [PATCH] minimal decoder
+
+---
+ .gitignore | 1 +
+ decoder/Makefile.am | 5 ++-
+ decoder/minimal_decoder.cc | 79 ++++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 84 insertions(+), 1 deletion(-)
+ create mode 100644 decoder/minimal_decoder.cc
+
+diff --git a/.gitignore b/.gitignore
+index 5463d667..3b864dfa 100644
+--- a/.gitignore
++++ b/.gitignore
+@@ -42,6 +42,7 @@ decoder/ff_test
+ decoder/grammar_test
+ decoder/hg_test
+ decoder/logval_test
++decoder/minimal_decoder
+ decoder/parser_test
+ decoder/rule_lexer.cc
+ decoder/small_vector_test
+diff --git a/decoder/Makefile.am b/decoder/Makefile.am
+index e46a7120..b23bbad4 100644
+--- a/decoder/Makefile.am
++++ b/decoder/Makefile.am
+@@ -1,4 +1,4 @@
+-bin_PROGRAMS = cdec
++bin_PROGRAMS = cdec minimal_decoder
+
+ noinst_PROGRAMS = \
+ trule_test \
+@@ -23,6 +23,9 @@ cdec_SOURCES = cdec.cc
+ cdec_LDFLAGS= -rdynamic $(STATIC_FLAGS)
+ cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a ../klm/util/double-conversion/libklm_util_double.a
+
++minimal_decoder_SOURCES = minimal_decoder.cc
++minimal_decoder_LDADD = libcdec.a ../utils/libutils.a
++
+ AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/decoder/test_data\" -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/mteval -I$(top_srcdir)/utils -I$(top_srcdir)/klm
+
+ rule_lexer.cc: rule_lexer.ll
+diff --git a/decoder/minimal_decoder.cc b/decoder/minimal_decoder.cc
+new file mode 100644
+index 00000000..25922674
+--- /dev/null
++++ b/decoder/minimal_decoder.cc
+@@ -0,0 +1,79 @@
++#include <fstream>
++#include <iostream>
++#include <sstream>
++
++#include "fdict.h"
++#include "filelib.h"
++#include "hg.h"
++#include "hg_io.h"
++#include "sparse_vector.h"
++#include "viterbi.h"
++
++using namespace std;
++
++/*
++ * Reads a hypergraph from the JSON file argv[1], reweights it using the
++ * weights in argv[2] (each line parsed as "<key> <weight>"), and prints the
++ * viterbi translation to stdout; clock() timings per stage go to stderr.
++ * NOTE(review): argc is never checked; the result of `ss >> k >> w` is not tested.
++ */
++int
++main(int argc, char** argv)
++{
++ clock_t begin_total = clock();
++
++ // read hg
++ clock_t begin_read = clock();
++ ReadFile rf(argv[1]); // NOTE(review): argv[1] is used without an argc check
++ Hypergraph hg;
++ HypergraphIO::ReadFromJSON(rf.stream(), &hg);
++ clock_t end_read = clock();
++ double elapsed_secs_read = double(end_read - begin_read) / CLOCKS_PER_SEC;
++ cerr << "read hg " << elapsed_secs_read << " s" << endl;
++
++ // read weights
++ clock_t begin_weights = clock();
++ SparseVector<double> v;
++ ifstream f(argv[2]); // NOTE(review): argv[2] is used without an argc check
++ string line;
++ while (getline(f, line)) {
++ istringstream ss(line);
++ string k; weight_t w;
++ ss >> k >> w; // NOTE(review): extraction result is not tested; w may be unset
++ v.add_value(FD::Convert(k), w);
++ }
++ clock_t end_weights = clock();
++ double elapsed_secs_weights = double(end_weights - begin_weights) / CLOCKS_PER_SEC;
++ cerr << "read weights " << elapsed_secs_weights << " s" << endl;
++
++ // reweight hg
++ clock_t begin_reweight = clock();
++ hg.Reweight(v);
++ clock_t end_reweight = clock();
++ double elapsed_secs_reweight = double(end_reweight - begin_reweight) / CLOCKS_PER_SEC;
++ cerr << "reweight " << elapsed_secs_reweight << " s" << endl;
++
++ // topsort
++ clock_t begin_top = clock();
++ hg.TopologicallySortNodesAndEdges(hg.NumberOfNodes()-1); // presumably the last node is the goal -- TODO confirm
++ clock_t end_top = clock();
++ double elapsed_secs_top = double(end_top - begin_top) / CLOCKS_PER_SEC;
++ cerr << "topsort " << elapsed_secs_top << " s" << endl;
++
++ // viterbi
++ clock_t begin_viterbi = clock();
++ vector<WordID> trans;
++ ViterbiESentence(hg, &trans);
++ cout << TD::GetString(trans) << endl << flush;
++ clock_t end_viterbi = clock();
++ double elapsed_secs_viterbi = double(end_viterbi - begin_viterbi) / CLOCKS_PER_SEC;
++ cerr << "viterbi " << elapsed_secs_viterbi << " s" << endl;
++
++ // total
++ clock_t end_total = clock();
++ double elapsed_secs = double(end_total - begin_total) / CLOCKS_PER_SEC;
++ cerr << "total " << elapsed_secs << " s" << endl;
++
++ return 0;
++}
++
+--
+2.11.0 (Apple Git-81)
+
diff --git a/b9be57140b1e9cab50ff8eb0c6cd90815ab6da0e.patch b/b9be57140b1e9cab50ff8eb0c6cd90815ab6da0e.patch
new file mode 100644
index 00000000..8ac26dbd
--- /dev/null
+++ b/b9be57140b1e9cab50ff8eb0c6cd90815ab6da0e.patch
@@ -0,0 +1,95 @@
+From b9be57140b1e9cab50ff8eb0c6cd90815ab6da0e Mon Sep 17 00:00:00 2001
+From: Patrick Simianer <p@simianer.de>
+Date: Tue, 19 Aug 2014 21:51:44 +0100
+Subject: [PATCH] added minimal decoder
+
+---
+ .gitignore | 1 +
+ decoder/Makefile.am | 5 ++++-
+ decoder/minimal_decoder.cc | 45 +++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 50 insertions(+), 1 deletion(-)
+ create mode 100644 decoder/minimal_decoder.cc
+
+diff --git a/.gitignore b/.gitignore
+index 72f4997..0c78530 100644
+--- a/.gitignore
++++ b/.gitignore
+@@ -42,6 +42,7 @@ decoder/ff_test
+ decoder/grammar_test
+ decoder/hg_test
+ decoder/logval_test
++decoder/minimal_decoder
+ decoder/parser_test
+ decoder/rule_lexer.cc
+ decoder/small_vector_test
+diff --git a/decoder/Makefile.am b/decoder/Makefile.am
+index 8e61c13..9c4c714 100644
+--- a/decoder/Makefile.am
++++ b/decoder/Makefile.am
+@@ -1,4 +1,4 @@
+-bin_PROGRAMS = cdec
++bin_PROGRAMS = cdec minimal_decoder
+
+ noinst_PROGRAMS = \
+ trule_test \
+@@ -23,6 +23,9 @@ cdec_SOURCES = cdec.cc
+ cdec_LDFLAGS= -rdynamic
+ cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a ../klm/util/double-conversion/libklm_util_double.a
+
++minimal_decoder_SOURCES = minimal_decoder.cc
++minimal_decoder_LDADD = libcdec.a ../utils/libutils.a
++
+ AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/decoder/test_data\" -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/mteval -I$(top_srcdir)/utils -I$(top_srcdir)/klm
+
+ rule_lexer.cc: rule_lexer.ll
+diff --git a/decoder/minimal_decoder.cc b/decoder/minimal_decoder.cc
+new file mode 100644
+index 0000000..0aa281a
+--- /dev/null
++++ b/decoder/minimal_decoder.cc
+@@ -0,0 +1,45 @@
++#include <fstream>
++#include <iostream>
++#include <sstream>
++
++#include "fdict.h"
++#include "filelib.h"
++#include "hg.h"
++#include "hg_io.h"
++#include "sparse_vector.h"
++#include "viterbi.h"
++
++
++using namespace std;
++
++/*
++ * Reads a hypergraph from the JSON file argv[1], reweights it using the
++ * weights in argv[2] (each line parsed as "<key> <weight>"), and prints the
++ * viterbi translation, then the clock() time of topsort + viterbi, to stdout.
++ * NOTE(review): argc is never checked; the result of `ss >> k >> w` is not tested.
++ */
++int main(int argc, char** argv)
++{
++ ReadFile rf(argv[1]); // NOTE(review): argv[1] is used without an argc check
++ Hypergraph hg;
++ HypergraphIO::ReadFromJSON(rf.stream(), &hg);
++ SparseVector<double> v;
++ ifstream f(argv[2]); // NOTE(review): argv[2] is used without an argc check
++ string line;
++ while (getline(f, line)) {
++ istringstream ss(line);
++ string k; weight_t w;
++ ss >> k >> w; // NOTE(review): extraction result is not tested; w may be unset
++ v.add_value(FD::Convert(k), w);
++ }
++ hg.Reweight(v);
++ clock_t begin = clock(); // the timer covers topsort and viterbi only
++ hg.TopologicallySortNodesAndEdges(hg.NumberOfNodes()-1); // presumably the last node is the goal -- TODO confirm
++ vector<WordID> trans;
++ ViterbiESentence(hg, &trans);
++ cout << TD::GetString(trans) << endl << flush;
++ clock_t end = clock();
++ double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
++ cout << elapsed_secs << " s" << endl;
++}
++
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index e46a7120..b23bbad4 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -1,4 +1,4 @@
-bin_PROGRAMS = cdec
+bin_PROGRAMS = cdec minimal_decoder
noinst_PROGRAMS = \
trule_test \
@@ -23,6 +23,9 @@ cdec_SOURCES = cdec.cc
cdec_LDFLAGS= -rdynamic $(STATIC_FLAGS)
cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a ../klm/util/double-conversion/libklm_util_double.a
+minimal_decoder_SOURCES = minimal_decoder.cc
+minimal_decoder_LDADD = libcdec.a ../utils/libutils.a
+
AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/decoder/test_data\" -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/mteval -I$(top_srcdir)/utils -I$(top_srcdir)/klm
rule_lexer.cc: rule_lexer.ll
diff --git a/decoder/minimal_decoder.cc b/decoder/minimal_decoder.cc
new file mode 100644
index 00000000..25922674
--- /dev/null
+++ b/decoder/minimal_decoder.cc
@@ -0,0 +1,79 @@
+#include <fstream>
+#include <iostream>
+#include <sstream>
+
+#include "fdict.h"
+#include "filelib.h"
+#include "hg.h"
+#include "hg_io.h"
+#include "sparse_vector.h"
+#include "viterbi.h"
+
+using namespace std;
+
+/*
+ * Reads a hypergraph from the JSON file argv[1], reweights it using the
+ * weights in argv[2] (each line parsed as "<key> <weight>"), and prints
+ * the viterbi translation to stdout. CPU time per stage, as measured by
+ * clock(), is logged to stderr. Returns 1 if an argument is missing.
+ */
+int
+main(int argc, char** argv)
+{
+  // argv[1] and argv[2] are dereferenced below, so both must be present
+  if (argc < 3) {
+    cerr << "usage: minimal_decoder <hypergraph.json> <weights>" << endl;
+    return 1;
+  }
+
+  clock_t begin_total = clock();
+
+  // read hg
+  clock_t begin_read = clock();
+  ReadFile rf(argv[1]);
+  Hypergraph hg;
+  HypergraphIO::ReadFromJSON(rf.stream(), &hg);
+  clock_t end_read = clock();
+  cerr << "read hg " << double(end_read - begin_read) / CLOCKS_PER_SEC << " s" << endl;
+
+  // read weights
+  clock_t begin_weights = clock();
+  SparseVector<double> v;
+  ifstream f(argv[2]);
+  string line;
+  while (getline(f, line)) {
+    istringstream ss(line);
+    string k; weight_t w;
+    // skip blank/malformed lines: after a failed parse w is unset or meaningless
+    if (ss >> k >> w) v.add_value(FD::Convert(k), w);
+  }
+  clock_t end_weights = clock();
+  cerr << "read weights " << double(end_weights - begin_weights) / CLOCKS_PER_SEC << " s" << endl;
+
+  // reweight hg
+  clock_t begin_reweight = clock();
+  hg.Reweight(v);
+  clock_t end_reweight = clock();
+  cerr << "reweight " << double(end_reweight - begin_reweight) / CLOCKS_PER_SEC << " s" << endl;
+
+  // topsort
+  clock_t begin_top = clock();
+  hg.TopologicallySortNodesAndEdges(hg.NumberOfNodes()-1); // presumably the last node is the goal -- TODO confirm
+  clock_t end_top = clock();
+  cerr << "topsort " << double(end_top - begin_top) / CLOCKS_PER_SEC << " s" << endl;
+
+  // viterbi
+  clock_t begin_viterbi = clock();
+  vector<WordID> trans;
+  ViterbiESentence(hg, &trans);
+  cout << TD::GetString(trans) << endl; // endl already flushes
+  clock_t end_viterbi = clock();
+  cerr << "viterbi " << double(end_viterbi - begin_viterbi) / CLOCKS_PER_SEC << " s" << endl;
+
+  // total
+  clock_t end_total = clock();
+  cerr << "total " << double(end_total - begin_total) / CLOCKS_PER_SEC << " s" << endl;
+
+  return 0;
+}
+