From c3f3891a9c146483b63f3d903f9971438850bbad Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 4 Aug 2017 21:17:06 +0200 Subject: json serialization, minimal decoder --- .gitignore | 1 + 0001-minimal-decoder.patch | 132 +++++++++++++++++++++++++ b9be57140b1e9cab50ff8eb0c6cd90815ab6da0e.patch | 95 ++++++++++++++++++ decoder/Makefile.am | 5 +- decoder/minimal_decoder.cc | 79 +++++++++++++++ 5 files changed, 311 insertions(+), 1 deletion(-) create mode 100644 0001-minimal-decoder.patch create mode 100644 b9be57140b1e9cab50ff8eb0c6cd90815ab6da0e.patch create mode 100644 decoder/minimal_decoder.cc diff --git a/.gitignore b/.gitignore index 5463d667..3b864dfa 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,7 @@ decoder/ff_test decoder/grammar_test decoder/hg_test decoder/logval_test +decoder/minimal_decoder decoder/parser_test decoder/rule_lexer.cc decoder/small_vector_test diff --git a/0001-minimal-decoder.patch b/0001-minimal-decoder.patch new file mode 100644 index 00000000..35798564 --- /dev/null +++ b/0001-minimal-decoder.patch @@ -0,0 +1,132 @@ +From 7969b35bfdf68d41b228de6d0d3d829bdc1ed529 Mon Sep 17 00:00:00 2001 +From: Patrick Simianer +Date: Fri, 4 Aug 2017 21:03:54 +0200 +Subject: [PATCH] minimal decoder + +--- + .gitignore | 1 + + decoder/Makefile.am | 5 ++- + decoder/minimal_decoder.cc | 79 ++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 84 insertions(+), 1 deletion(-) + create mode 100644 decoder/minimal_decoder.cc + +diff --git a/.gitignore b/.gitignore +index 5463d667..3b864dfa 100644 +--- a/.gitignore ++++ b/.gitignore +@@ -42,6 +42,7 @@ decoder/ff_test + decoder/grammar_test + decoder/hg_test + decoder/logval_test ++decoder/minimal_decoder + decoder/parser_test + decoder/rule_lexer.cc + decoder/small_vector_test +diff --git a/decoder/Makefile.am b/decoder/Makefile.am +index e46a7120..b23bbad4 100644 +--- a/decoder/Makefile.am ++++ b/decoder/Makefile.am +@@ -1,4 +1,4 @@ +-bin_PROGRAMS = cdec ++bin_PROGRAMS = cdec minimal_decoder + + noinst_PROGRAMS = \ + trule_test \ +@@ -23,6 +23,9 @@ cdec_SOURCES = cdec.cc + cdec_LDFLAGS= -rdynamic $(STATIC_FLAGS) + cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a ../klm/util/double-conversion/libklm_util_double.a + ++minimal_decoder_SOURCES = minimal_decoder.cc ++minimal_decoder_LDADD = libcdec.a ../utils/libutils.a ++ + AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/decoder/test_data\" -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/mteval -I$(top_srcdir)/utils -I$(top_srcdir)/klm + + rule_lexer.cc: rule_lexer.ll +diff --git a/decoder/minimal_decoder.cc b/decoder/minimal_decoder.cc +new file mode 100644 +index 00000000..25922674 +--- /dev/null ++++ b/decoder/minimal_decoder.cc +@@ -0,0 +1,79 @@ ++#include ++#include ++#include ++ ++#include "fdict.h" ++#include "filelib.h" ++#include "hg.h" ++#include "hg_io.h" ++#include "sparse_vector.h" ++#include "viterbi.h" ++ ++using namespace std; ++ ++/* ++ * Reads hypergraph from JSON file argv[1], ++ * reweights it using weights from argv[2], ++ * and outputs viterbi translation. ++ * ++ */ ++int ++main(int argc, char** argv) ++{ ++ clock_t begin_total = clock(); ++ ++ // read hg ++ clock_t begin_read = clock(); ++ ReadFile rf(argv[1]); ++ Hypergraph hg; ++ HypergraphIO::ReadFromJSON(rf.stream(), &hg); ++ clock_t end_read = clock(); ++ double elapsed_secs_read = double(end_read - begin_read) / CLOCKS_PER_SEC; ++ cerr << "read hg " << elapsed_secs_read << " s" << endl; ++ ++ // read weights ++ clock_t begin_weights = clock(); ++ SparseVector v; ++ ifstream f(argv[2]); ++ string line; ++ while (getline(f, line)) { ++ istringstream ss(line); ++ string k; weight_t w; ++ ss >> k >> w; ++ v.add_value(FD::Convert(k), w); ++ } ++ clock_t end_weights = clock(); ++ double elapsed_secs_weights = double(end_weights - begin_weights) / CLOCKS_PER_SEC; ++ cerr << "read weights " << elapsed_secs_weights << " s" << endl; ++ ++ // reweight hg ++ clock_t begin_reweight = clock(); ++ hg.Reweight(v); ++ clock_t end_reweight = clock(); ++ double elapsed_secs_reweight = double(end_reweight - begin_reweight) / CLOCKS_PER_SEC; ++ cerr << "reweight " << elapsed_secs_reweight << " s" << endl; ++ ++ // topsort ++ clock_t begin_top = clock(); ++ hg.TopologicallySortNodesAndEdges(hg.NumberOfNodes()-1); ++ clock_t end_top = clock(); ++ double elapsed_secs_top = double(end_top - begin_top) / CLOCKS_PER_SEC; ++ cerr << "topsort " << elapsed_secs_top << " s" << endl; ++ ++ // viterbi ++ clock_t begin_viterbi = clock(); ++ vector trans; ++ ViterbiESentence(hg, &trans); ++ cout << TD::GetString(trans) << endl << flush; ++ clock_t end_viterbi = clock(); ++ double elapsed_secs_viterbi = double(end_viterbi - begin_viterbi) / CLOCKS_PER_SEC; ++ cerr << "viterbi " << elapsed_secs_viterbi << " s" << endl; ++ ++ // total ++ clock_t end_total = clock(); ++ double elapsed_secs = double(end_total - begin_total) / CLOCKS_PER_SEC; ++ cerr << "total " << elapsed_secs << " s" << endl; ++ ++ return 0; ++} ++ +-- +2.11.0 (Apple Git-81) + diff --git a/b9be57140b1e9cab50ff8eb0c6cd90815ab6da0e.patch b/b9be57140b1e9cab50ff8eb0c6cd90815ab6da0e.patch new file mode 100644 index 00000000..8ac26dbd --- /dev/null +++ b/b9be57140b1e9cab50ff8eb0c6cd90815ab6da0e.patch @@ -0,0 +1,95 @@ +From b9be57140b1e9cab50ff8eb0c6cd90815ab6da0e Mon Sep 17 00:00:00 2001 +From: Patrick Simianer +Date: Tue, 19 Aug 2014 21:51:44 +0100 +Subject: [PATCH] added minimal decoder + +--- + .gitignore | 1 + + decoder/Makefile.am | 5 ++++- + decoder/minimal_decoder.cc | 45 +++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 50 insertions(+), 1 deletion(-) + create mode 100644 decoder/minimal_decoder.cc + +diff --git a/.gitignore b/.gitignore +index 72f4997..0c78530 100644 +--- a/.gitignore ++++ b/.gitignore +@@ -42,6 +42,7 @@ decoder/ff_test + decoder/grammar_test + decoder/hg_test + decoder/logval_test ++decoder/minimal_decoder + decoder/parser_test + decoder/rule_lexer.cc + decoder/small_vector_test +diff --git a/decoder/Makefile.am b/decoder/Makefile.am +index 8e61c13..9c4c714 100644 +--- a/decoder/Makefile.am ++++ b/decoder/Makefile.am +@@ -1,4 +1,4 @@ +-bin_PROGRAMS = cdec ++bin_PROGRAMS = cdec minimal_decoder + + noinst_PROGRAMS = \ + trule_test \ +@@ -23,6 +23,9 @@ cdec_SOURCES = cdec.cc + cdec_LDFLAGS= -rdynamic + cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a ../klm/util/double-conversion/libklm_util_double.a + ++minimal_decoder_SOURCES = minimal_decoder.cc ++minimal_decoder_LDADD = libcdec.a ../utils/libutils.a ++ + AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/decoder/test_data\" -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/mteval -I$(top_srcdir)/utils -I$(top_srcdir)/klm + + rule_lexer.cc: rule_lexer.ll +diff --git a/decoder/minimal_decoder.cc b/decoder/minimal_decoder.cc +new file mode 100644 +index 0000000..0aa281a +--- /dev/null ++++ b/decoder/minimal_decoder.cc +@@ -0,0 +1,45 @@ ++#include ++#include ++#include ++ ++#include "fdict.h" ++#include "filelib.h" ++#include "hg.h" ++#include "hg_io.h" ++#include "sparse_vector.h" ++#include "viterbi.h" ++ ++ ++using namespace std; ++ ++/* ++ * Reads hypergraph from JSON file argv[1], ++ * reweights it using weights from argv[2], ++ * and outputs viterbi translation. ++ * ++ */ ++int main(int argc, char** argv) ++{ ++ ReadFile rf(argv[1]); ++ Hypergraph hg; ++ HypergraphIO::ReadFromJSON(rf.stream(), &hg); ++ SparseVector v; ++ ifstream f(argv[2]); ++ string line; ++ while (getline(f, line)) { ++ istringstream ss(line); ++ string k; weight_t w; ++ ss >> k >> w; ++ v.add_value(FD::Convert(k), w); ++ } ++ hg.Reweight(v); ++ clock_t begin = clock(); ++ hg.TopologicallySortNodesAndEdges(hg.NumberOfNodes()-1); ++ vector trans; ++ ViterbiESentence(hg, &trans); ++ cout << TD::GetString(trans) << endl << flush; ++ clock_t end = clock(); ++ double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC; ++ cout << elapsed_secs << " s" << endl; ++} ++ diff --git a/decoder/Makefile.am b/decoder/Makefile.am index e46a7120..b23bbad4 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -1,4 +1,4 @@ -bin_PROGRAMS = cdec +bin_PROGRAMS = cdec minimal_decoder noinst_PROGRAMS = \ trule_test \ @@ -23,6 +23,9 @@ cdec_SOURCES = cdec.cc cdec_LDFLAGS= -rdynamic $(STATIC_FLAGS) cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a ../klm/util/double-conversion/libklm_util_double.a +minimal_decoder_SOURCES = minimal_decoder.cc +minimal_decoder_LDADD = libcdec.a ../utils/libutils.a + AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/decoder/test_data\" -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/mteval -I$(top_srcdir)/utils -I$(top_srcdir)/klm rule_lexer.cc: rule_lexer.ll diff --git a/decoder/minimal_decoder.cc b/decoder/minimal_decoder.cc new file mode 100644 index 00000000..25922674 --- /dev/null +++ b/decoder/minimal_decoder.cc @@ -0,0 +1,79 @@ +#include +#include +#include + +#include "fdict.h" +#include "filelib.h" +#include "hg.h" +#include "hg_io.h" +#include "sparse_vector.h" +#include "viterbi.h" + +using namespace std; + +/* + * Reads hypergraph from JSON file argv[1], + * reweights it using weights from argv[2], + * and outputs viterbi translation. + * + */ +int +main(int argc, char** argv) +{ + clock_t begin_total = clock(); + + // read hg + clock_t begin_read = clock(); + ReadFile rf(argv[1]); + Hypergraph hg; + HypergraphIO::ReadFromJSON(rf.stream(), &hg); + clock_t end_read = clock(); + double elapsed_secs_read = double(end_read - begin_read) / CLOCKS_PER_SEC; + cerr << "read hg " << elapsed_secs_read << " s" << endl; + + // read weights + clock_t begin_weights = clock(); + SparseVector v; + ifstream f(argv[2]); + string line; + while (getline(f, line)) { + istringstream ss(line); + string k; weight_t w; + ss >> k >> w; + v.add_value(FD::Convert(k), w); + } + clock_t end_weights = clock(); + double elapsed_secs_weights = double(end_weights - begin_weights) / CLOCKS_PER_SEC; + cerr << "read weights " << elapsed_secs_weights << " s" << endl; + + // reweight hg + clock_t begin_reweight = clock(); + hg.Reweight(v); + clock_t end_reweight = clock(); + double elapsed_secs_reweight = double(end_reweight - begin_reweight) / CLOCKS_PER_SEC; + cerr << "reweight " << elapsed_secs_reweight << " s" << endl; + + // topsort + clock_t begin_top = clock(); + hg.TopologicallySortNodesAndEdges(hg.NumberOfNodes()-1); + clock_t end_top = clock(); + double elapsed_secs_top = double(end_top - begin_top) / CLOCKS_PER_SEC; + cerr << "topsort " << elapsed_secs_top << " s" << endl; + + // viterbi + clock_t begin_viterbi = clock(); + vector trans; + ViterbiESentence(hg, &trans); + cout << TD::GetString(trans) << endl << flush; + clock_t end_viterbi = clock(); + double elapsed_secs_viterbi = double(end_viterbi - begin_viterbi) / CLOCKS_PER_SEC; + cerr << "viterbi " << elapsed_secs_viterbi << " s" << endl; + + // total + clock_t end_total = clock(); + double elapsed_secs = double(end_total - begin_total) / CLOCKS_PER_SEC; + cerr << "total " << elapsed_secs << " s" << endl; + + return 0; +} + -- cgit v1.2.3