summaryrefslogtreecommitdiff
path: root/0001-minimal-decoder.patch
blob: 35798564491b285498865144d8294700dac02a67 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
From 7969b35bfdf68d41b228de6d0d3d829bdc1ed529 Mon Sep 17 00:00:00 2001
From: Patrick Simianer <p@simianer.de>
Date: Fri, 4 Aug 2017 21:03:54 +0200
Subject: [PATCH] minimal decoder

---
 .gitignore                 |  1 +
 decoder/Makefile.am        |  5 ++-
 decoder/minimal_decoder.cc | 79 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 decoder/minimal_decoder.cc

diff --git a/.gitignore b/.gitignore
index 5463d667..3b864dfa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,6 +42,7 @@ decoder/ff_test
 decoder/grammar_test
 decoder/hg_test
 decoder/logval_test
+decoder/minimal_decoder
 decoder/parser_test
 decoder/rule_lexer.cc
 decoder/small_vector_test
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index e46a7120..b23bbad4 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -1,4 +1,4 @@
-bin_PROGRAMS = cdec
+bin_PROGRAMS = cdec minimal_decoder
 
 noinst_PROGRAMS = \
   trule_test \
@@ -23,6 +23,9 @@ cdec_SOURCES = cdec.cc
 cdec_LDFLAGS= -rdynamic $(STATIC_FLAGS)
 cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a ../klm/util/double-conversion/libklm_util_double.a
 
+minimal_decoder_SOURCES = minimal_decoder.cc
+minimal_decoder_LDADD = libcdec.a ../utils/libutils.a
+
 AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/decoder/test_data\" -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/mteval -I$(top_srcdir)/utils -I$(top_srcdir)/klm
 
 rule_lexer.cc: rule_lexer.ll
diff --git a/decoder/minimal_decoder.cc b/decoder/minimal_decoder.cc
new file mode 100644
index 00000000..25922674
--- /dev/null
+++ b/decoder/minimal_decoder.cc
@@ -0,0 +1,79 @@
+#include <ctime>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+
+#include "fdict.h"
+#include "filelib.h"
+#include "hg.h"
+#include "hg_io.h"
+#include "sparse_vector.h"
+#include "viterbi.h"
+
+using namespace std;
+
+// Prints "<label> <secs> s" to stderr; secs is the clock() time since begin.
+static void report_elapsed(const char* label, clock_t begin)
+{
+  cerr << label << " " << double(clock() - begin) / CLOCKS_PER_SEC << " s" << endl;
+}
+
+/*
+ * Reads hypergraph from JSON file argv[1],
+ * reweights it using weights from argv[2] (one "<name> <weight>" per line),
+ * and outputs viterbi translation on stdout; stage timings go to stderr.
+ * Returns 1 if an argument is missing or argv[2] cannot be opened.
+ */
+int
+main(int argc, char** argv)
+{
+  const clock_t begin_total = clock();
+  if (argc < 3) {
+    cerr << "usage: minimal_decoder <hypergraph.json> <weights>" << endl;
+    return 1;
+  }
+
+  // read hg
+  clock_t begin = clock();
+  ReadFile rf(argv[1]);
+  Hypergraph hg;
+  HypergraphIO::ReadFromJSON(rf.stream(), &hg);
+  report_elapsed("read hg", begin);
+
+  // read weights
+  begin = clock();
+  SparseVector<double> v;
+  ifstream f(argv[2]);
+  if (!f) {
+    cerr << "cannot open weights file " << argv[2] << endl;
+    return 1;
+  }
+  string line;
+  while (getline(f, line)) {
+    istringstream ss(line);
+    string k; weight_t w;
+    // skip blank/malformed lines: w would otherwise be used unset
+    if (ss >> k >> w) v.add_value(FD::Convert(k), w);
+  }
+  report_elapsed("read weights", begin);
+
+  // reweight hg
+  begin = clock();
+  hg.Reweight(v);
+  report_elapsed("reweight", begin);
+
+  // topsort
+  begin = clock();
+  hg.TopologicallySortNodesAndEdges(hg.NumberOfNodes()-1);
+  report_elapsed("topsort", begin);
+
+  // viterbi
+  begin = clock();
+  vector<WordID> trans;
+  ViterbiESentence(hg, &trans);
+  cout << TD::GetString(trans) << endl;
+  report_elapsed("viterbi", begin);
+
+  report_elapsed("total", begin_total);
+  return 0;
+}
-- 
2.11.0 (Apple Git-81)