summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--decoder/Makefile.am5
-rw-r--r--decoder/minimal_decoder.cc45
-rw-r--r--extractor/Makefile.am2
-rw-r--r--extractor/run_extractor.cc7
-rwxr-xr-xtraining/dtrain/parallelize.rb3
-rwxr-xr-xtraining/pro/pro.pl23
7 files changed, 69 insertions, 17 deletions
diff --git a/.gitignore b/.gitignore
index dd8fcd7b..b8e0da4e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,6 +44,7 @@ decoder/ff_test
decoder/grammar_test
decoder/hg_test
decoder/logval_test
+decoder/minimal_decoder
decoder/parser_test
decoder/rule_lexer.cc
decoder/small_vector_test
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index dbec532e..e313f1f9 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -1,4 +1,4 @@
-bin_PROGRAMS = cdec
+bin_PROGRAMS = cdec minimal_decoder
noinst_PROGRAMS = \
trule_test \
@@ -23,6 +23,9 @@ cdec_SOURCES = cdec.cc
cdec_LDFLAGS= -rdynamic $(STATIC_FLAGS)
cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a ../klm/util/double-conversion/libklm_util_double.a
+minimal_decoder_SOURCES = minimal_decoder.cc
+minimal_decoder_LDADD = libcdec.a ../utils/libutils.a
+
AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/decoder/test_data\" -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/mteval -I$(top_srcdir)/utils -I$(top_srcdir)/klm
rule_lexer.cc: rule_lexer.ll
diff --git a/decoder/minimal_decoder.cc b/decoder/minimal_decoder.cc
new file mode 100644
index 00000000..0aa281ae
--- /dev/null
+++ b/decoder/minimal_decoder.cc
@@ -0,0 +1,45 @@
+#include <fstream>
+#include <iostream>
+#include <sstream>
+
+#include "fdict.h"
+#include "filelib.h"
+#include "hg.h"
+#include "hg_io.h"
+#include "sparse_vector.h"
+#include "viterbi.h"
+
+
+using namespace std;
+
+/*
+ * Reads hypergraph from JSON file argv[1],
+ * reweights it using weights from argv[2],
+ * and outputs viterbi translation.
+ *
+ */
+int main(int argc, char** argv)
+{
+ ReadFile rf(argv[1]);
+ Hypergraph hg;
+ HypergraphIO::ReadFromJSON(rf.stream(), &hg);
+ SparseVector<double> v;
+ ifstream f(argv[2]);
+ string line;
+ while (getline(f, line)) {
+ istringstream ss(line);
+ string k; weight_t w;
+ ss >> k >> w;
+ v.add_value(FD::Convert(k), w);
+ }
+ hg.Reweight(v);
+ clock_t begin = clock();
+ hg.TopologicallySortNodesAndEdges(hg.NumberOfNodes()-1);
+ vector<WordID> trans;
+ ViterbiESentence(hg, &trans);
+ cout << TD::GetString(trans) << endl << flush;
+ clock_t end = clock();
+ double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
+ cout << elapsed_secs << " s" << endl;
+}
+
diff --git a/extractor/Makefile.am b/extractor/Makefile.am
index a406d9dc..cdfbb307 100644
--- a/extractor/Makefile.am
+++ b/extractor/Makefile.am
@@ -115,7 +115,7 @@ noinst_LIBRARIES = libextractor.a
sacompile_SOURCES = sacompile.cc
sacompile_LDADD = libextractor.a
run_extractor_SOURCES = run_extractor.cc
-run_extractor_LDADD = libextractor.a
+run_extractor_LDADD = libextractor.a ../utils/libutils.a
extract_SOURCES = extract.cc
extract_LDADD = libextractor.a
diff --git a/extractor/run_extractor.cc b/extractor/run_extractor.cc
index 00564a36..75fae627 100644
--- a/extractor/run_extractor.cc
+++ b/extractor/run_extractor.cc
@@ -33,6 +33,7 @@
#include "time_util.h"
#include "translation_table.h"
#include "vocabulary.h"
+#include "../utils/filelib.h"
namespace fs = boost::filesystem;
namespace po = boost::program_options;
@@ -42,7 +43,7 @@ using namespace features;
// Returns the file path in which a given grammar should be written.
fs::path GetGrammarFilePath(const fs::path& grammar_path, int file_number) {
- string file_name = "grammar." + to_string(file_number);
+ string file_name = "grammar." + to_string(file_number) + ".gz";
return grammar_path / file_name;
}
@@ -239,8 +240,8 @@ int main(int argc, char** argv) {
}
Grammar grammar = extractor.GetGrammar(
sentences[i], blacklisted_sentence_ids);
- ofstream output(GetGrammarFilePath(grammar_path, i).c_str());
- output << grammar;
+ WriteFile output(GetGrammarFilePath(grammar_path, i).c_str());
+ *output << grammar;
}
for (size_t i = 0; i < sentences.size(); ++i) {
diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index 82600009..5fc8b04e 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -4,7 +4,7 @@ require 'trollop'
def usage
STDERR.write "Usage: "
- STDERR.write "ruby parallelize.rb -c <dtrain.ini> [-e <epochs=10>] [--randomize/-z] [--reshard/-y] -s <#shards|0> [-p <at once=9999>] -i <input> -r <refs> [--qsub/-q] [--dtrain_binary <path to dtrain binary>] [-l \"l2 select_k 100000\"] [--extra_qsub \"-l virtual_free=24G\"]\n"
+ STDERR.write "ruby parallelize.rb -c <dtrain.ini> [-e <epochs=10>] [--randomize/-z] [--reshard/-y] -s <#shards|0> [-p <at once=9999>] -i <input> -r <refs> [--qsub/-q] [--dtrain_binary <path to dtrain binary>] [-l \"l2 select_k 100000\"] [--extra_qsub \"-l mem_free=24G\"]\n"
exit 1
end
@@ -87,6 +87,7 @@ def make_shards(input, refs, num_shards, epoch, rand)
refs_fns << refs_fn
0.upto(shard_sz-1) { |i|
j = index.pop
+ break if !j
shard_in.write in_lines[j]
shard_refs.write refs_lines[j]
}
diff --git a/training/pro/pro.pl b/training/pro/pro.pl
index a059477d..8ebb5864 100755
--- a/training/pro/pro.pl
+++ b/training/pro/pro.pl
@@ -69,18 +69,19 @@ my $reg_previous = 5000;
# Process command-line options
if (GetOptions(
- "config=s" => \$iniFile,
- "weights=s" => \$initial_weights,
- "devset=s" => \$devset,
- "jobs=i" => \$jobs,
- "metric=s" => \$metric,
- "pass-suffix=s" => \$pass_suffix,
- "qsub" => \$useqsub,
- "help" => \$help,
- "reg=f" => \$reg,
- "reg-previous=f" => \$reg_previous,
+ "config=s" => \$iniFile,
+ "weights=s" => \$initial_weights,
+ "devset=s" => \$devset,
+ "jobs=i" => \$jobs,
+ "max-iterations=i" => \$max_iterations,
+ "metric=s" => \$metric,
+ "pass-suffix=s" => \$pass_suffix,
+ "qsub" => \$useqsub,
+ "help" => \$help,
+ "reg=f" => \$reg,
+ "reg-previous=f" => \$reg_previous,
"pmem=s" => \$pmem,
- "output-dir=s" => \$dir,
+ "output-dir=s" => \$dir,
) == 0 || @ARGV!=0 || $help) {
print_help();
exit;