7 files changed, 69 insertions, 17 deletions
diff --git a/.gitignore b/.gitignore
index dd8fcd7b..b8e0da4e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,6 +44,7 @@ decoder/ff_test
 decoder/grammar_test
 decoder/hg_test
 decoder/logval_test
+decoder/minimal_decoder
 decoder/parser_test
 decoder/rule_lexer.cc
 decoder/small_vector_test
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index dbec532e..e313f1f9 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -1,4 +1,4 @@
-bin_PROGRAMS = cdec
+bin_PROGRAMS = cdec minimal_decoder
 
 noinst_PROGRAMS = \
   trule_test \
@@ -23,6 +23,9 @@ cdec_SOURCES = cdec.cc
 cdec_LDFLAGS= -rdynamic $(STATIC_FLAGS)
 cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a ../klm/util/double-conversion/libklm_util_double.a
 
+minimal_decoder_SOURCES = minimal_decoder.cc
+minimal_decoder_LDADD = libcdec.a ../utils/libutils.a
+
 AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/decoder/test_data\" -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/mteval -I$(top_srcdir)/utils -I$(top_srcdir)/klm
 
 rule_lexer.cc: rule_lexer.ll
diff --git a/decoder/minimal_decoder.cc b/decoder/minimal_decoder.cc
new file mode 100644
index 00000000..0aa281ae
--- /dev/null
+++ b/decoder/minimal_decoder.cc
@@ -0,0 +1,45 @@
+#include <ctime>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+
+#include "fdict.h"
+#include "filelib.h"
+#include "hg.h"
+#include "hg_io.h"
+#include "sparse_vector.h"
+#include "viterbi.h"
+
+using namespace std;
+
+/*
+ * Usage: minimal_decoder <hypergraph.json> <weights>
+ * Reads a hypergraph from JSON file argv[1], reweights it using the
+ * "feature weight" lines of argv[2], then prints the Viterbi translation
+ * and the processor time (clock()) spent sorting, searching and printing.
+ */
+int main(int argc, char** argv)
+{
+  if (argc != 3) {  // argv[1] and argv[2] are both dereferenced below
+    cerr << "Usage: minimal_decoder <hypergraph.json> <weights>" << endl;
+    return 1;
+  }
+  ReadFile rf(argv[1]);
+  Hypergraph hg;
+  HypergraphIO::ReadFromJSON(rf.stream(), &hg);
+  SparseVector<double> v;
+  ifstream f(argv[2]);
+  string line;
+  while (getline(f, line)) {
+    istringstream ss(line);
+    string k; weight_t w;
+    if (ss >> k >> w) v.add_value(FD::Convert(k), w);  // skip blank/malformed lines
+  }
+  hg.Reweight(v);
+  clock_t begin = clock();
+  hg.TopologicallySortNodesAndEdges(hg.NumberOfNodes()-1);
+  vector<WordID> trans;
+  ViterbiESentence(hg, &trans);
+  cout << TD::GetString(trans) << endl;  // endl already flushes
+  cout << double(clock() - begin) / CLOCKS_PER_SEC << " s" << endl;
+}
diff --git a/extractor/Makefile.am b/extractor/Makefile.am
index a406d9dc..cdfbb307 100644
--- a/extractor/Makefile.am
+++ b/extractor/Makefile.am
@@ -115,7 +115,7 @@ noinst_LIBRARIES = libextractor.a
 sacompile_SOURCES = sacompile.cc
 sacompile_LDADD = libextractor.a
 run_extractor_SOURCES = run_extractor.cc
-run_extractor_LDADD = libextractor.a
+run_extractor_LDADD = libextractor.a ../utils/libutils.a
 extract_SOURCES = extract.cc
 extract_LDADD = libextractor.a
 
diff --git a/extractor/run_extractor.cc b/extractor/run_extractor.cc
index 00564a36..75fae627 100644
--- a/extractor/run_extractor.cc
+++ b/extractor/run_extractor.cc
@@ -33,6 +33,7 @@
 #include "time_util.h"
 #include "translation_table.h"
 #include "vocabulary.h"
+#include "../utils/filelib.h"
 
 namespace fs = boost::filesystem;
 namespace po = boost::program_options;
@@ -42,7 +43,7 @@ using namespace features;
 
 // Returns the file path in which a given grammar should be written.
 fs::path GetGrammarFilePath(const fs::path& grammar_path, int file_number) {
-  string file_name = "grammar." + to_string(file_number);
+  string file_name = "grammar." + to_string(file_number) + ".gz";
   return grammar_path / file_name;
 }
 
@@ -239,8 +240,8 @@ int main(int argc, char** argv) {
     }
     Grammar grammar = extractor.GetGrammar(
         sentences[i], blacklisted_sentence_ids);
-    ofstream output(GetGrammarFilePath(grammar_path, i).c_str());
-    output << grammar;
+    WriteFile output(GetGrammarFilePath(grammar_path, i).c_str());
+    *output << grammar;
   }
 
   for (size_t i = 0; i < sentences.size(); ++i) {
diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index 82600009..5fc8b04e 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -4,7 +4,7 @@ require 'trollop'
 
 def usage
   STDERR.write "Usage: "
-  STDERR.write "ruby parallelize.rb -c <dtrain.ini> [-e <epochs=10>] [--randomize/-z] [--reshard/-y] -s <#shards|0> [-p <at once=9999>] -i <input> -r <refs> [--qsub/-q] [--dtrain_binary <path to dtrain binary>] [-l \"l2 select_k 100000\"] [--extra_qsub \"-l virtual_free=24G\"]\n"
+  STDERR.write "ruby parallelize.rb -c <dtrain.ini> [-e <epochs=10>] [--randomize/-z] [--reshard/-y] -s <#shards|0> [-p <at once=9999>] -i <input> -r <refs> [--qsub/-q] [--dtrain_binary <path to dtrain binary>] [-l \"l2 select_k 100000\"] [--extra_qsub \"-l mem_free=24G\"]\n"
   exit 1
 end
 
@@ -87,6 +87,7 @@ def make_shards(input, refs, num_shards, epoch, rand)
     refs_fns << refs_fn
     0.upto(shard_sz-1) { |i|
       j = index.pop
+      break if !j
       shard_in.write in_lines[j]
       shard_refs.write refs_lines[j]
     }
diff --git a/training/pro/pro.pl b/training/pro/pro.pl
index a059477d..8ebb5864 100755
--- a/training/pro/pro.pl
+++ b/training/pro/pro.pl
@@ -69,18 +69,19 @@ my $reg_previous = 5000;
 
 # Process command-line options
 if (GetOptions(
-	"config=s" => \$iniFile,
-	"weights=s" => \$initial_weights,
-        "devset=s" => \$devset,
-	"jobs=i" => \$jobs,
-	"metric=s" => \$metric,
-	"pass-suffix=s" => \$pass_suffix,
-        "qsub" => \$useqsub,
-	"help" => \$help,
-	"reg=f" => \$reg,
-	"reg-previous=f" => \$reg_previous,
+  "config=s" => \$iniFile,
+  "weights=s" => \$initial_weights,
+  "devset=s" => \$devset,
+  "jobs=i" => \$jobs,
+  "max-iterations=i" => \$max_iterations,
+  "metric=s" => \$metric,
+  "pass-suffix=s" => \$pass_suffix,
+  "qsub" => \$useqsub,
+  "help" => \$help,
+  "reg=f" => \$reg,
+  "reg-previous=f" => \$reg_previous,
   "pmem=s" => \$pmem,
-	"output-dir=s" => \$dir,
+  "output-dir=s" => \$dir,
 ) == 0 || @ARGV!=0 || $help) {
 	print_help();
 	exit;