diff options
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | decoder/Makefile.am | 5 | ||||
| -rw-r--r-- | decoder/minimal_decoder.cc | 45 | ||||
| -rw-r--r-- | extractor/Makefile.am | 2 | ||||
| -rw-r--r-- | extractor/run_extractor.cc | 7 | ||||
| -rwxr-xr-x | training/dtrain/parallelize.rb | 3 | ||||
| -rwxr-xr-x | training/pro/pro.pl | 23 | 
7 files changed, 69 insertions, 17 deletions
@@ -44,6 +44,7 @@ decoder/ff_test  decoder/grammar_test  decoder/hg_test  decoder/logval_test +decoder/minimal_decoder  decoder/parser_test  decoder/rule_lexer.cc  decoder/small_vector_test diff --git a/decoder/Makefile.am b/decoder/Makefile.am index dbec532e..e313f1f9 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -1,4 +1,4 @@ -bin_PROGRAMS = cdec +bin_PROGRAMS = cdec minimal_decoder  noinst_PROGRAMS = \    trule_test \ @@ -23,6 +23,9 @@ cdec_SOURCES = cdec.cc  cdec_LDFLAGS= -rdynamic $(STATIC_FLAGS)  cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a ../klm/util/double-conversion/libklm_util_double.a +minimal_decoder_SOURCES = minimal_decoder.cc +minimal_decoder_LDADD = libcdec.a ../utils/libutils.a +  AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/decoder/test_data\" -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/mteval -I$(top_srcdir)/utils -I$(top_srcdir)/klm  rule_lexer.cc: rule_lexer.ll diff --git a/decoder/minimal_decoder.cc b/decoder/minimal_decoder.cc new file mode 100644 index 00000000..0aa281ae --- /dev/null +++ b/decoder/minimal_decoder.cc @@ -0,0 +1,45 @@ +#include <fstream> +#include <iostream> +#include <sstream> + +#include "fdict.h" +#include "filelib.h" +#include "hg.h" +#include "hg_io.h" +#include "sparse_vector.h" +#include "viterbi.h" + + +using namespace std; + +/* + * Reads hypergraph from JSON file argv[1], + * reweights it using weights from argv[2], + * and outputs viterbi translation. + * + */ +int main(int argc, char** argv) +{ +  ReadFile rf(argv[1]); +  Hypergraph hg; +  HypergraphIO::ReadFromJSON(rf.stream(), &hg); +  SparseVector<double> v; +  ifstream f(argv[2]); +  string line; +  while (getline(f, line)) { +    istringstream ss(line); +    string k; weight_t w; +    ss >> k >> w; +    v.add_value(FD::Convert(k), w); +  } +  hg.Reweight(v); +  clock_t begin = clock(); +  hg.TopologicallySortNodesAndEdges(hg.NumberOfNodes()-1); +  vector<WordID> trans; +  ViterbiESentence(hg, &trans); +  cout << TD::GetString(trans) << endl << flush; +  clock_t end = clock(); +  double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC; +  cout << elapsed_secs << " s" << endl; +} + diff --git a/extractor/Makefile.am b/extractor/Makefile.am index a406d9dc..cdfbb307 100644 --- a/extractor/Makefile.am +++ b/extractor/Makefile.am @@ -115,7 +115,7 @@ noinst_LIBRARIES = libextractor.a  sacompile_SOURCES = sacompile.cc  sacompile_LDADD = libextractor.a  run_extractor_SOURCES = run_extractor.cc -run_extractor_LDADD = libextractor.a +run_extractor_LDADD = libextractor.a ../utils/libutils.a  extract_SOURCES = extract.cc  extract_LDADD = libextractor.a diff --git a/extractor/run_extractor.cc b/extractor/run_extractor.cc index 00564a36..75fae627 100644 --- a/extractor/run_extractor.cc +++ b/extractor/run_extractor.cc @@ -33,6 +33,7 @@  #include "time_util.h"  #include "translation_table.h"  #include "vocabulary.h" +#include "../utils/filelib.h"  namespace fs = boost::filesystem;  namespace po = boost::program_options; @@ -42,7 +43,7 @@ using namespace features;  // Returns the file path in which a given grammar should be written.  fs::path GetGrammarFilePath(const fs::path& grammar_path, int file_number) { -  string file_name = "grammar." + to_string(file_number); +  string file_name = "grammar." + to_string(file_number) + ".gz";    return grammar_path / file_name;  } @@ -239,8 +240,8 @@ int main(int argc, char** argv) {      }      Grammar grammar = extractor.GetGrammar(          sentences[i], blacklisted_sentence_ids); -    ofstream output(GetGrammarFilePath(grammar_path, i).c_str()); -    output << grammar; +    WriteFile output(GetGrammarFilePath(grammar_path, i).c_str()); +    *output << grammar;    }    for (size_t i = 0; i < sentences.size(); ++i) { diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index 82600009..5fc8b04e 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -4,7 +4,7 @@ require 'trollop'  def usage    STDERR.write "Usage: " -  STDERR.write "ruby parallelize.rb -c <dtrain.ini> [-e <epochs=10>] [--randomize/-z] [--reshard/-y] -s <#shards|0> [-p <at once=9999>] -i <input> -r <refs> [--qsub/-q] [--dtrain_binary <path to dtrain binary>] [-l \"l2 select_k 100000\"] [--extra_qsub \"-l virtual_free=24G\"]\n" +  STDERR.write "ruby parallelize.rb -c <dtrain.ini> [-e <epochs=10>] [--randomize/-z] [--reshard/-y] -s <#shards|0> [-p <at once=9999>] -i <input> -r <refs> [--qsub/-q] [--dtrain_binary <path to dtrain binary>] [-l \"l2 select_k 100000\"] [--extra_qsub \"-l mem_free=24G\"]\n"    exit 1  end @@ -87,6 +87,7 @@ def make_shards(input, refs, num_shards, epoch, rand)      refs_fns << refs_fn      0.upto(shard_sz-1) { |i|        j = index.pop +      break if !j        shard_in.write in_lines[j]        shard_refs.write refs_lines[j]      } diff --git a/training/pro/pro.pl b/training/pro/pro.pl index a059477d..8ebb5864 100755 --- a/training/pro/pro.pl +++ b/training/pro/pro.pl @@ -69,18 +69,19 @@ my $reg_previous = 5000;  # Process command-line options  if (GetOptions( -	"config=s" => \$iniFile, -	"weights=s" => \$initial_weights, -        "devset=s" => \$devset, -	"jobs=i" => \$jobs, -	"metric=s" => \$metric, -	"pass-suffix=s" => \$pass_suffix, -        "qsub" => \$useqsub, -	"help" => \$help, -	"reg=f" => \$reg, -	"reg-previous=f" => \$reg_previous, +  "config=s" => \$iniFile, +  "weights=s" => \$initial_weights, +  "devset=s" => \$devset, +  "jobs=i" => \$jobs, +  "max-iterations=i" => \$max_iterations, +  "metric=s" => \$metric, +  "pass-suffix=s" => \$pass_suffix, +  "qsub" => \$useqsub, +  "help" => \$help, +  "reg=f" => \$reg, +  "reg-previous=f" => \$reg_previous,    "pmem=s" => \$pmem, -	"output-dir=s" => \$dir, +  "output-dir=s" => \$dir,  ) == 0 || @ARGV!=0 || $help) {  	print_help();  	exit;  | 
