diff options
Diffstat (limited to 'decoder')
| -rw-r--r-- | decoder/Jamfile | 72 | ||||
| -rw-r--r-- | decoder/Makefile.am | 2 | ||||
| -rw-r--r-- | decoder/cfg_test.cc | 3 | ||||
| -rw-r--r-- | decoder/decoder.cc | 4 | ||||
| -rw-r--r-- | decoder/grammar_test.cc | 6 | ||||
| -rw-r--r-- | decoder/hg_io.cc | 17 | ||||
| -rw-r--r-- | decoder/hg_io.h | 3 | ||||
| -rw-r--r-- | decoder/hg_test.cc | 3 | ||||
| -rw-r--r-- | decoder/hg_test.h | 6 | ||||
| -rw-r--r-- | decoder/rule_lexer.ll (renamed from decoder/rule_lexer.l) | 0 | 
10 files changed, 99 insertions, 17 deletions
| diff --git a/decoder/Jamfile b/decoder/Jamfile new file mode 100644 index 00000000..f8112cae --- /dev/null +++ b/decoder/Jamfile @@ -0,0 +1,72 @@ +import testing ; +import lex ; +import option ; + +if [ option.get "with-glc" ] { +  glc = ff_glc.cc string_util.cc feature-factory.cc ; +} + +lib cdec_lib :  +  forest_writer.cc +  maxtrans_blunsom.cc +  cdec_ff.cc +  cfg.cc +  dwarf.cc +  ff_dwarf.cc +  rule_lexer.ll +  fst_translator.cc +  csplit.cc +  translator.cc +  scfg_translator.cc +  hg.cc +  hg_io.cc +  decoder.cc +  hg_intersect.cc +  hg_sampler.cc +  factored_lexicon_helper.cc +  viterbi.cc +  lattice.cc +  aligner.cc +  apply_models.cc +  earley_composer.cc +  phrasetable_fst.cc +  trule.cc +  ff.cc +  ff_rules.cc +  ff_wordset.cc +  ff_context.cc +  ff_charset.cc +  ff_lm.cc +  ff_klm.cc +  ff_ngrams.cc +  ff_spans.cc +  ff_ruleshape.cc +  ff_wordalign.cc +  ff_csplit.cc +  ff_tagger.cc +  ff_source_syntax.cc +  ff_bleu.cc +  ff_factory.cc +  lexalign.cc +  lextrans.cc +  tagger.cc +  bottom_up_parser.cc +  phrasebased_translator.cc +  JSON_parser.c +  json_parse.cc +  grammar.cc +  $(glc) +  ..//utils +  ..//mteval +  ../klm/lm//kenlm +  ..//boost_program_options +  : <include>. +  ; + +exe cdec : cdec.cc cdec_lib ; + +all_tests [ glob *_test.cc : cfg_test.cc ] : cdec_lib : <testing.arg>$(TOP)/decoder/test_data ; + +install legacy : cdec +  : <location>$(TOP)/cdec <install-type>EXE <install-dependencies>on <link>shared:<dll-path>$(TOP)/cdec <link>shared:<install-type>LIB ; + diff --git a/decoder/Makefile.am b/decoder/Makefile.am index d16a9147..00d01e53 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -21,7 +21,7 @@ cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/lm/libkl  AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm -rule_lexer.cc: rule_lexer.l +rule_lexer.cc: rule_lexer.ll  	$(LEX) -s -CF -8 -o$@ $<  noinst_LIBRARIES = libcdec.a diff --git a/decoder/cfg_test.cc b/decoder/cfg_test.cc index c61f9f2c..b8f4cf11 100644 --- a/decoder/cfg_test.cc +++ b/decoder/cfg_test.cc @@ -33,7 +33,8 @@ struct CFGTest : public TestWithParam<HgW> {      istringstream ws(wts);      EXPECT_TRUE(ws>>featw);      CSHOW(featw) -    HGSetup::JsonTestFile(&hg,file); +    std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); +    HGSetup::JsonTestFile(&hg,path,file);      hg.Reweight(featw);      cfg.Init(hg,true,true,false);    } diff --git a/decoder/decoder.cc b/decoder/decoder.cc index ec6f75f7..4ce2ba86 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -407,7 +407,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream          ("show_partition,z", "Compute and show the partition (inside score)")          ("show_conditional_prob", "Output the conditional log prob to STDOUT instead of a translation")          ("show_cfg_search_space", "Show the search space as a CFG") -        ("show_target_graph", "Output the target hypergraph") +        ("show_target_graph", po::value<string>(), "Directory to write the target hypergraphs to")          ("coarse_to_fine_beam_prune", po::value<double>(), "Prune paths from coarse parse forest before fine parse, keeping paths within exp(alpha>=0)")          ("ctf_beam_widen", po::value<double>()->default_value(2.0), "Expand coarse pass beam by this factor if no fine parse is found")          ("ctf_num_widenings", po::value<int>()->default_value(2), "Widen coarse beam this many times before backing off to full parse") @@ -816,7 +816,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {    }    if (conf.count("show_target_graph")) -    HypergraphIO::WriteTarget(forest); +    HypergraphIO::WriteTarget(conf["show_target_graph"].as<string>(), sent_id, forest);    for (int pass = 0; pass < rescoring_passes.size(); ++pass) {      const RescoringPass& rp = rescoring_passes[pass]; diff --git a/decoder/grammar_test.cc b/decoder/grammar_test.cc index e1a94709..4500490a 100644 --- a/decoder/grammar_test.cc +++ b/decoder/grammar_test.cc @@ -17,7 +17,8 @@ using namespace std;  struct GrammarTest {    GrammarTest() { -    Weights::InitFromFile("test_data/weights.gt", &wts); +    std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); +    Weights::InitFromFile(path + "/weights.gt", &wts);    }    vector<weight_t> wts;  }; @@ -40,7 +41,8 @@ BOOST_AUTO_TEST_CASE(TestTextGrammar) {  }  BOOST_AUTO_TEST_CASE(TestTextGrammarFile) { -  GrammarPtr g(new TextGrammar("./test_data/grammar.prune")); +  std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); +  GrammarPtr g(new TextGrammar(path + "/grammar.prune"));    vector<GrammarPtr> grammars(1, g);    LatticeArc a(TD::Convert("ein"), 0.0, 1); diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc index d416dbf6..734c2ce8 100644 --- a/decoder/hg_io.cc +++ b/decoder/hg_io.cc @@ -1,5 +1,6 @@  #include "hg_io.h" +#include <fstream>  #include <sstream>  #include <iostream> @@ -651,22 +652,26 @@ void HypergraphIO::WriteAsCFG(const Hypergraph& hg) {   *   for each downward edge:   *     RHS with [vertex_index] for NTs ||| scores   */ -void HypergraphIO::WriteTarget(const Hypergraph& hg) { -  cout << hg.nodes_.size() << ' ' << hg.edges_.size() << '\n'; +void HypergraphIO::WriteTarget(const std::string &base, unsigned int id, const Hypergraph& hg) { +  std::string name(base); +  name += '/'; +  name += boost::lexical_cast<std::string>(id); +  std::fstream out(name.c_str(), std::fstream::out); +  out << hg.nodes_.size() << ' ' << hg.edges_.size() << '\n';    for (unsigned int i = 0; i < hg.nodes_.size(); ++i) {      const Hypergraph::EdgesVector &edges = hg.nodes_[i].in_edges_; -    cout << edges.size() << '\n'; +    out << edges.size() << '\n';      for (unsigned int j = 0; j < edges.size(); ++j) {        const Hypergraph::Edge &edge = hg.edges_[edges[j]];        const std::vector<WordID> &e = edge.rule_->e();        for (std::vector<WordID>::const_iterator word = e.begin(); word != e.end(); ++word) {          if (*word <= 0) { -          cout << '[' << edge.tail_nodes_[-*word] << "] "; +          out << '[' << edge.tail_nodes_[-*word] << "] ";          } else { -          cout << TD::Convert(*word) << ' '; +          out << TD::Convert(*word) << ' ';          }        } -      cout << "||| " << edge.rule_->scores_ << '\n'; +      out << "||| " << edge.rule_->scores_ << '\n';      }    }  } diff --git a/decoder/hg_io.h b/decoder/hg_io.h index 4e502a0c..58af8132 100644 --- a/decoder/hg_io.h +++ b/decoder/hg_io.h @@ -2,6 +2,7 @@  #define _HG_IO_H_  #include <iostream> +#include <string>  #include "lattice.h"  class Hypergraph; @@ -24,7 +25,7 @@ struct HypergraphIO {    static void WriteAsCFG(const Hypergraph& hg);    // Write only the target size information in bottom-up order.   -  static void WriteTarget(const Hypergraph& hg); +  static void WriteTarget(const std::string &base, unsigned int sent_id, const Hypergraph& hg);    // serialization utils    static void ReadFromPLF(const std::string& in, Hypergraph* out, int line = 0); diff --git a/decoder/hg_test.cc b/decoder/hg_test.cc index 8455a865..92ed98b2 100644 --- a/decoder/hg_test.cc +++ b/decoder/hg_test.cc @@ -335,7 +335,8 @@ BOOST_AUTO_TEST_CASE(TestAddExpectations) {  BOOST_AUTO_TEST_CASE(Small) {    Hypergraph hg; -  CreateSmallHG(&hg); +  std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); +  CreateSmallHG(&hg, path);    SparseVector<double> wts;    wts.set_value(FD::Convert("Model_0"), -2.0);    wts.set_value(FD::Convert("Model_1"), -0.5); diff --git a/decoder/hg_test.h b/decoder/hg_test.h index 043f970a..2e308c37 100644 --- a/decoder/hg_test.h +++ b/decoder/hg_test.h @@ -46,10 +46,10 @@ struct HGSetup {      ReadFile rf(f);      HypergraphIO::ReadFromJSON(rf.stream(), hg);    } -  static void JsonTestFile(Hypergraph *hg,std::string n) { -    JsonFile(hg,"test_data/"+n); +  static void JsonTestFile(Hypergraph *hg,std::string path,std::string n) { +    JsonFile(hg,path + "/"+n);    } -  static void CreateSmallHG(Hypergraph *hg) { JsonTestFile(hg,small_json); } +  static void CreateSmallHG(Hypergraph *hg, std::string path) { JsonTestFile(hg,path,small_json); }  };  namespace { diff --git a/decoder/rule_lexer.l b/decoder/rule_lexer.ll index 083a5bb1..083a5bb1 100644 --- a/decoder/rule_lexer.l +++ b/decoder/rule_lexer.ll | 
