diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-05-13 03:35:30 +0200 |
---|---|---|
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-05-13 03:35:30 +0200 |
commit | 670a8f984fc6d8342180c59ae9e96b0b76f34d3d (patch) | |
tree | 9f2ce7eec1a77e56b3bb1ad0ad40f212d7a996b0 /decoder | |
parent | eb3ee28dc0eb1d3e5ed01ba0df843be329ae450d (diff) | |
parent | 2f64af3e06a518b93f7ca2c30a9d0aeb2c947031 (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'decoder')
-rw-r--r-- | decoder/Jamfile | 72 | ||||
-rw-r--r-- | decoder/Makefile.am | 2 | ||||
-rw-r--r-- | decoder/cfg_test.cc | 3 | ||||
-rw-r--r-- | decoder/decoder.cc | 4 | ||||
-rw-r--r-- | decoder/grammar_test.cc | 6 | ||||
-rw-r--r-- | decoder/hg_io.cc | 17 | ||||
-rw-r--r-- | decoder/hg_io.h | 3 | ||||
-rw-r--r-- | decoder/hg_test.cc | 3 | ||||
-rw-r--r-- | decoder/hg_test.h | 6 | ||||
-rw-r--r-- | decoder/rule_lexer.ll (renamed from decoder/rule_lexer.l) | 0 |
10 files changed, 99 insertions, 17 deletions
diff --git a/decoder/Jamfile b/decoder/Jamfile new file mode 100644 index 00000000..f8112cae --- /dev/null +++ b/decoder/Jamfile @@ -0,0 +1,72 @@ +import testing ; +import lex ; +import option ; + +if [ option.get "with-glc" ] { + glc = ff_glc.cc string_util.cc feature-factory.cc ; +} + +lib cdec_lib : + forest_writer.cc + maxtrans_blunsom.cc + cdec_ff.cc + cfg.cc + dwarf.cc + ff_dwarf.cc + rule_lexer.ll + fst_translator.cc + csplit.cc + translator.cc + scfg_translator.cc + hg.cc + hg_io.cc + decoder.cc + hg_intersect.cc + hg_sampler.cc + factored_lexicon_helper.cc + viterbi.cc + lattice.cc + aligner.cc + apply_models.cc + earley_composer.cc + phrasetable_fst.cc + trule.cc + ff.cc + ff_rules.cc + ff_wordset.cc + ff_context.cc + ff_charset.cc + ff_lm.cc + ff_klm.cc + ff_ngrams.cc + ff_spans.cc + ff_ruleshape.cc + ff_wordalign.cc + ff_csplit.cc + ff_tagger.cc + ff_source_syntax.cc + ff_bleu.cc + ff_factory.cc + lexalign.cc + lextrans.cc + tagger.cc + bottom_up_parser.cc + phrasebased_translator.cc + JSON_parser.c + json_parse.cc + grammar.cc + $(glc) + ..//utils + ..//mteval + ../klm/lm//kenlm + ..//boost_program_options + : <include>. + ; + +exe cdec : cdec.cc cdec_lib ; + +all_tests [ glob *_test.cc : cfg_test.cc ] : cdec_lib : <testing.arg>$(TOP)/decoder/test_data ; + +install legacy : cdec + : <location>$(TOP)/cdec <install-type>EXE <install-dependencies>on <link>shared:<dll-path>$(TOP)/cdec <link>shared:<install-type>LIB ; + diff --git a/decoder/Makefile.am b/decoder/Makefile.am index d16a9147..00d01e53 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -21,7 +21,7 @@ cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/lm/libkl AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm -rule_lexer.cc: rule_lexer.l +rule_lexer.cc: rule_lexer.ll $(LEX) -s -CF -8 -o$@ $< noinst_LIBRARIES = libcdec.a diff --git a/decoder/cfg_test.cc b/decoder/cfg_test.cc index c61f9f2c..b8f4cf11 100644 --- a/decoder/cfg_test.cc +++ b/decoder/cfg_test.cc @@ -33,7 +33,8 @@ struct CFGTest : public TestWithParam<HgW> { istringstream ws(wts); EXPECT_TRUE(ws>>featw); CSHOW(featw) - HGSetup::JsonTestFile(&hg,file); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + HGSetup::JsonTestFile(&hg,path,file); hg.Reweight(featw); cfg.Init(hg,true,true,false); } diff --git a/decoder/decoder.cc b/decoder/decoder.cc index ec6f75f7..4ce2ba86 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -407,7 +407,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream ("show_partition,z", "Compute and show the partition (inside score)") ("show_conditional_prob", "Output the conditional log prob to STDOUT instead of a translation") ("show_cfg_search_space", "Show the search space as a CFG") - ("show_target_graph", "Output the target hypergraph") + ("show_target_graph", po::value<string>(), "Directory to write the target hypergraphs to") ("coarse_to_fine_beam_prune", po::value<double>(), "Prune paths from coarse parse forest before fine parse, keeping paths within exp(alpha>=0)") ("ctf_beam_widen", po::value<double>()->default_value(2.0), "Expand coarse pass beam by this factor if no fine parse is found") ("ctf_num_widenings", po::value<int>()->default_value(2), "Widen coarse beam this many times before backing off to full parse") @@ -816,7 +816,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { } if (conf.count("show_target_graph")) - HypergraphIO::WriteTarget(forest); + HypergraphIO::WriteTarget(conf["show_target_graph"].as<string>(), sent_id, forest); for (int pass = 0; pass < rescoring_passes.size(); ++pass) { const RescoringPass& rp = rescoring_passes[pass]; diff --git a/decoder/grammar_test.cc b/decoder/grammar_test.cc index e1a94709..4500490a 100644 --- a/decoder/grammar_test.cc +++ b/decoder/grammar_test.cc @@ -17,7 +17,8 @@ using namespace std; struct GrammarTest { GrammarTest() { - Weights::InitFromFile("test_data/weights.gt", &wts); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + Weights::InitFromFile(path + "/weights.gt", &wts); } vector<weight_t> wts; }; @@ -40,7 +41,8 @@ BOOST_AUTO_TEST_CASE(TestTextGrammar) { } BOOST_AUTO_TEST_CASE(TestTextGrammarFile) { - GrammarPtr g(new TextGrammar("./test_data/grammar.prune")); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + GrammarPtr g(new TextGrammar(path + "/grammar.prune")); vector<GrammarPtr> grammars(1, g); LatticeArc a(TD::Convert("ein"), 0.0, 1); diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc index d416dbf6..734c2ce8 100644 --- a/decoder/hg_io.cc +++ b/decoder/hg_io.cc @@ -1,5 +1,6 @@ #include "hg_io.h" +#include <fstream> #include <sstream> #include <iostream> @@ -651,22 +652,26 @@ void HypergraphIO::WriteAsCFG(const Hypergraph& hg) { * for each downward edge: * RHS with [vertex_index] for NTs ||| scores */ -void HypergraphIO::WriteTarget(const Hypergraph& hg) { - cout << hg.nodes_.size() << ' ' << hg.edges_.size() << '\n'; +void HypergraphIO::WriteTarget(const std::string &base, unsigned int id, const Hypergraph& hg) { + std::string name(base); + name += '/'; + name += boost::lexical_cast<std::string>(id); + std::fstream out(name.c_str(), std::fstream::out); + out << hg.nodes_.size() << ' ' << hg.edges_.size() << '\n'; for (unsigned int i = 0; i < hg.nodes_.size(); ++i) { const Hypergraph::EdgesVector &edges = hg.nodes_[i].in_edges_; - cout << edges.size() << '\n'; + out << edges.size() << '\n'; for (unsigned int j = 0; j < edges.size(); ++j) { const Hypergraph::Edge &edge = hg.edges_[edges[j]]; const std::vector<WordID> &e = edge.rule_->e(); for (std::vector<WordID>::const_iterator word = e.begin(); word != e.end(); ++word) { if (*word <= 0) { - cout << '[' << edge.tail_nodes_[-*word] << "] "; + out << '[' << edge.tail_nodes_[-*word] << "] "; } else { - cout << TD::Convert(*word) << ' '; + out << TD::Convert(*word) << ' '; } } - cout << "||| " << edge.rule_->scores_ << '\n'; + out << "||| " << edge.rule_->scores_ << '\n'; } } } diff --git a/decoder/hg_io.h b/decoder/hg_io.h index 4e502a0c..58af8132 100644 --- a/decoder/hg_io.h +++ b/decoder/hg_io.h @@ -2,6 +2,7 @@ #define _HG_IO_H_ #include <iostream> +#include <string> #include "lattice.h" class Hypergraph; @@ -24,7 +25,7 @@ struct HypergraphIO { static void WriteAsCFG(const Hypergraph& hg); // Write only the target size information in bottom-up order. - static void WriteTarget(const Hypergraph& hg); + static void WriteTarget(const std::string &base, unsigned int sent_id, const Hypergraph& hg); // serialization utils static void ReadFromPLF(const std::string& in, Hypergraph* out, int line = 0); diff --git a/decoder/hg_test.cc b/decoder/hg_test.cc index 8455a865..92ed98b2 100644 --- a/decoder/hg_test.cc +++ b/decoder/hg_test.cc @@ -335,7 +335,8 @@ BOOST_AUTO_TEST_CASE(TestAddExpectations) { BOOST_AUTO_TEST_CASE(Small) { Hypergraph hg; - CreateSmallHG(&hg); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + CreateSmallHG(&hg, path); SparseVector<double> wts; wts.set_value(FD::Convert("Model_0"), -2.0); wts.set_value(FD::Convert("Model_1"), -0.5); diff --git a/decoder/hg_test.h b/decoder/hg_test.h index 043f970a..2e308c37 100644 --- a/decoder/hg_test.h +++ b/decoder/hg_test.h @@ -46,10 +46,10 @@ struct HGSetup { ReadFile rf(f); HypergraphIO::ReadFromJSON(rf.stream(), hg); } - static void JsonTestFile(Hypergraph *hg,std::string n) { - JsonFile(hg,"test_data/"+n); + static void JsonTestFile(Hypergraph *hg,std::string path,std::string n) { + JsonFile(hg,path + "/"+n); } - static void CreateSmallHG(Hypergraph *hg) { JsonTestFile(hg,small_json); } + static void CreateSmallHG(Hypergraph *hg, std::string path) { JsonTestFile(hg,path,small_json); } }; namespace { diff --git a/decoder/rule_lexer.l b/decoder/rule_lexer.ll index 083a5bb1..083a5bb1 100644 --- a/decoder/rule_lexer.l +++ b/decoder/rule_lexer.ll |