summaryrefslogtreecommitdiff
path: root/decoder
diff options
context:
space:
mode:
authorPatrick Simianer <simianer@cl.uni-heidelberg.de>2012-05-13 03:35:30 +0200
committerPatrick Simianer <simianer@cl.uni-heidelberg.de>2012-05-13 03:35:30 +0200
commitd94373453c69c6cfec952a0f7b427cacc78654d8 (patch)
tree43febdf719c103d19bd5d22d0be734e1574bc1e9 /decoder
parentcc9650b8b664d1f6836a0fa86a012401b51aafa0 (diff)
parenta65a80c5d5b6fc4cbd32280f07cae9be71551b70 (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'decoder')
-rw-r--r--decoder/Jamfile72
-rw-r--r--decoder/Makefile.am2
-rw-r--r--decoder/cfg_test.cc3
-rw-r--r--decoder/decoder.cc4
-rw-r--r--decoder/grammar_test.cc6
-rw-r--r--decoder/hg_io.cc17
-rw-r--r--decoder/hg_io.h3
-rw-r--r--decoder/hg_test.cc3
-rw-r--r--decoder/hg_test.h6
-rw-r--r--decoder/rule_lexer.ll (renamed from decoder/rule_lexer.l)0
10 files changed, 99 insertions, 17 deletions
diff --git a/decoder/Jamfile b/decoder/Jamfile
new file mode 100644
index 00000000..f8112cae
--- /dev/null
+++ b/decoder/Jamfile
@@ -0,0 +1,72 @@
+import testing ;
+import lex ;
+import option ;
+
+if [ option.get "with-glc" ] {
+ glc = ff_glc.cc string_util.cc feature-factory.cc ;
+}
+
+lib cdec_lib :
+ forest_writer.cc
+ maxtrans_blunsom.cc
+ cdec_ff.cc
+ cfg.cc
+ dwarf.cc
+ ff_dwarf.cc
+ rule_lexer.ll
+ fst_translator.cc
+ csplit.cc
+ translator.cc
+ scfg_translator.cc
+ hg.cc
+ hg_io.cc
+ decoder.cc
+ hg_intersect.cc
+ hg_sampler.cc
+ factored_lexicon_helper.cc
+ viterbi.cc
+ lattice.cc
+ aligner.cc
+ apply_models.cc
+ earley_composer.cc
+ phrasetable_fst.cc
+ trule.cc
+ ff.cc
+ ff_rules.cc
+ ff_wordset.cc
+ ff_context.cc
+ ff_charset.cc
+ ff_lm.cc
+ ff_klm.cc
+ ff_ngrams.cc
+ ff_spans.cc
+ ff_ruleshape.cc
+ ff_wordalign.cc
+ ff_csplit.cc
+ ff_tagger.cc
+ ff_source_syntax.cc
+ ff_bleu.cc
+ ff_factory.cc
+ lexalign.cc
+ lextrans.cc
+ tagger.cc
+ bottom_up_parser.cc
+ phrasebased_translator.cc
+ JSON_parser.c
+ json_parse.cc
+ grammar.cc
+ $(glc)
+ ..//utils
+ ..//mteval
+ ../klm/lm//kenlm
+ ..//boost_program_options
+ : <include>.
+ ;
+
+exe cdec : cdec.cc cdec_lib ;
+
+all_tests [ glob *_test.cc : cfg_test.cc ] : cdec_lib : <testing.arg>$(TOP)/decoder/test_data ;
+
+install legacy : cdec
+ : <location>$(TOP)/cdec <install-type>EXE <install-dependencies>on <link>shared:<dll-path>$(TOP)/cdec <link>shared:<install-type>LIB ;
+
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index d16a9147..00d01e53 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -21,7 +21,7 @@ cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/lm/libkl
AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm
-rule_lexer.cc: rule_lexer.l
+rule_lexer.cc: rule_lexer.ll
$(LEX) -s -CF -8 -o$@ $<
noinst_LIBRARIES = libcdec.a
diff --git a/decoder/cfg_test.cc b/decoder/cfg_test.cc
index c61f9f2c..b8f4cf11 100644
--- a/decoder/cfg_test.cc
+++ b/decoder/cfg_test.cc
@@ -33,7 +33,8 @@ struct CFGTest : public TestWithParam<HgW> {
istringstream ws(wts);
EXPECT_TRUE(ws>>featw);
CSHOW(featw)
- HGSetup::JsonTestFile(&hg,file);
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data");
+ HGSetup::JsonTestFile(&hg,path,file);
hg.Reweight(featw);
cfg.Init(hg,true,true,false);
}
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index ec6f75f7..4ce2ba86 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -407,7 +407,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
("show_partition,z", "Compute and show the partition (inside score)")
("show_conditional_prob", "Output the conditional log prob to STDOUT instead of a translation")
("show_cfg_search_space", "Show the search space as a CFG")
- ("show_target_graph", "Output the target hypergraph")
+ ("show_target_graph", po::value<string>(), "Directory to write the target hypergraphs to")
("coarse_to_fine_beam_prune", po::value<double>(), "Prune paths from coarse parse forest before fine parse, keeping paths within exp(alpha>=0)")
("ctf_beam_widen", po::value<double>()->default_value(2.0), "Expand coarse pass beam by this factor if no fine parse is found")
("ctf_num_widenings", po::value<int>()->default_value(2), "Widen coarse beam this many times before backing off to full parse")
@@ -816,7 +816,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
}
if (conf.count("show_target_graph"))
- HypergraphIO::WriteTarget(forest);
+ HypergraphIO::WriteTarget(conf["show_target_graph"].as<string>(), sent_id, forest);
for (int pass = 0; pass < rescoring_passes.size(); ++pass) {
const RescoringPass& rp = rescoring_passes[pass];
diff --git a/decoder/grammar_test.cc b/decoder/grammar_test.cc
index e1a94709..4500490a 100644
--- a/decoder/grammar_test.cc
+++ b/decoder/grammar_test.cc
@@ -17,7 +17,8 @@ using namespace std;
struct GrammarTest {
GrammarTest() {
- Weights::InitFromFile("test_data/weights.gt", &wts);
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data");
+ Weights::InitFromFile(path + "/weights.gt", &wts);
}
vector<weight_t> wts;
};
@@ -40,7 +41,8 @@ BOOST_AUTO_TEST_CASE(TestTextGrammar) {
}
BOOST_AUTO_TEST_CASE(TestTextGrammarFile) {
- GrammarPtr g(new TextGrammar("./test_data/grammar.prune"));
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data");
+ GrammarPtr g(new TextGrammar(path + "/grammar.prune"));
vector<GrammarPtr> grammars(1, g);
LatticeArc a(TD::Convert("ein"), 0.0, 1);
diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc
index d416dbf6..734c2ce8 100644
--- a/decoder/hg_io.cc
+++ b/decoder/hg_io.cc
@@ -1,5 +1,6 @@
#include "hg_io.h"
+#include <fstream>
#include <sstream>
#include <iostream>
@@ -651,22 +652,26 @@ void HypergraphIO::WriteAsCFG(const Hypergraph& hg) {
* for each downward edge:
* RHS with [vertex_index] for NTs ||| scores
*/
-void HypergraphIO::WriteTarget(const Hypergraph& hg) {
- cout << hg.nodes_.size() << ' ' << hg.edges_.size() << '\n';
+void HypergraphIO::WriteTarget(const std::string &base, unsigned int id, const Hypergraph& hg) {
+ std::string name(base);
+ name += '/';
+ name += boost::lexical_cast<std::string>(id);
+ std::fstream out(name.c_str(), std::fstream::out);
+ out << hg.nodes_.size() << ' ' << hg.edges_.size() << '\n';
for (unsigned int i = 0; i < hg.nodes_.size(); ++i) {
const Hypergraph::EdgesVector &edges = hg.nodes_[i].in_edges_;
- cout << edges.size() << '\n';
+ out << edges.size() << '\n';
for (unsigned int j = 0; j < edges.size(); ++j) {
const Hypergraph::Edge &edge = hg.edges_[edges[j]];
const std::vector<WordID> &e = edge.rule_->e();
for (std::vector<WordID>::const_iterator word = e.begin(); word != e.end(); ++word) {
if (*word <= 0) {
- cout << '[' << edge.tail_nodes_[-*word] << "] ";
+ out << '[' << edge.tail_nodes_[-*word] << "] ";
} else {
- cout << TD::Convert(*word) << ' ';
+ out << TD::Convert(*word) << ' ';
}
}
- cout << "||| " << edge.rule_->scores_ << '\n';
+ out << "||| " << edge.rule_->scores_ << '\n';
}
}
}
diff --git a/decoder/hg_io.h b/decoder/hg_io.h
index 4e502a0c..58af8132 100644
--- a/decoder/hg_io.h
+++ b/decoder/hg_io.h
@@ -2,6 +2,7 @@
#define _HG_IO_H_
#include <iostream>
+#include <string>
#include "lattice.h"
class Hypergraph;
@@ -24,7 +25,7 @@ struct HypergraphIO {
static void WriteAsCFG(const Hypergraph& hg);
// Write only the target size information in bottom-up order.
- static void WriteTarget(const Hypergraph& hg);
+ static void WriteTarget(const std::string &base, unsigned int sent_id, const Hypergraph& hg);
// serialization utils
static void ReadFromPLF(const std::string& in, Hypergraph* out, int line = 0);
diff --git a/decoder/hg_test.cc b/decoder/hg_test.cc
index 8455a865..92ed98b2 100644
--- a/decoder/hg_test.cc
+++ b/decoder/hg_test.cc
@@ -335,7 +335,8 @@ BOOST_AUTO_TEST_CASE(TestAddExpectations) {
BOOST_AUTO_TEST_CASE(Small) {
Hypergraph hg;
- CreateSmallHG(&hg);
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data");
+ CreateSmallHG(&hg, path);
SparseVector<double> wts;
wts.set_value(FD::Convert("Model_0"), -2.0);
wts.set_value(FD::Convert("Model_1"), -0.5);
diff --git a/decoder/hg_test.h b/decoder/hg_test.h
index 043f970a..2e308c37 100644
--- a/decoder/hg_test.h
+++ b/decoder/hg_test.h
@@ -46,10 +46,10 @@ struct HGSetup {
ReadFile rf(f);
HypergraphIO::ReadFromJSON(rf.stream(), hg);
}
- static void JsonTestFile(Hypergraph *hg,std::string n) {
- JsonFile(hg,"test_data/"+n);
+ static void JsonTestFile(Hypergraph *hg,std::string path,std::string n) {
+ JsonFile(hg,path + "/"+n);
}
- static void CreateSmallHG(Hypergraph *hg) { JsonTestFile(hg,small_json); }
+ static void CreateSmallHG(Hypergraph *hg, std::string path) { JsonTestFile(hg,path,small_json); }
};
namespace {
diff --git a/decoder/rule_lexer.l b/decoder/rule_lexer.ll
index 083a5bb1..083a5bb1 100644
--- a/decoder/rule_lexer.l
+++ b/decoder/rule_lexer.ll