summary | refs | log | tree | commit | diff
path: root/decoder/decoder.cc
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cab.ark.cs.cmu.edu> 2012-06-19 00:05:18 -0400
committer Chris Dyer <cdyer@cab.ark.cs.cmu.edu> 2012-06-19 00:05:18 -0400
commit 5975dcaa50adb5ce7a05b83583b8f9ddc45f3f0a (patch)
tree 2bc2eb4e17576e0726d7a2fa7f20eac9061c311d /decoder/decoder.cc
parent 78cc819168b2a550e52e9cac06dbbed41a3b04b2 (diff)
parent ee1520c5095ea8648617a3658b20eedfd4dd2007 (diff)
Merge branch 'master' of https://github.com/pks/cdec-dtrain
Diffstat (limited to 'decoder/decoder.cc')
-rw-r--r-- decoder/decoder.cc 22
1 files changed, 15 insertions, 7 deletions
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index cbb97a0d..333f0fb6 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -3,6 +3,7 @@
#include <tr1/unordered_map>
#include <boost/program_options.hpp>
#include <boost/program_options/variables_map.hpp>
+#include <boost/make_shared.hpp>
#include "program_options.h"
#include "stringlib.h"
@@ -187,8 +188,8 @@ struct DecoderImpl {
}
void SetId(int next_sent_id) { sent_id = next_sent_id - 1; }
- void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_deriv=false) {
- cerr << viterbi_stats(forest,name,true,show_tree,show_deriv);
+ void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_deriv=false, bool extract_rules=false, boost::shared_ptr<WriteFile> extract_file = boost::make_shared<WriteFile>()) {
+ cerr << viterbi_stats(forest,name,true,show_tree,show_deriv,extract_rules, extract_file);
cerr << endl;
}
@@ -424,7 +425,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
("tagger_tagset,t", po::value<string>(), "(Tagger) file containing tag set")
("csplit_output_plf", "(Compound splitter) Output lattice in PLF format")
("csplit_preserve_full_word", "(Compound splitter) Always include the unsegmented form in the output lattice")
- ("extract_rules", po::value<string>(), "Extract the rules used in translation (de-duped) to this file")
+ ("extract_rules", po::value<string>(), "Extract the rules used in translation (not de-duped!) to a file in this directory")
("show_derivations", po::value<string>(), "Directory to print the derivation structures to")
("graphviz","Show (constrained) translation forest in GraphViz format")
("max_translation_beam,x", po::value<int>(), "Beam approximation to get max translation from the chart")
@@ -570,6 +571,11 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
// cube pruning pop-limit: we may want to configure this on a per-pass basis
pop_limit = conf["cubepruning_pop_limit"].as<int>();
+ if (conf.count("extract_rules")) {
+ if (!DirectoryExists(conf["extract_rules"].as<string>()))
+ MkDirP(conf["extract_rules"].as<string>());
+ }
+
// determine the number of rescoring/pruning/weighting passes configured
const int MAX_PASSES = 3;
for (int pass = 0; pass < MAX_PASSES; ++pass) {
@@ -712,9 +718,11 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
cfg_options.Validate();
#endif
- if (conf.count("extract_rules"))
- extract_file.reset(new WriteFile(str("extract_rules",conf)));
-
+ if (conf.count("extract_rules")) {
+ stringstream ss;
+ ss << sent_id;
+ extract_file.reset(new WriteFile(str("extract_rules",conf)+"/"+ss.str()));
+ }
combine_size = conf["combine_size"].as<int>();
if (combine_size < 1) combine_size = 1;
sent_id = -1;
@@ -851,7 +859,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
#endif
forest.swap(rescored_forest);
forest.Reweight(cur_weights);
- if (!SILENT) forest_stats(forest," " + passtr +" forest",show_tree_structure,oracle.show_derivation);
+ if (!SILENT) forest_stats(forest," " + passtr +" forest",show_tree_structure,oracle.show_derivation, conf.count("extract_rules"), extract_file);
}
if (conf.count("show_partition")) {