summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-11-15 20:22:22 +0000
committer: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-11-15 20:22:22 +0000
commit: c8c315a4f78c464636ea5e3fd9a11416b2f966b9 (patch)
tree: 763f25d7db1c5b9add1c33d20f8d290c046ff406
parent: c0afd1924cf0c228a85352b3584c64a5e00b88c7 (diff)
rescoring working
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@726 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r-- decoder/decoder.cc 17
-rw-r--r-- decoder/lextrans.cc 7
-rwxr-xr-x rescore/rescore_with_cdec_model.pl 5
3 files changed, 17 insertions, 12 deletions
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index 065510a7..daf82f10 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -354,7 +354,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
("show_tree_structure", "Show the Viterbi derivation structure")
("show_expected_length", "Show the expected translation length under the model")
("show_partition,z", "Compute and show the partition (inside score)")
- ("show_partition_as_translation", "Output the partition to STDOUT instead of a translation")
+ ("show_conditional_prob", "Output the conditional log prob to STDOUT instead of a translation")
("show_cfg_search_space", "Show the search space as a CFG")
("show_features","Show the feature vector for the viterbi translation")
("prelm_density_prune", po::value<double>(), "Applied to -LM forest just before final LM rescoring: keep no more than this many times the number of edges used in the best derivation tree (>=1.0)")
@@ -680,7 +680,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
if (!SILENT) cerr << " NO PARSE FOUND.\n";
o->NotifySourceParseFailure(smeta);
o->NotifyDecodingComplete(smeta);
- if (conf.count("show_partition_as_translation")) {
+ if (conf.count("show_conditional_prob")) {
cout << "-Inf" << endl << flush;
}
return false;
@@ -807,6 +807,11 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
}
}
+ prob_t first_z;
+ if (conf.count("show_conditional_prob")) {
+ first_z = Inside<prob_t, EdgeProb>(forest);
+ }
+
// TODO this should be handled by an Observer
const int max_trans_beam_size = conf.count("max_translation_beam") ?
conf["max_translation_beam"].as<int>() : 0;
@@ -910,9 +915,9 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
if (conf.count("graphviz")) forest.PrintGraphviz();
if (kbest)
oracle.DumpKBest(sent_id, forest, conf["k_best"].as<int>(), unique_kbest,"-");
- if (conf.count("show_partition_as_translation")) {
- const prob_t z = Inside<prob_t, EdgeProb>(forest);
- cout << log(z) << endl << flush;
+ if (conf.count("show_conditional_prob")) {
+ const prob_t ref_z = Inside<prob_t, EdgeProb>(forest);
+ cout << (log(ref_z) - log(first_z)) << endl << flush;
}
} else {
o->NotifyAlignmentFailure(smeta);
@@ -920,7 +925,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
if (write_gradient) {
cout << endl << flush;
}
- if (conf.count("show_partition_as_translation")) {
+ if (conf.count("show_conditional_prob")) {
cout << "-Inf" << endl << flush;
}
}
diff --git a/decoder/lextrans.cc b/decoder/lextrans.cc
index 551e77e3..c3bd775f 100644
--- a/decoder/lextrans.cc
+++ b/decoder/lextrans.cc
@@ -60,7 +60,7 @@ struct LexicalTransImpl {
}
}
- void BuildTrellis(const Lattice& lattice, const SentenceMetadata& smeta, Hypergraph* forest) {
+ bool BuildTrellis(const Lattice& lattice, const SentenceMetadata& smeta, Hypergraph* forest) {
if (psg_file_) {
const string offset = smeta.GetSGMLValue("psg");
if (offset.size() < 2 || offset[0] != '@') {
@@ -86,7 +86,7 @@ struct LexicalTransImpl {
gi = sup_grammar->GetRoot()->Extend(src_sym);
if (!gi) {
cerr << "No translations found for: " << TD::Convert(src_sym) << "\n";
- abort();
+ return false;
}
}
const RuleBin* rb = gi->GetRules();
@@ -117,6 +117,7 @@ struct LexicalTransImpl {
Hypergraph::Node* goal = forest->AddNode(TD::Convert("Goal")*-1);
Hypergraph::Edge* hg_edge = forest->AddEdge(kGOAL_RULE, tail);
forest->ConnectEdgeToHeadNode(hg_edge, goal);
+ return true;
}
private:
@@ -146,7 +147,7 @@ bool LexicalTrans::TranslateImpl(const string& input,
abort();
}
smeta->SetSourceLength(lattice.size());
- pimpl_->BuildTrellis(lattice, *smeta, forest);
+ if (!pimpl_->BuildTrellis(lattice, *smeta, forest)) return false;
forest->is_linear_chain_ = true;
forest->Reweight(weights);
return true;
diff --git a/rescore/rescore_with_cdec_model.pl b/rescore/rescore_with_cdec_model.pl
index 6553fe56..cdd8c217 100755
--- a/rescore/rescore_with_cdec_model.pl
+++ b/rescore/rescore_with_cdec_model.pl
@@ -46,7 +46,7 @@ if (defined $weights_file) {
die "Can't read $weights_file" unless -f $weights_file;
$weights = "-w $weights_file";
}
-my $decoder_command = "$decoder -c $cdec_ini --quiet $weights --show_partition_as_translation";
+my $decoder_command = "$decoder -c $cdec_ini --quiet $weights --show_conditional_prob";
print STDERR "DECODER COMMAND: $decoder_command\n";
my $cdec_pid = open2(\*CDEC_IN, \*CDEC_OUT, $decoder_command)
or die "Couldn't run $decoder: $!";
@@ -108,8 +108,7 @@ sub rescore {
my $score = <CDEC_IN>;
chomp $score;
my @words = split /\s+/, $hyps[$i];
- my $norm_score = $score / scalar @words;
- print "$id ||| $hyps[$i] ||| $feats[$i] $feature_name=$score ${feature_name}_norm=$norm_score\n";
+ print "$id ||| $hyps[$i] ||| $feats[$i] $feature_name=$score\n";
}
}