-rw-r--r--  decoder/cdec.cc           18
-rw-r--r--  decoder/ff_factory.h       9
-rwxr-xr-x  decoder/ff_lm_fsa.h        2
-rwxr-xr-x  decoder/ff_sample_fsa.h   20
-rw-r--r--  decoder/hg.h              81
-rwxr-xr-x  decoder/oracle_bleu.h     59
-rw-r--r--  decoder/viterbi.cc        13
-rw-r--r--  decoder/viterbi.h          2
8 files changed, 154 insertions(+), 50 deletions(-)
diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index f366a08f..9110a234 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -313,8 +313,8 @@ bool prelm_weights_string(po::variables_map const& conf,string &s)
}
-void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,WeightVector *weights=0) {
- cerr << viterbi_stats(forest,name,true,show_tree);
+void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,WeightVector *weights=0,bool show_deriv=false) {
+ cerr << viterbi_stats(forest,name,true,show_tree,show_deriv);
if (show_features) {
cerr << name<<" features: ";
/* Hypergraph::Edge const* best=forest.ViterbiGoalEdge();
@@ -328,9 +328,9 @@ void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_featur
}
}
-void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,DenseWeightVector const& feature_weights) {
+void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,DenseWeightVector const& feature_weights,bool sd=false) {
WeightVector fw(feature_weights);
- forest_stats(forest,name,show_tree,show_features,&fw);
+ forest_stats(forest,name,show_tree,show_features,&fw,sd);
}
@@ -348,7 +348,7 @@ void maybe_prune(Hypergraph &forest,po::variables_map const& conf,string nbeam,s
}
forest.PruneInsideOutside(beam_prune,density_prune,pm,false,1,conf["promise_power"].as<double>());
if (!forestname.empty()) forestname=" "+forestname;
- forest_stats(forest," Pruned "+forestname+" forest",false,false);
+ forest_stats(forest," Pruned "+forestname+" forest",false,false,false);
cerr << " Pruned "<<forestname<<" forest portion of edges kept: "<<forest.edges_.size()/presize<<endl;
}
}
@@ -560,7 +560,7 @@ int main(int argc, char** argv) {
}
const bool show_tree_structure=conf.count("show_tree_structure");
const bool show_features=conf.count("show_features");
- forest_stats(forest," -LM forest",show_tree_structure,show_features,feature_weights);
+ forest_stats(forest," -LM forest",show_tree_structure,show_features,feature_weights,oracle.show_derivation);
if (conf.count("show_expected_length")) {
const PRPair<double, double> res =
Inside<PRPair<double, double>,
@@ -586,7 +586,7 @@ int main(int argc, char** argv) {
&prelm_forest);
forest.swap(prelm_forest);
forest.Reweight(prelm_feature_weights);
- forest_stats(forest," prelm forest",show_tree_structure,show_features,prelm_feature_weights);
+ forest_stats(forest," prelm forest",show_tree_structure,show_features,prelm_feature_weights,oracle.show_derivation);
}
maybe_prune(forest,conf,"prelm_beam_prune","prelm_density_prune","-LM",srclen);
@@ -605,7 +605,7 @@ int main(int argc, char** argv) {
&lm_forest);
forest.swap(lm_forest);
forest.Reweight(feature_weights);
- forest_stats(forest," +LM forest",show_tree_structure,show_features,feature_weights);
+ forest_stats(forest," +LM forest",show_tree_structure,show_features,feature_weights,oracle.show_derivation);
}
maybe_prune(forest,conf,"beam_prune","density_prune","+LM",srclen);
@@ -650,7 +650,7 @@ int main(int argc, char** argv) {
} else {
if (kbest) {
//TODO: does this work properly?
- oracle.DumpKBest(sent_id, forest, conf["k_best"].as<int>(), unique_kbest,"");
+ oracle.DumpKBest(sent_id, forest, conf["k_best"].as<int>(), unique_kbest,"-");
} else if (csplit_output_plf) {
cout << HypergraphIO::AsPLF(forest, false) << endl;
} else {
diff --git a/decoder/ff_factory.h b/decoder/ff_factory.h
index 12e768aa..93681c5e 100644
--- a/decoder/ff_factory.h
+++ b/decoder/ff_factory.h
@@ -1,7 +1,14 @@
#ifndef _FF_FACTORY_H_
#define _FF_FACTORY_H_
-//TODO: use http://www.boost.org/doc/libs/1_43_0/libs/functional/factory/doc/html/index.html?
+/*TODO: register state identity separately from feature function identity? as
+ * in: string registry for name of state somewhere, assert that same result is
+ * computed by all users? or, we can just require that ff sharing same state
+ * all be mashed into a single ffunc, which can just emit all the fid scores at
+ * once. that's fine.
+ */
+
+//TODO: use http://www.boost.org/doc/libs/1_43_0/libs/functional/factory/doc/html/index.html ?
#include <iostream>
#include <string>
diff --git a/decoder/ff_lm_fsa.h b/decoder/ff_lm_fsa.h
index 344cd992..01b3764e 100755
--- a/decoder/ff_lm_fsa.h
+++ b/decoder/ff_lm_fsa.h
@@ -1,6 +1,8 @@
#ifndef FF_LM_FSA_H
#define FF_LM_FSA_H
+//TODO: use SRI LM::contextBOW, LM::contextID to shorten state
+
#include "ff_lm.h"
#include "ff_from_fsa.h"
diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h
index 74d9e7b5..2aeaa6de 100755
--- a/decoder/ff_sample_fsa.h
+++ b/decoder/ff_sample_fsa.h
@@ -3,11 +3,11 @@
#include "ff_from_fsa.h"
-// example: feature val = -1 * # of target words
+// example: feature val = 1 * # of target words
struct WordPenaltyFsa : public FsaFeatureFunctionBase<WordPenaltyFsa> {
static std::string usage(bool param,bool verbose) {
return FeatureFunction::usage_helper(
- "WordPenaltyFsa","","-1 per target word"
+ "WordPenaltyFsa","","1 per target word"
,param,verbose);
}
@@ -21,7 +21,7 @@ struct WordPenaltyFsa : public FsaFeatureFunctionBase<WordPenaltyFsa> {
}
// move from state to next_state after seeing word x, while emitting features->add_value(fid,val) possibly with duplicates. state and next_state may be same memory.
Featval Scan1(WordID w,void const* state,void *next_state) const {
- return -1;
+ return 1;
}
};
@@ -35,7 +35,7 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
return FeatureFunction::usage_helper(
"LongerThanPrev",
"",
- "stupid example stateful (bigram) feature: -1 per target word that's longer than the previous word (<s> sentence begin considered 3 chars long, </s> is sentence end.)",
+ "stupid example stateful (bigram) feature: 1 per target word that's longer than the previous word (<s> sentence begin considered 3 chars long, </s> is sentence end.)",
param,verbose);
}
@@ -49,7 +49,7 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
return std::strlen(TD::Convert(w));
}
int markov_order() const { return 1; }
- LongerThanPrev(std::string const& param) : Base(sizeof(int),singleton_sentence(TD::se)) {
+ LongerThanPrev(std::string const& param) : Base(sizeof(int)/* ,singleton_sentence(TD::se) */) {
Init();
if (0) { // all this is done in constructor already
set_state_bytes(sizeof(int));
@@ -61,7 +61,7 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
to_state(h_start.begin(),&ss,1);
}
- state(start.begin())=3;
+ state(start.begin())=999999;
state(h_start.begin())=4; // estimate: anything >4 chars is usually longer than previous
}
@@ -70,7 +70,7 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
int prevlen=state(from);
int len=wordlen(w);
state(next_state)=len;
- return len>prevlen ? -1 : 0;
+ return len>prevlen ? 1 : 0;
}
};
@@ -82,7 +82,7 @@ struct ShorterThanPrev : FsaTypedBase<int,ShorterThanPrev> {
return FeatureFunction::usage_helper(
"ShorterThanPrev",
"",
- "stupid example stateful (bigram) feature: -1 per target word that's shorter than the previous word (end of sentence considered '</s>')",
+ "stupid example stateful (bigram) feature: 1 per target word that's shorter than the previous word (end of sentence considered '</s>')",
param,verbose);
}
@@ -90,7 +90,7 @@ struct ShorterThanPrev : FsaTypedBase<int,ShorterThanPrev> {
return std::strlen(TD::Convert(w));
}
ShorterThanPrev(std::string const& param)
- : Base(3,4,singleton_sentence(TD::se))
+ : Base(-1,4/* ,singleton_sentence(TD::se) */)
// start, h_start, end_phrase
// estimate: anything <4 chars is usually shorter than previous
{
@@ -106,7 +106,7 @@ struct ShorterThanPrev : FsaTypedBase<int,ShorterThanPrev> {
void ScanT(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,int prevlen,int &len,FeatureVector *features) const {
len=wordlen(w);
if (len<prevlen)
- features->add_value(fid_,-1);
+ features->add_value(fid_,1);
}
};
diff --git a/decoder/hg.h b/decoder/hg.h
index b3bfd19c..34638e04 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -1,12 +1,24 @@
#ifndef _HG_H_
#define _HG_H_
+#define USE_INFO_EDGE 1
+#if USE_INFO_EDGE
+# include <sstream>
+# define INFO_EDGE(e,msg) do { std::ostringstream &o=e.info_;o<<msg; } while(0)
+# define INFO_EDGEw(e,msg) do { std::ostringstream &o=e.info_;if (o.empty()) o<<' ';o<<msg; } while(0)
+#else
+# define INFO_EDGE(e,msg)
+# define INFO_EDGEw(e,msg)
+#endif
+#define INFO_EDGEln(e,msg) INFO_EDGE(e,msg<<'\n')
+
#include <string>
#include <vector>
#include "feature_vector.h"
#include "small_vector.h"
#include "wordid.h"
+#include "tdict.h"
#include "trule.h"
#include "prob.h"
@@ -23,6 +35,7 @@ class Hypergraph {
// SmallVector is a fast, small vector<int> implementation for sizes <= 2
typedef SmallVectorInt TailNodeVector;
+ typedef std::vector<int> EdgesVector;
// TODO get rid of cat_?
// TODO keep cat_ and add span and/or state? :)
@@ -59,8 +72,76 @@ class Hypergraph {
short int j_;
short int prev_i_;
short int prev_j_;
+#if USE_INFO_EDGE
+ private:
+ std::ostringstream info_;
+ public:
+ Edge(Edge const& o) : head_node_(o.head_node_),tail_nodes_(o.tail_nodes_),rule_(o.rule_),feature_values_(o.feature_values_),edge_prob_(o.edge_prob_),id_(o.id_),i_(o.i_),j_(o.j_),prev_i_(o.prev_i_),prev_j_(o.prev_j_), info_(o.info_.str()) { }
+ void operator=(Edge const& o) {
+ head_node_ = o.head_node_; tail_nodes_ = o.tail_nodes_; rule_ = o.rule_; feature_values_ = o.feature_values_; edge_prob_ = o.edge_prob_; id_ = o.id_; i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_; info_.str(o.info_.str());
+ }
+ std::string info() const { return info_.str(); }
+#else
+ std::string info() const { return std::string(); }
+#endif
+ void show(std::ostream &o,unsigned mask=SPAN|RULE) const {
+ o<<'{';
+ if (mask&CATEGORY)
+ o<<TD::Convert(rule_->GetLHS());
+ if (mask&PREV_SPAN)
+ o<<'<'<<prev_i_<<','<<prev_j_<<'>';
+ if (mask&SPAN)
+ o<<'<'<<i_<<','<<j_<<'>';
+ if (mask&PROB)
+ o<<" p="<<edge_prob_;
+ if (mask&FEATURES)
+ o<<" "<<feature_values_;
+ if (mask&RULE)
+ o<<rule_->AsString(mask&RULE_LHS);
+ if (USE_INFO_EDGE) {
+ if (mask) o << ' ';
+ o<<info();
+ }
+ o<<'}';
+ }
+ std::string show(unsigned mask=SPAN|RULE) const {
+ std::ostringstream o;
+ show(o,mask);
+ return o.str();
+ }
};
+ Edge const* viterbi_edge(int node) const { // assumes sorting has best edge as first. FIXME: check. SortInEdgesByEdgeWeights appears to accomplish this
+ EdgesVector const& v=nodes_[node].in_edges_;
+ return v.empty() ? 0 : &edges_[v.front()];
+ }
+
+ enum {
+ NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF
+ };
+ std::string show_tree(Edge const& e, bool indent=true,unsigned show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const {
+ std::ostringstream o;
+ show_tree(o,e,show_mask,indent,maxdepth,depth);
+ return o.str();
+ }
+ void show_tree(std::ostream &o,Edge const& e,bool indent=true,unsigned show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const {
+ if (depth>maxdepth) return;
+ if (indent) for(int i=0;i<depth;++i) o<<' ';
+ o<<'(';
+ e.show(o,show_mask);
+ if (indent) o<<'\n';
+ for (int i=0;i<e.tail_nodes_.size();++i) {
+ Edge const *c=viterbi_edge(e.tail_nodes_[i]);
+ if (!c) continue;
+ assert(c); // leaf?
+ show_tree(o,*c,indent,show_mask,maxdepth,depth+1);
+ if (!indent) o<<' ';
+ }
+ if (indent) for(int i=0;i<depth;++i) o<<' ';
+ o<<")";
+ if (indent) o<<"\n";
+ }
+
// returns edge with rule_.IsGoal, returns 0 if none found. otherwise gives best edge_prob_ - note: I don't think edge_prob_ is viterbi cumulative, so this would just be the best local probability.
Edge const* ViterbiGoalEdge() const;
diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h
index 4dc86bc7..4a2cbbe5 100755
--- a/decoder/oracle_bleu.h
+++ b/decoder/oracle_bleu.h
@@ -94,6 +94,7 @@ struct OracleBleu {
("references,R", value<Refs >(&refs), "Translation reference files")
("oracle_loss", value<string>(&loss_name)->default_value("IBM_BLEU_3"), "IBM_BLEU_3 (default), IBM_BLEU etc")
("bleu_weight", value<double>(&bleu_weight)->default_value(1.), "weight to give the hope/fear loss function vs. model score")
+ ("show_derivation", bool_switch(&show_derivation), "show derivation tree in kbest")
("verbose",bool_switch(&verbose),"detailed logs")
;
}
@@ -248,46 +249,48 @@ struct OracleBleu {
// dest_forest->SortInEdgesByEdgeWeights();
}
-// TODO decoder output should probably be moved to another file - how about oracle_bleu.h
- void DumpKBest(const int sent_id, const Hypergraph& forest, const int k, const bool unique, std::string const &kbest_out_filename_) {
+ bool show_derivation;
+ template <class Filter>
+ void kbest(int sent_id,Hypergraph const& forest,int k,std::ostream &kbest_out=std::cout,std::ostream &deriv_out=std::cerr) {
using namespace std;
using namespace boost;
- cerr << "In kbest\n";
-
- ofstream kbest_out;
- kbest_out.open(kbest_out_filename_.c_str());
- cerr << "Output kbest to " << kbest_out_filename_;
-
+ typedef KBest::KBestDerivations<Sentence, ESentenceTraversal,Filter> K;
+ K kbest(forest,k);
//add length (f side) src length of this sentence to the psuedo-doc src length count
float curr_src_length = doc_src_length + tmp_src_length;
-
- if (unique) {
- KBest::KBestDerivations<Sentence, ESentenceTraversal, KBest::FilterUnique> kbest(forest, k);
- for (int i = 0; i < k; ++i) {
- const KBest::KBestDerivations<Sentence, ESentenceTraversal, KBest::FilterUnique>::Derivation* d =
- kbest.LazyKthBest(forest.nodes_.size() - 1, i);
- if (!d) break;
- //calculate score in context of psuedo-doc
+ for (int i = 0; i < k; ++i) {
+ typename K::Derivation *d = kbest.LazyKthBest(forest.nodes_.size() - 1, i);
+ if (!d) break;
+ kbest_out << sent_id << " ||| " << TD::GetString(d->yield) << " ||| "
+ << d->feature_values << " ||| " << log(d->score);
+ if (!refs.empty()) {
ScoreP sentscore = GetScore(d->yield,sent_id);
sentscore->PlusEquals(*doc_score,float(1));
float bleu = curr_src_length * sentscore->ComputeScore();
- kbest_out << sent_id << " ||| " << TD::GetString(d->yield) << " ||| "
- << d->feature_values << " ||| " << log(d->score) << " ||| " << bleu << endl;
- // cout << sent_id << " ||| " << TD::GetString(d->yield) << " ||| "
- // << d->feature_values << " ||| " << log(d->score) << endl;
+ kbest_out << " ||| " << bleu;
}
- } else {
- KBest::KBestDerivations<Sentence, ESentenceTraversal> kbest(forest, k);
- for (int i = 0; i < k; ++i) {
- const KBest::KBestDerivations<Sentence, ESentenceTraversal>::Derivation* d =
- kbest.LazyKthBest(forest.nodes_.size() - 1, i);
- if (!d) break;
- cout << sent_id << " ||| " << TD::GetString(d->yield) << " ||| "
- << d->feature_values << " ||| " << log(d->score) << endl;
+ kbest_out<<endl<<flush;
+ if (show_derivation) {
+ deriv_out<<"\nsent_id="<<sent_id<<"\n";
+ forest.show_tree(cerr,*d->edge);
+ deriv_out<<flush;
}
}
}
+// TODO decoder output should probably be moved to another file - how about oracle_bleu.h
+ void DumpKBest(const int sent_id, const Hypergraph& forest, const int k, const bool unique, std::string const &kbest_out_filename_) {
+
+ WriteFile ko(kbest_out_filename_);
+ std::cerr << "Output kbest to " << kbest_out_filename_;
+
+ if (unique)
+ kbest<KBest::NoFilter>(sent_id,forest,k,ko.get(),std::cerr);
+ else {
+ kbest<KBest::FilterUnique>(sent_id,forest,k,ko.get(),std::cerr);
+ }
+ }
+
void DumpKBest(std::string const& suffix,const int sent_id, const Hypergraph& forest, const int k, const bool unique, std::string const& forest_output)
{
std::ostringstream kbest_string_stream;
diff --git a/decoder/viterbi.cc b/decoder/viterbi.cc
index 7719de32..d0b7e6ec 100644
--- a/decoder/viterbi.cc
+++ b/decoder/viterbi.cc
@@ -6,7 +6,7 @@
using namespace std;
-std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool estring, bool etree)
+std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool estring, bool etree,bool show_derivation)
{
ostringstream o;
o << hg.stats(name);
@@ -19,6 +19,17 @@ std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool es
if (etree) {
o<<name<<" tree: "<<ViterbiETree(hg)<<endl;
}
+ if (show_derivation) {
+ o<<name<<" derivation: ";
+ ViterbiPathTraversal::Result d;
+ Viterbi<ViterbiPathTraversal>(hg, &d);
+ if (d.empty())
+ o<<"(empty viterbi hyperpath - no translation)";
+ else
+ hg.show_tree(o,*d.back(),false); // last item should be goal (or at least depend on prev items). TODO: this doesn't actually reorder the nodes in hg.
+ o<<endl;
+ }
+
return o.str();
}
diff --git a/decoder/viterbi.h b/decoder/viterbi.h
index 388bff3c..ea2bc6c2 100644
--- a/decoder/viterbi.h
+++ b/decoder/viterbi.h
@@ -6,7 +6,7 @@
#include "hg.h"
#include "tdict.h"
-std::string viterbi_stats(Hypergraph const& hg, std::string const& name="forest", bool estring=true, bool etree=false);
+std::string viterbi_stats(Hypergraph const& hg, std::string const& name="forest", bool estring=true, bool etree=false, bool derivation_tree=false);
/// computes for each hg node the best (according to WeightType/WeightFunction) derivation, and some homomorphism (bottom up expression tree applied through Traversal) of it. T is the "return type" of Traversal, which is called only once for the best edge for a node's result (i.e. result will start default constructed)
//TODO: make T a typename inside Traversal and WeightType a typename inside WeightFunction?