diff options
author | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-25 02:52:58 +0000 |
---|---|---|
committer | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-25 02:52:58 +0000 |
commit | 786b689a486413b4ea31841eb352ed728621e4b1 (patch) | |
tree | 2ff55857c4e5367c557642bc44e4f44d1697b3fe | |
parent | 0f80272c5e32dd3a0d5d747d00c914b0a6bf0be8 (diff) |
cleaned up kbest, new USE_INFO_EDGE 1 logs per edge, --show_derivation (needs work; handle kbest deriv, viterbi deriv, sort hg exposing viterbi?)
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@405 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r-- | decoder/cdec.cc | 18 | ||||
-rw-r--r-- | decoder/ff_factory.h | 9 | ||||
-rwxr-xr-x | decoder/ff_lm_fsa.h | 2 | ||||
-rwxr-xr-x | decoder/ff_sample_fsa.h | 20 | ||||
-rw-r--r-- | decoder/hg.h | 81 | ||||
-rwxr-xr-x | decoder/oracle_bleu.h | 59 | ||||
-rw-r--r-- | decoder/viterbi.cc | 13 | ||||
-rw-r--r-- | decoder/viterbi.h | 2 |
8 files changed, 154 insertions, 50 deletions
diff --git a/decoder/cdec.cc b/decoder/cdec.cc index f366a08f..9110a234 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -313,8 +313,8 @@ bool prelm_weights_string(po::variables_map const& conf,string &s) } -void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,WeightVector *weights=0) { - cerr << viterbi_stats(forest,name,true,show_tree); +void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,WeightVector *weights=0,bool show_deriv=false) { + cerr << viterbi_stats(forest,name,true,show_tree,show_deriv); if (show_features) { cerr << name<<" features: "; /* Hypergraph::Edge const* best=forest.ViterbiGoalEdge(); @@ -328,9 +328,9 @@ void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_featur } } -void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,DenseWeightVector const& feature_weights) { +void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,DenseWeightVector const& feature_weights,bool sd=false) { WeightVector fw(feature_weights); - forest_stats(forest,name,show_tree,show_features,&fw); + forest_stats(forest,name,show_tree,show_features,&fw,sd); } @@ -348,7 +348,7 @@ void maybe_prune(Hypergraph &forest,po::variables_map const& conf,string nbeam,s } forest.PruneInsideOutside(beam_prune,density_prune,pm,false,1,conf["promise_power"].as<double>()); if (!forestname.empty()) forestname=" "+forestname; - forest_stats(forest," Pruned "+forestname+" forest",false,false); + forest_stats(forest," Pruned "+forestname+" forest",false,false,false); cerr << " Pruned "<<forestname<<" forest portion of edges kept: "<<forest.edges_.size()/presize<<endl; } } @@ -560,7 +560,7 @@ int main(int argc, char** argv) { } const bool show_tree_structure=conf.count("show_tree_structure"); const bool show_features=conf.count("show_features"); - forest_stats(forest," -LM forest",show_tree_structure,show_features,feature_weights); + forest_stats(forest," -LM forest",show_tree_structure,show_features,feature_weights,oracle.show_derivation); if (conf.count("show_expected_length")) { const PRPair<double, double> res = Inside<PRPair<double, double>, @@ -586,7 +586,7 @@ int main(int argc, char** argv) { &prelm_forest); forest.swap(prelm_forest); forest.Reweight(prelm_feature_weights); - forest_stats(forest," prelm forest",show_tree_structure,show_features,prelm_feature_weights); + forest_stats(forest," prelm forest",show_tree_structure,show_features,prelm_feature_weights,oracle.show_derivation); } maybe_prune(forest,conf,"prelm_beam_prune","prelm_density_prune","-LM",srclen); @@ -605,7 +605,7 @@ int main(int argc, char** argv) { &lm_forest); forest.swap(lm_forest); forest.Reweight(feature_weights); - forest_stats(forest," +LM forest",show_tree_structure,show_features,feature_weights); + forest_stats(forest," +LM forest",show_tree_structure,show_features,feature_weights,oracle.show_derivation); } maybe_prune(forest,conf,"beam_prune","density_prune","+LM",srclen); @@ -650,7 +650,7 @@ int main(int argc, char** argv) { } else { if (kbest) { //TODO: does this work properly? - oracle.DumpKBest(sent_id, forest, conf["k_best"].as<int>(), unique_kbest,""); + oracle.DumpKBest(sent_id, forest, conf["k_best"].as<int>(), unique_kbest,"-"); } else if (csplit_output_plf) { cout << HypergraphIO::AsPLF(forest, false) << endl; } else { diff --git a/decoder/ff_factory.h b/decoder/ff_factory.h index 12e768aa..93681c5e 100644 --- a/decoder/ff_factory.h +++ b/decoder/ff_factory.h @@ -1,7 +1,14 @@ #ifndef _FF_FACTORY_H_ #define _FF_FACTORY_H_ -//TODO: use http://www.boost.org/doc/libs/1_43_0/libs/functional/factory/doc/html/index.html? +/*TODO: register state identity separately from feature function identity? as + * in: string registry for name of state somewhere, assert that same result is + * computed by all users? or, we can just require that ff sharing same state + * all be mashed into a single ffunc, which can just emit all the fid scores at + * once. that's fine. + */ + +//TODO: use http://www.boost.org/doc/libs/1_43_0/libs/functional/factory/doc/html/index.html ? #include <iostream> #include <string> diff --git a/decoder/ff_lm_fsa.h b/decoder/ff_lm_fsa.h index 344cd992..01b3764e 100755 --- a/decoder/ff_lm_fsa.h +++ b/decoder/ff_lm_fsa.h @@ -1,6 +1,8 @@ #ifndef FF_LM_FSA_H #define FF_LM_FSA_H +//TODO: use SRI LM::contextBOW, LM::contextID to shorten state + #include "ff_lm.h" #include "ff_from_fsa.h" diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h index 74d9e7b5..2aeaa6de 100755 --- a/decoder/ff_sample_fsa.h +++ b/decoder/ff_sample_fsa.h @@ -3,11 +3,11 @@ #include "ff_from_fsa.h" -// example: feature val = -1 * # of target words +// example: feature val = 1 * # of target words struct WordPenaltyFsa : public FsaFeatureFunctionBase<WordPenaltyFsa> { static std::string usage(bool param,bool verbose) { return FeatureFunction::usage_helper( - "WordPenaltyFsa","","-1 per target word" + "WordPenaltyFsa","","1 per target word" ,param,verbose); } @@ -21,7 +21,7 @@ struct WordPenaltyFsa : public FsaFeatureFunctionBase<WordPenaltyFsa> { } // move from state to next_state after seeing word x, while emitting features->add_value(fid,val) possibly with duplicates. state and next_state may be same memory. Featval Scan1(WordID w,void const* state,void *next_state) const { - return -1; + return 1; } }; @@ -35,7 +35,7 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> { return FeatureFunction::usage_helper( "LongerThanPrev", "", - "stupid example stateful (bigram) feature: -1 per target word that's longer than the previous word (<s> sentence begin considered 3 chars long, </s> is sentence end.)", + "stupid example stateful (bigram) feature: 1 per target word that's longer than the previous word (<s> sentence begin considered 3 chars long, </s> is sentence end.)", param,verbose); } @@ -49,7 +49,7 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> { return std::strlen(TD::Convert(w)); } int markov_order() const { return 1; } - LongerThanPrev(std::string const& param) : Base(sizeof(int),singleton_sentence(TD::se)) { + LongerThanPrev(std::string const& param) : Base(sizeof(int)/* ,singleton_sentence(TD::se) */) { Init(); if (0) { // all this is done in constructor already set_state_bytes(sizeof(int)); @@ -61,7 +61,7 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> { to_state(h_start.begin(),&ss,1); } - state(start.begin())=3; + state(start.begin())=999999; state(h_start.begin())=4; // estimate: anything >4 chars is usually longer than previous } @@ -70,7 +70,7 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> { int prevlen=state(from); int len=wordlen(w); state(next_state)=len; - return len>prevlen ? -1 : 0; + return len>prevlen ? 1 : 0; } }; @@ -82,7 +82,7 @@ struct ShorterThanPrev : FsaTypedBase<int,ShorterThanPrev> { return FeatureFunction::usage_helper( "ShorterThanPrev", "", - "stupid example stateful (bigram) feature: -1 per target word that's shorter than the previous word (end of sentence considered '</s>')", + "stupid example stateful (bigram) feature: 1 per target word that's shorter than the previous word (end of sentence considered '</s>')", param,verbose); } @@ -90,7 +90,7 @@ struct ShorterThanPrev : FsaTypedBase<int,ShorterThanPrev> { return std::strlen(TD::Convert(w)); } ShorterThanPrev(std::string const& param) - : Base(3,4,singleton_sentence(TD::se)) + : Base(-1,4/* ,singleton_sentence(TD::se) */) // start, h_start, end_phrase // estimate: anything <4 chars is usually shorter than previous { @@ -106,7 +106,7 @@ struct ShorterThanPrev : FsaTypedBase<int,ShorterThanPrev> { void ScanT(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,int prevlen,int &len,FeatureVector *features) const { len=wordlen(w); if (len<prevlen) - features->add_value(fid_,-1); + features->add_value(fid_,1); } }; diff --git a/decoder/hg.h b/decoder/hg.h index b3bfd19c..34638e04 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -1,12 +1,24 @@ #ifndef _HG_H_ #define _HG_H_ +#define USE_INFO_EDGE 1 +#if USE_INFO_EDGE +# include <sstream> +# define INFO_EDGE(e,msg) do { std::ostringstream &o=e.info_;o<<msg; } while(0) +# define INFO_EDGEw(e,msg) do { std::ostringstream &o=e.info_;if (o.empty()) o<<' ';o<<msg; } while(0) +#else +# define INFO_EDGE(e,msg) +# define INFO_EDGEw(e,msg) +#endif +#define INFO_EDGEln(e,msg) INFO_EDGE(e,msg<<'\n') + #include <string> #include <vector> #include "feature_vector.h" #include "small_vector.h" #include "wordid.h" +#include "tdict.h" #include "trule.h" #include "prob.h" @@ -23,6 +35,7 @@ class Hypergraph { // SmallVector is a fast, small vector<int> implementation for sizes <= 2 typedef SmallVectorInt TailNodeVector; + typedef std::vector<int> EdgesVector; // TODO get rid of cat_? // TODO keep cat_ and add span and/or state? :) @@ -59,8 +72,76 @@ class Hypergraph { short int j_; short int prev_i_; short int prev_j_; +#if USE_INFO_EDGE + private: + std::ostringstream info_; + public: + Edge(Edge const& o) : head_node_(o.head_node_),tail_nodes_(o.tail_nodes_),rule_(o.rule_),feature_values_(o.feature_values_),edge_prob_(o.edge_prob_),id_(o.id_),i_(o.i_),j_(o.j_),prev_i_(o.prev_i_),prev_j_(o.prev_j_), info_(o.info_.str()) { } + void operator=(Edge const& o) { + head_node_ = o.head_node_; tail_nodes_ = o.tail_nodes_; rule_ = o.rule_; feature_values_ = o.feature_values_; edge_prob_ = o.edge_prob_; id_ = o.id_; i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_; info_.str(o.info_.str()); + } + std::string info() const { return info_.str(); } +#else + std::string info() const { return std::string(); } +#endif + void show(std::ostream &o,unsigned mask=SPAN|RULE) const { + o<<'{'; + if (mask&CATEGORY) + o<<TD::Convert(rule_->GetLHS()); + if (mask&PREV_SPAN) + o<<'<'<<prev_i_<<','<<prev_j_<<'>'; + if (mask&SPAN) + o<<'<'<<i_<<','<<j_<<'>'; + if (mask&PROB) + o<<" p="<<edge_prob_; + if (mask&FEATURES) + o<<" "<<feature_values_; + if (mask&RULE) + o<<rule_->AsString(mask&RULE_LHS); + if (USE_INFO_EDGE) { + if (mask) o << ' '; + o<<info(); + } + o<<'}'; + } + std::string show(unsigned mask=SPAN|RULE) const { + std::ostringstream o; + show(o,mask); + return o.str(); + } }; + Edge const* viterbi_edge(int node) const { // assumes sorting has best edge as first. FIXME: check. SortInEdgesByEdgeWeights appears to accomplish this + EdgesVector const& v=nodes_[node].in_edges_; + return v.empty() ? 0 : &edges_[v.front()]; + } + + enum { + NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF + }; + std::string show_tree(Edge const& e, bool indent=true,unsigned show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const { + std::ostringstream o; + show_tree(o,e,show_mask,indent,maxdepth,depth); + return o.str(); + } + void show_tree(std::ostream &o,Edge const& e,bool indent=true,unsigned show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const { + if (depth>maxdepth) return; + if (indent) for(int i=0;i<depth;++i) o<<' '; + o<<'('; + e.show(o,show_mask); + if (indent) o<<'\n'; + for (int i=0;i<e.tail_nodes_.size();++i) { + Edge const *c=viterbi_edge(e.tail_nodes_[i]); + if (!c) continue; + assert(c); // leaf? + show_tree(o,*c,indent,show_mask,maxdepth,depth+1); + if (!indent) o<<' '; + } + if (indent) for(int i=0;i<depth;++i) o<<' '; + o<<")"; + if (indent) o<<"\n"; + } + // returns edge with rule_.IsGoal, returns 0 if none found. otherwise gives best edge_prob_ - note: I don't think edge_prob_ is viterbi cumulative, so this would just be the best local probability. Edge const* ViterbiGoalEdge() const; diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h index 4dc86bc7..4a2cbbe5 100755 --- a/decoder/oracle_bleu.h +++ b/decoder/oracle_bleu.h @@ -94,6 +94,7 @@ struct OracleBleu { ("references,R", value<Refs >(&refs), "Translation reference files") ("oracle_loss", value<string>(&loss_name)->default_value("IBM_BLEU_3"), "IBM_BLEU_3 (default), IBM_BLEU etc") ("bleu_weight", value<double>(&bleu_weight)->default_value(1.), "weight to give the hope/fear loss function vs. model score") + ("show_derivation", bool_switch(&show_derivation), "show derivation tree in kbest") ("verbose",bool_switch(&verbose),"detailed logs") ; } @@ -248,46 +249,48 @@ struct OracleBleu { // dest_forest->SortInEdgesByEdgeWeights(); } -// TODO decoder output should probably be moved to another file - how about oracle_bleu.h - void DumpKBest(const int sent_id, const Hypergraph& forest, const int k, const bool unique, std::string const &kbest_out_filename_) { + bool show_derivation; + template <class Filter> + void kbest(int sent_id,Hypergraph const& forest,int k,std::ostream &kbest_out=std::cout,std::ostream &deriv_out=std::cerr) { using namespace std; using namespace boost; - cerr << "In kbest\n"; - - ofstream kbest_out; - kbest_out.open(kbest_out_filename_.c_str()); - cerr << "Output kbest to " << kbest_out_filename_; - + typedef KBest::KBestDerivations<Sentence, ESentenceTraversal,Filter> K; + K kbest(forest,k); //add length (f side) src length of this sentence to the psuedo-doc src length count float curr_src_length = doc_src_length + tmp_src_length; - - if (unique) { - KBest::KBestDerivations<Sentence, ESentenceTraversal, KBest::FilterUnique> kbest(forest, k); - for (int i = 0; i < k; ++i) { - const KBest::KBestDerivations<Sentence, ESentenceTraversal, KBest::FilterUnique>::Derivation* d = - kbest.LazyKthBest(forest.nodes_.size() - 1, i); - if (!d) break; - //calculate score in context of psuedo-doc + for (int i = 0; i < k; ++i) { + typename K::Derivation *d = kbest.LazyKthBest(forest.nodes_.size() - 1, i); + if (!d) break; + kbest_out << sent_id << " ||| " << TD::GetString(d->yield) << " ||| " + << d->feature_values << " ||| " << log(d->score); + if (!refs.empty()) { ScoreP sentscore = GetScore(d->yield,sent_id); sentscore->PlusEquals(*doc_score,float(1)); float bleu = curr_src_length * sentscore->ComputeScore(); - kbest_out << sent_id << " ||| " << TD::GetString(d->yield) << " ||| " - << d->feature_values << " ||| " << log(d->score) << " ||| " << bleu << endl; - // cout << sent_id << " ||| " << TD::GetString(d->yield) << " ||| " - // << d->feature_values << " ||| " << log(d->score) << endl; + kbest_out << " ||| " << bleu; } - } else { - KBest::KBestDerivations<Sentence, ESentenceTraversal> kbest(forest, k); - for (int i = 0; i < k; ++i) { - const KBest::KBestDerivations<Sentence, ESentenceTraversal>::Derivation* d = - kbest.LazyKthBest(forest.nodes_.size() - 1, i); - if (!d) break; - cout << sent_id << " ||| " << TD::GetString(d->yield) << " ||| " - << d->feature_values << " ||| " << log(d->score) << endl; + kbest_out<<endl<<flush; + if (show_derivation) { + deriv_out<<"\nsent_id="<<sent_id<<"\n"; + forest.show_tree(cerr,*d->edge); + deriv_out<<flush; } } } +// TODO decoder output should probably be moved to another file - how about oracle_bleu.h + void DumpKBest(const int sent_id, const Hypergraph& forest, const int k, const bool unique, std::string const &kbest_out_filename_) { + + WriteFile ko(kbest_out_filename_); + std::cerr << "Output kbest to " << kbest_out_filename_; + + if (unique) + kbest<KBest::NoFilter>(sent_id,forest,k,ko.get(),std::cerr); + else { + kbest<KBest::FilterUnique>(sent_id,forest,k,ko.get(),std::cerr); + } + } + void DumpKBest(std::string const& suffix,const int sent_id, const Hypergraph& forest, const int k, const bool unique, std::string const& forest_output) { std::ostringstream kbest_string_stream; diff --git a/decoder/viterbi.cc b/decoder/viterbi.cc index 7719de32..d0b7e6ec 100644 --- a/decoder/viterbi.cc +++ b/decoder/viterbi.cc @@ -6,7 +6,7 @@ using namespace std; -std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool estring, bool etree) +std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool estring, bool etree,bool show_derivation) { ostringstream o; o << hg.stats(name); @@ -19,6 +19,17 @@ std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool es if (etree) { o<<name<<" tree: "<<ViterbiETree(hg)<<endl; } + if (show_derivation) { + o<<name<<" derivation: "; + ViterbiPathTraversal::Result d; + Viterbi<ViterbiPathTraversal>(hg, &d); + if (d.empty()) + o<<"(empty viterbi hyperpath - no translation)"; + else + hg.show_tree(o,*d.back(),false); // last item should be goal (or at least depend on prev items). TODO: this doesn't actually reorder the nodes in hg. + o<<endl; + } + return o.str(); } diff --git a/decoder/viterbi.h b/decoder/viterbi.h index 388bff3c..ea2bc6c2 100644 --- a/decoder/viterbi.h +++ b/decoder/viterbi.h @@ -6,7 +6,7 @@ #include "hg.h" #include "tdict.h" -std::string viterbi_stats(Hypergraph const& hg, std::string const& name="forest", bool estring=true, bool etree=false); +std::string viterbi_stats(Hypergraph const& hg, std::string const& name="forest", bool estring=true, bool etree=false, bool derivation_tree=false); /// computes for each hg node the best (according to WeightType/WeightFunction) derivation, and some homomorphism (bottom up expression tree applied through Traversal) of it. T is the "return type" of Traversal, which is called only once for the best edge for a node's result (i.e. result will start default constructed) //TODO: make T a typename inside Traversal and WeightType a typename inside WeightFunction? |