From 786b689a486413b4ea31841eb352ed728621e4b1 Mon Sep 17 00:00:00 2001 From: graehl Date: Sun, 25 Jul 2010 02:52:58 +0000 Subject: cleaned up kbest, new USE_INFO_EDGE 1 logs per edge, --show_derivation (needs work; handle kbest deriv, viterbi deriv, sort hg exposing viterbi?) git-svn-id: https://ws10smt.googlecode.com/svn/trunk@405 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/cdec.cc | 18 +++++------ decoder/ff_factory.h | 9 +++++- decoder/ff_lm_fsa.h | 2 ++ decoder/ff_sample_fsa.h | 20 ++++++------ decoder/hg.h | 81 +++++++++++++++++++++++++++++++++++++++++++++++++ decoder/oracle_bleu.h | 59 ++++++++++++++++++----------------- decoder/viterbi.cc | 13 +++++++- decoder/viterbi.h | 2 +- 8 files changed, 154 insertions(+), 50 deletions(-) diff --git a/decoder/cdec.cc b/decoder/cdec.cc index f366a08f..9110a234 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -313,8 +313,8 @@ bool prelm_weights_string(po::variables_map const& conf,string &s) } -void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,WeightVector *weights=0) { - cerr << viterbi_stats(forest,name,true,show_tree); +void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,WeightVector *weights=0,bool show_deriv=false) { + cerr << viterbi_stats(forest,name,true,show_tree,show_deriv); if (show_features) { cerr << name<<" features: "; /* Hypergraph::Edge const* best=forest.ViterbiGoalEdge(); @@ -328,9 +328,9 @@ void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_featur } } -void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,DenseWeightVector const& feature_weights) { +void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,DenseWeightVector const& feature_weights,bool sd=false) { WeightVector fw(feature_weights); - forest_stats(forest,name,show_tree,show_features,&fw); + forest_stats(forest,name,show_tree,show_features,&fw,sd); } @@ -348,7 +348,7 @@ void maybe_prune(Hypergraph &forest,po::variables_map const& conf,string nbeam,s } forest.PruneInsideOutside(beam_prune,density_prune,pm,false,1,conf["promise_power"].as()); if (!forestname.empty()) forestname=" "+forestname; - forest_stats(forest," Pruned "+forestname+" forest",false,false); + forest_stats(forest," Pruned "+forestname+" forest",false,false,false); cerr << " Pruned "< res = Inside, @@ -586,7 +586,7 @@ int main(int argc, char** argv) { &prelm_forest); forest.swap(prelm_forest); forest.Reweight(prelm_feature_weights); - forest_stats(forest," prelm forest",show_tree_structure,show_features,prelm_feature_weights); + forest_stats(forest," prelm forest",show_tree_structure,show_features,prelm_feature_weights,oracle.show_derivation); } maybe_prune(forest,conf,"prelm_beam_prune","prelm_density_prune","-LM",srclen); @@ -605,7 +605,7 @@ int main(int argc, char** argv) { &lm_forest); forest.swap(lm_forest); forest.Reweight(feature_weights); - forest_stats(forest," +LM forest",show_tree_structure,show_features,feature_weights); + forest_stats(forest," +LM forest",show_tree_structure,show_features,feature_weights,oracle.show_derivation); } maybe_prune(forest,conf,"beam_prune","density_prune","+LM",srclen); @@ -650,7 +650,7 @@ int main(int argc, char** argv) { } else { if (kbest) { //TODO: does this work properly? - oracle.DumpKBest(sent_id, forest, conf["k_best"].as(), unique_kbest,""); + oracle.DumpKBest(sent_id, forest, conf["k_best"].as(), unique_kbest,"-"); } else if (csplit_output_plf) { cout << HypergraphIO::AsPLF(forest, false) << endl; } else { diff --git a/decoder/ff_factory.h b/decoder/ff_factory.h index 12e768aa..93681c5e 100644 --- a/decoder/ff_factory.h +++ b/decoder/ff_factory.h @@ -1,7 +1,14 @@ #ifndef _FF_FACTORY_H_ #define _FF_FACTORY_H_ -//TODO: use http://www.boost.org/doc/libs/1_43_0/libs/functional/factory/doc/html/index.html? +/*TODO: register state identity separately from feature function identity? as + * in: string registry for name of state somewhere, assert that same result is + * computed by all users? or, we can just require that ff sharing same state + * all be mashed into a single ffunc, which can just emit all the fid scores at + * once. that's fine. + */ + +//TODO: use http://www.boost.org/doc/libs/1_43_0/libs/functional/factory/doc/html/index.html ? #include #include diff --git a/decoder/ff_lm_fsa.h b/decoder/ff_lm_fsa.h index 344cd992..01b3764e 100755 --- a/decoder/ff_lm_fsa.h +++ b/decoder/ff_lm_fsa.h @@ -1,6 +1,8 @@ #ifndef FF_LM_FSA_H #define FF_LM_FSA_H +//TODO: use SRI LM::contextBOW, LM::contextID to shorten state + #include "ff_lm.h" #include "ff_from_fsa.h" diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h index 74d9e7b5..2aeaa6de 100755 --- a/decoder/ff_sample_fsa.h +++ b/decoder/ff_sample_fsa.h @@ -3,11 +3,11 @@ #include "ff_from_fsa.h" -// example: feature val = -1 * # of target words +// example: feature val = 1 * # of target words struct WordPenaltyFsa : public FsaFeatureFunctionBase { static std::string usage(bool param,bool verbose) { return FeatureFunction::usage_helper( - "WordPenaltyFsa","","-1 per target word" + "WordPenaltyFsa","","1 per target word" ,param,verbose); } @@ -21,7 +21,7 @@ struct WordPenaltyFsa : public FsaFeatureFunctionBase { } // move from state to next_state after seeing word x, while emitting features->add_value(fid,val) possibly with duplicates. state and next_state may be same memory. Featval Scan1(WordID w,void const* state,void *next_state) const { - return -1; + return 1; } }; @@ -35,7 +35,7 @@ struct LongerThanPrev : public FsaFeatureFunctionBase { return FeatureFunction::usage_helper( "LongerThanPrev", "", - "stupid example stateful (bigram) feature: -1 per target word that's longer than the previous word ( sentence begin considered 3 chars long, is sentence end.)", + "stupid example stateful (bigram) feature: 1 per target word that's longer than the previous word ( sentence begin considered 3 chars long, is sentence end.)", param,verbose); } @@ -49,7 +49,7 @@ struct LongerThanPrev : public FsaFeatureFunctionBase { return std::strlen(TD::Convert(w)); } int markov_order() const { return 1; } - LongerThanPrev(std::string const& param) : Base(sizeof(int),singleton_sentence(TD::se)) { + LongerThanPrev(std::string const& param) : Base(sizeof(int)/* ,singleton_sentence(TD::se) */) { Init(); if (0) { // all this is done in constructor already set_state_bytes(sizeof(int)); @@ -61,7 +61,7 @@ struct LongerThanPrev : public FsaFeatureFunctionBase { to_state(h_start.begin(),&ss,1); } - state(start.begin())=3; + state(start.begin())=999999; state(h_start.begin())=4; // estimate: anything >4 chars is usually longer than previous } @@ -70,7 +70,7 @@ struct LongerThanPrev : public FsaFeatureFunctionBase { int prevlen=state(from); int len=wordlen(w); state(next_state)=len; - return len>prevlen ? -1 : 0; + return len>prevlen ? 1 : 0; } }; @@ -82,7 +82,7 @@ struct ShorterThanPrev : FsaTypedBase { return FeatureFunction::usage_helper( "ShorterThanPrev", "", - "stupid example stateful (bigram) feature: -1 per target word that's shorter than the previous word (end of sentence considered '')", + "stupid example stateful (bigram) feature: 1 per target word that's shorter than the previous word (end of sentence considered '')", param,verbose); } @@ -90,7 +90,7 @@ struct ShorterThanPrev : FsaTypedBase { return std::strlen(TD::Convert(w)); } ShorterThanPrev(std::string const& param) - : Base(3,4,singleton_sentence(TD::se)) + : Base(-1,4/* ,singleton_sentence(TD::se) */) // start, h_start, end_phrase // estimate: anything <4 chars is usually shorter than previous { @@ -106,7 +106,7 @@ struct ShorterThanPrev : FsaTypedBase { void ScanT(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,int prevlen,int &len,FeatureVector *features) const { len=wordlen(w); if (lenadd_value(fid_,-1); + features->add_value(fid_,1); } }; diff --git a/decoder/hg.h b/decoder/hg.h index b3bfd19c..34638e04 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -1,12 +1,24 @@ #ifndef _HG_H_ #define _HG_H_ +#define USE_INFO_EDGE 1 +#if USE_INFO_EDGE +# include +# define INFO_EDGE(e,msg) do { std::ostringstream &o=e.info_;o< #include #include "feature_vector.h" #include "small_vector.h" #include "wordid.h" +#include "tdict.h" #include "trule.h" #include "prob.h" @@ -23,6 +35,7 @@ class Hypergraph { // SmallVector is a fast, small vector implementation for sizes <= 2 typedef SmallVectorInt TailNodeVector; + typedef std::vector EdgesVector; // TODO get rid of cat_? // TODO keep cat_ and add span and/or state? :) @@ -59,8 +72,76 @@ class Hypergraph { short int j_; short int prev_i_; short int prev_j_; +#if USE_INFO_EDGE + private: + std::ostringstream info_; + public: + Edge(Edge const& o) : head_node_(o.head_node_),tail_nodes_(o.tail_nodes_),rule_(o.rule_),feature_values_(o.feature_values_),edge_prob_(o.edge_prob_),id_(o.id_),i_(o.i_),j_(o.j_),prev_i_(o.prev_i_),prev_j_(o.prev_j_), info_(o.info_.str()) { } + void operator=(Edge const& o) { + head_node_ = o.head_node_; tail_nodes_ = o.tail_nodes_; rule_ = o.rule_; feature_values_ = o.feature_values_; edge_prob_ = o.edge_prob_; id_ = o.id_; i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_; info_.str(o.info_.str()); + } + std::string info() const { return info_.str(); } +#else + std::string info() const { return std::string(); } +#endif + void show(std::ostream &o,unsigned mask=SPAN|RULE) const { + o<<'{'; + if (mask&CATEGORY) + o<GetLHS()); + if (mask&PREV_SPAN) + o<<'<'<'; + if (mask&SPAN) + o<<'<'<'; + if (mask&PROB) + o<<" p="<AsString(mask&RULE_LHS); + if (USE_INFO_EDGE) { + if (mask) o << ' '; + o<maxdepth) return; + if (indent) for(int i=0;i(&refs), "Translation reference files") ("oracle_loss", value(&loss_name)->default_value("IBM_BLEU_3"), "IBM_BLEU_3 (default), IBM_BLEU etc") ("bleu_weight", value(&bleu_weight)->default_value(1.), "weight to give the hope/fear loss function vs. model score") + ("show_derivation", bool_switch(&show_derivation), "show derivation tree in kbest") ("verbose",bool_switch(&verbose),"detailed logs") ; } @@ -248,46 +249,48 @@ struct OracleBleu { // dest_forest->SortInEdgesByEdgeWeights(); } -// TODO decoder output should probably be moved to another file - how about oracle_bleu.h - void DumpKBest(const int sent_id, const Hypergraph& forest, const int k, const bool unique, std::string const &kbest_out_filename_) { + bool show_derivation; + template + void kbest(int sent_id,Hypergraph const& forest,int k,std::ostream &kbest_out=std::cout,std::ostream &deriv_out=std::cerr) { using namespace std; using namespace boost; - cerr << "In kbest\n"; - - ofstream kbest_out; - kbest_out.open(kbest_out_filename_.c_str()); - cerr << "Output kbest to " << kbest_out_filename_; - + typedef KBest::KBestDerivations K; + K kbest(forest,k); //add length (f side) src length of this sentence to the psuedo-doc src length count float curr_src_length = doc_src_length + tmp_src_length; - - if (unique) { - KBest::KBestDerivations kbest(forest, k); - for (int i = 0; i < k; ++i) { - const KBest::KBestDerivations::Derivation* d = - kbest.LazyKthBest(forest.nodes_.size() - 1, i); - if (!d) break; - //calculate score in context of psuedo-doc + for (int i = 0; i < k; ++i) { + typename K::Derivation *d = kbest.LazyKthBest(forest.nodes_.size() - 1, i); + if (!d) break; + kbest_out << sent_id << " ||| " << TD::GetString(d->yield) << " ||| " + << d->feature_values << " ||| " << log(d->score); + if (!refs.empty()) { ScoreP sentscore = GetScore(d->yield,sent_id); sentscore->PlusEquals(*doc_score,float(1)); float bleu = curr_src_length * sentscore->ComputeScore(); - kbest_out << sent_id << " ||| " << TD::GetString(d->yield) << " ||| " - << d->feature_values << " ||| " << log(d->score) << " ||| " << bleu << endl; - // cout << sent_id << " ||| " << TD::GetString(d->yield) << " ||| " - // << d->feature_values << " ||| " << log(d->score) << endl; + kbest_out << " ||| " << bleu; } - } else { - KBest::KBestDerivations kbest(forest, k); - for (int i = 0; i < k; ++i) { - const KBest::KBestDerivations::Derivation* d = - kbest.LazyKthBest(forest.nodes_.size() - 1, i); - if (!d) break; - cout << sent_id << " ||| " << TD::GetString(d->yield) << " ||| " - << d->feature_values << " ||| " << log(d->score) << endl; + kbest_out<edge); + deriv_out<(sent_id,forest,k,ko.get(),std::cerr); + else { + kbest(sent_id,forest,k,ko.get(),std::cerr); + } + } + void DumpKBest(std::string const& suffix,const int sent_id, const Hypergraph& forest, const int k, const bool unique, std::string const& forest_output) { std::ostringstream kbest_string_stream; diff --git a/decoder/viterbi.cc b/decoder/viterbi.cc index 7719de32..d0b7e6ec 100644 --- a/decoder/viterbi.cc +++ b/decoder/viterbi.cc @@ -6,7 +6,7 @@ using namespace std; -std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool estring, bool etree) +std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool estring, bool etree,bool show_derivation) { ostringstream o; o << hg.stats(name); @@ -19,6 +19,17 @@ std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool es if (etree) { o<(hg, &d); + if (d.empty()) + o<<"(empty viterbi hyperpath - no translation)"; + else + hg.show_tree(o,*d.back(),false); // last item should be goal (or at least depend on prev items). TODO: this doesn't actually reorder the nodes in hg. + o<