From 337b647baac15609a0a493902d58c473d25d2ed8 Mon Sep 17 00:00:00 2001 From: graehl Date: Thu, 8 Jul 2010 16:44:20 +0000 Subject: --show_features git-svn-id: https://ws10smt.googlecode.com/svn/trunk@184 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/cdec.cc | 29 +++++++++++--- decoder/hg.cc | 18 +++++++-- decoder/hg.h | 8 ++++ decoder/logval.h | 6 ++- decoder/sparse_vector.h | 39 +++++++++--------- decoder/trule.cc | 5 +++ decoder/trule.h | 4 +- decoder/viterbi.cc | 64 +++++++++++++++++++++++++----- decoder/viterbi.h | 102 ++++++++++++++++++++++++++++++++++++++---------- 9 files changed, 216 insertions(+), 59 deletions(-) diff --git a/decoder/cdec.cc b/decoder/cdec.cc index b43b7826..85412d6a 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -86,7 +86,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { ("show_expected_length", "Show the expected translation length under the model") ("show_partition,z", "Compute and show the partition (inside score)") ("show_cfg_search_space", "Show the search space as a CFG") - ("show_viterbi_features","Show the feature vector for the viterbi translation") + ("show_features","Show the feature vector for the viterbi translation") ("prelm_density_prune", po::value(), "Applied to -LM forest just before final LM rescoring: keep no more than this many times the number of edges used in the best derivation tree (>=1.0)") ("density_prune", po::value(), "Keep no more than this many times the number of edges used in the best derivation tree (>=1.0)") ("prelm_beam_prune", po::value(), "Prune paths from -LM forest before LM rescoring, keeping paths within exp(alpha>=0)") @@ -266,6 +266,23 @@ bool prelm_weights_string(po::variables_map const& conf,string &s) return false; } + + +void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,FeatureWeights *weights=0) { + cerr << viterbi_stats(forest,name,true,show_tree); + if (show_features) { + cerr << name<<" features: "; +/* Hypergraph::Edge const* best=forest.ViterbiGoalEdge(); + if (!best) + cerr << name<<" has no goal edge."; + else + cerr<feature_values_; +*/ + cerr << ViterbiFeatures(forest,weights); + cerr << endl; + } +} + void maybe_prune(Hypergraph &forest,po::variables_map const& conf,string nbeam,string ndensity,string forestname,double srclen) { double beam_prune,density_prune; bool use_beam_prune=beam_param(conf,nbeam,&beam_prune,conf.count("scale_prune_srclen"),srclen); @@ -283,12 +300,13 @@ void maybe_prune(Hypergraph &forest,po::variables_map const& conf,string nbeam,s if (use_density_prune) forest.DensityPruneInsideOutside(1.0 ,false, density_prune, pm); if (!forestname.empty()) forestname=" "+forestname; - cerr << viterbi_stats(forest," Pruned "+forestname+" forest",false,false); + forest_stats(forest," Pruned "+forestname+" forest",false,false); cerr << " Pruned "< res = Inside, @@ -498,7 +517,7 @@ int main(int argc, char** argv) { &prelm_forest); forest.swap(prelm_forest); forest.Reweight(prelm_feature_weights); - cerr << viterbi_stats(forest," prelm forest",true,show_tree_structure); + forest_stats(forest," prelm forest",show_tree_structure,show_features); } maybe_prune(forest,conf,"prelm_beam_prune","prelm_density_prune","-LM",srclen); @@ -517,7 +536,7 @@ int main(int argc, char** argv) { &lm_forest); forest.swap(lm_forest); forest.Reweight(feature_weights); - cerr << viterbi_stats(forest," +LM forest",true,show_tree_structure); + forest_stats(forest," +LM forest",show_tree_structure,show_features); } maybe_prune(forest,conf,"beam_prune","density_prune","+LM",srclen); diff --git a/decoder/hg.cc b/decoder/hg.cc index 70831d3d..41b954df 100644 --- a/decoder/hg.cc +++ b/decoder/hg.cc @@ -16,6 +16,17 @@ using namespace std; +Hypergraph::Edge const* Hypergraph::ViterbiGoalEdge() const +{ + Edge const* r=0; + for (unsigned i=0,e=edges_.size();iIsGoal() && (!r || e.edge_prob_ > r->edge_prob_)) + r=&e; + } + return r; +} + std::string Hypergraph::stats(std::string const& name) const { ostringstream o; @@ -568,12 +579,13 @@ void Hypergraph::SortInEdgesByEdgeWeights() { } Hypergraph* Hypergraph::CreateViterbiHypergraph(const vector* edges) const { - vector vit_edges; + typedef ViterbiPathTraversal::Result VE; + VE vit_edges; if (edges) { assert(edges->size() == edges_.size()); - Viterbi, ViterbiPathTraversal, prob_t, EdgeSelectEdgeWeightFunction>(*this, &vit_edges, ViterbiPathTraversal(), EdgeSelectEdgeWeightFunction(*edges)); + Viterbi(*this, &vit_edges, ViterbiPathTraversal(), EdgeSelectEdgeWeightFunction(*edges)); } else { - Viterbi, ViterbiPathTraversal, prob_t, EdgeProb>(*this, &vit_edges); + Viterbi(*this, &vit_edges, ViterbiPathTraversal() ,EdgeProb()); } map old2new_node; int num_new_nodes = 0; diff --git a/decoder/hg.h b/decoder/hg.h index ab90650c..0f8d21fd 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -59,6 +59,9 @@ class Hypergraph { short int prev_j_; }; + // returns edge with rule_.IsGoal, returns 0 if none found. otherwise gives best edge_prob_ - note: I don't think edge_prob_ is viterbi cumulative, so this would just be the best local probability. + Edge const* ViterbiGoalEdge() const; + void swap(Hypergraph& other) { other.nodes_.swap(nodes_); std::swap(is_linear_chain_, other.is_linear_chain_); @@ -218,10 +221,12 @@ class Hypergraph { // common WeightFunctions, map an edge -> WeightType // for generic Viterbi/Inside algorithms struct EdgeProb { + typedef prob_t Weight; inline const prob_t& operator()(const Hypergraph::Edge& e) const { return e.edge_prob_; } }; struct EdgeSelectEdgeWeightFunction { + typedef prob_t Weight; typedef std::vector EdgeMask; EdgeSelectEdgeWeightFunction(const EdgeMask& v) : v_(v) {} inline prob_t operator()(const Hypergraph::Edge& e) const { @@ -236,10 +241,12 @@ struct ScaledEdgeProb { ScaledEdgeProb(const double& alpha) : alpha_(alpha) {} inline prob_t operator()(const Hypergraph::Edge& e) const { return e.edge_prob_.pow(alpha_); } const double alpha_; + typedef prob_t Weight; }; // see Li (2010), Section 3.2.2-- this is 'x_e = p_e*r_e' struct EdgeFeaturesAndProbWeightFunction { + typedef const SparseVector Weight; inline const SparseVector operator()(const Hypergraph::Edge& e) const { SparseVector res; for (SparseVector::const_iterator it = e.feature_values_.begin(); @@ -250,6 +257,7 @@ struct EdgeFeaturesAndProbWeightFunction { }; struct TransitionCountWeightFunction { + typedef double Weight; inline double operator()(const Hypergraph::Edge& e) const { (void)e; return 1.0; } }; diff --git a/decoder/logval.h b/decoder/logval.h index 457818e7..9aaba557 100644 --- a/decoder/logval.h +++ b/decoder/logval.h @@ -1,7 +1,7 @@ #ifndef LOGVAL_H_ #define LOGVAL_H_ -#define LOGVAL_CHECK_NEG_POW false +#define LOGVAL_CHECK_NEG false #include #include @@ -59,7 +59,7 @@ class LogVal { } LogVal& poweq(const T& power) { -#if LOGVAL_CHECK_NEG_POW +#if LOGVAL_CHECK_NEG if (s_) { std::cerr << "poweq(T) not implemented when s_ is true\n"; std::abort(); @@ -116,7 +116,9 @@ LogVal operator-(LogVal o1, const LogVal& o2) { template T log(const LogVal& o) { +#ifdef LOGVAL_CHECK_NEG if (o.s_) return log(-1.0); +#endif return o.v_; } diff --git a/decoder/sparse_vector.h b/decoder/sparse_vector.h index 896e8c43..26f5d791 100644 --- a/decoder/sparse_vector.h +++ b/decoder/sparse_vector.h @@ -45,7 +45,7 @@ public: void store(std::valarray* target) const { (*target) *= 0; - for (typename MapType::const_iterator + for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) { if (it->first >= target->size()) break; (*target)[it->first] = it->second; @@ -63,7 +63,7 @@ public: // as the sparse vector T dot() const { T sum = 0; - for (typename MapType::const_iterator + for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) sum += it->second; return sum; @@ -72,21 +72,21 @@ public: template S dot(const SparseVector &vec) const { S sum = 0; - for (typename MapType::const_iterator + for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) { - typename MapType::const_iterator + typename MapType::const_iterator found = vec.values_.find(it->first); if (found != vec.values_.end()) sum += it->second * found->second; } return sum; } - + template S dot(const std::vector &vec) const { S sum = 0; - for (typename MapType::const_iterator + for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) { if (it->first < static_cast(vec.size())) @@ -99,7 +99,7 @@ public: S dot(const S *vec) const { // this is not range checked! S sum = 0; - for (typename MapType::const_iterator + for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) sum += it->second * vec[it->first]; std::cout << "dot(*vec) " << sum << std::endl; @@ -108,22 +108,22 @@ public: T l1norm() const { T sum = 0; - for (typename MapType::const_iterator + for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) sum += fabs(it->second); return sum; } - + T l2norm() const { T sum = 0; - for (typename MapType::const_iterator + for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) sum += it->second * it->second; return sqrt(sum); } - + SparseVector &operator+=(const SparseVector &other) { - for (typename MapType::const_iterator + for (typename MapType::const_iterator it = other.values_.begin(); it != other.values_.end(); ++it) { T v = (values_[it->first] += it->second); @@ -134,7 +134,7 @@ public: } SparseVector &operator-=(const SparseVector &other) { - for (typename MapType::const_iterator + for (typename MapType::const_iterator it = other.values_.begin(); it != other.values_.end(); ++it) { T v = (values_[it->first] -= it->second); @@ -145,28 +145,28 @@ public: } SparseVector &operator-=(const double &x) { - for (typename MapType::iterator + for (typename MapType::iterator it = values_.begin(); it != values_.end(); ++it) it->second -= x; return *this; } SparseVector &operator+=(const double &x) { - for (typename MapType::iterator + for (typename MapType::iterator it = values_.begin(); it != values_.end(); ++it) it->second += x; return *this; } SparseVector &operator/=(const T &x) { - for (typename MapType::iterator + for (typename MapType::iterator it = values_.begin(); it != values_.end(); ++it) it->second /= x; return *this; } SparseVector &operator*=(const T& x) { - for (typename MapType::iterator + for (typename MapType::iterator it = values_.begin(); it != values_.end(); ++it) it->second *= x; return *this; @@ -194,7 +194,7 @@ public: void Write(const bool with_semi, std::ostream* os) const { bool first = true; - for (typename MapType::const_iterator + for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) { // by definition feature id 0 is a dummy value if (it->first == 0) continue; @@ -244,6 +244,9 @@ private: MapType values_; }; +typedef SparseVector FeatureVector; +typedef std::vector FeatureWeights; + template SparseVector operator+(const SparseVector& a, const SparseVector& b) { SparseVector result = a; diff --git a/decoder/trule.cc b/decoder/trule.cc index 505839c7..170e3a95 100644 --- a/decoder/trule.cc +++ b/decoder/trule.cc @@ -7,6 +7,11 @@ using namespace std; +bool TRule::IsGoal() const { + static const int kGOAL(TD::Convert("Goal") * -1); // this will happen once, and after static init of trule.cc static dict. + return GetLHS() == kGOAL; +} + static WordID ConvertTrgString(const string& w) { int len = w.size(); WordID id = 0; diff --git a/decoder/trule.h b/decoder/trule.h index 7fb92924..defdbeb9 100644 --- a/decoder/trule.h +++ b/decoder/trule.h @@ -28,6 +28,8 @@ class TRule { scores_.set_value(feat_ids[i], feat_vals[i]); } + bool IsGoal() const; + explicit TRule(const std::vector& e) : e_(e), lhs_(0), prev_i(-1), prev_j(-1) {} TRule(const std::vector& e, const std::vector& f, const WordID& lhs) : e_(e), f_(f), lhs_(lhs), prev_i(-1), prev_j(-1) {} @@ -126,7 +128,7 @@ class TRule { std::vector f_; WordID lhs_; SparseVector scores_; - + char arity_; TRulePtr parent_rule_; // usually NULL, except when doing constrained decoding diff --git a/decoder/viterbi.cc b/decoder/viterbi.cc index 7f52d08c..6a7a970b 100644 --- a/decoder/viterbi.cc +++ b/decoder/viterbi.cc @@ -25,33 +25,33 @@ std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool es string ViterbiETree(const Hypergraph& hg) { vector tmp; - const prob_t p = Viterbi, ETreeTraversal, prob_t, EdgeProb>(hg, &tmp); + Viterbi(hg, &tmp); return TD::GetString(tmp); } string ViterbiFTree(const Hypergraph& hg) { vector tmp; - const prob_t p = Viterbi, FTreeTraversal, prob_t, EdgeProb>(hg, &tmp); + Viterbi(hg, &tmp); return TD::GetString(tmp); } prob_t ViterbiESentence(const Hypergraph& hg, vector* result) { - return Viterbi, ESentenceTraversal, prob_t, EdgeProb>(hg, result); + return Viterbi(hg, result); } prob_t ViterbiFSentence(const Hypergraph& hg, vector* result) { - return Viterbi, FSentenceTraversal, prob_t, EdgeProb>(hg, result); + return Viterbi(hg, result); } int ViterbiELength(const Hypergraph& hg) { int len = -1; - Viterbi(hg, &len); + Viterbi(hg, &len); return len; } int ViterbiPathLength(const Hypergraph& hg) { int len = -1; - Viterbi(hg, &len); + Viterbi(hg, &len); return len; } @@ -61,10 +61,11 @@ struct JoshuaVisTraversal { const std::string left; const std::string space; const std::string right; + typedef std::vector Result; void operator()(const Hypergraph::Edge& edge, - const std::vector*>& ants, - std::vector* result) const { - std::vector tmp; + const std::vector& ants, + Result* result) const { + Result tmp; edge.rule_->ESubstitute(ants, &tmp); const std::string cat = TD::Convert(edge.rule_->GetLHS() * -1); if (cat == "Goal") @@ -81,7 +82,50 @@ struct JoshuaVisTraversal { string JoshuaVisualizationString(const Hypergraph& hg) { vector tmp; - const prob_t p = Viterbi, JoshuaVisTraversal, prob_t, EdgeProb>(hg, &tmp); + Viterbi(hg, &tmp); return TD::GetString(tmp); } + +//TODO: move to appropriate header if useful elsewhere +/* + The simple solution like abs(f1-f2) <= e does not work for very small or very big values. This floating-point comparison algorithm is based on the more confident solution presented by Knuth in [1]. For a given floating point values u and v and a tolerance e: + +| u - v | <= e * |u| and | u - v | <= e * |v| +defines a "very close with tolerance e" relationship between u and v + (1) + +| u - v | <= e * |u| or | u - v | <= e * |v| +defines a "close enough with tolerance e" relationship between u and v + (2) + +Both relationships are commutative but are not transitive. The relationship defined by inequations (1) is stronger that the relationship defined by inequations (2) (i.e. (1) => (2) ). Because of the multiplication in the right side of inequations, that could cause an unwanted underflow condition, the implementation is using modified version of the inequations (1) and (2) where all underflow, overflow conditions could be guarded safely: + +| u - v | / |u| <= e and | u - v | / |v| <= e +| u - v | / |u| <= e or | u - v | / |v| <= e + (1`) +(2`) +*/ +#include +#include +#include +inline bool close_enough(double a,double b,double epsilon) +{ + using std::fabs; + double diff=fabs(a-b); + return diff<=epsilon*fabs(a) || diff<=epsilon*fabs(b); +} + +FeatureVector ViterbiFeatures(Hypergraph const& hg,FeatureWeights const* weights) { + FeatureVector r; + const prob_t p = Viterbi(hg, &r); + if (weights) { + double logp=log(p); + double fv=r.dot(*weights); + const double EPSILON=1e-5; + if (!close_enough(logp,fv,EPSILON)) + throw std::runtime_error("ViterbiFeatures log prob disagrees with features.dot(weights)"+boost::lexical_cast(logp)+"!="+boost::lexical_cast(fv)); + } + return r; +} + diff --git a/decoder/viterbi.h b/decoder/viterbi.h index 6b8bbed1..7e1e2c0e 100644 --- a/decoder/viterbi.h +++ b/decoder/viterbi.h @@ -8,13 +8,21 @@ std::string viterbi_stats(Hypergraph const& hg, std::string const& name="forest", bool estring=true, bool etree=false); -// V must implement: -// void operator()(const vector& ants, T* result); -template -WeightType Viterbi(const Hypergraph& hg, - T* result, - const Traversal& traverse = Traversal(), - const WeightFunction& weight = WeightFunction()) { +/// computes for each hg node the best (according to WeightType/WeightFunction) derivation, and some homomorphism (bottom up expression tree applied through Traversal) of it. T is the "return type" of Traversal, which is called only once for the best edge for a node's result (i.e. result will start default constructed) +//TODO: make T a typename inside Traversal and WeightType a typename inside WeightFunction? +// Traversal must implement: +// typedef T Result; +// void operator()(Hypergraph::Edge const& e,const vector& ants, Result* result) const; +// WeightFunction must implement: +// typedef prob_t Weight; +// Weight operator()(Hypergraph::Edge const& e) const; +template +typename WeightFunction::Weight Viterbi(const Hypergraph& hg, + typename Traversal::Result* result, + const Traversal& traverse, + const WeightFunction& weight) { + typedef typename Traversal::Result T; + typedef typename WeightFunction::Weight WeightType; const int num_nodes = hg.nodes_.size(); std::vector vit_result(num_nodes); std::vector vit_weight(num_nodes, WeightType::Zero()); @@ -51,7 +59,41 @@ WeightType Viterbi(const Hypergraph& hg, return vit_weight.back(); } + +/* +template +typename WeightFunction::Weight Viterbi(const Hypergraph& hg, + typename Traversal::Result* result) +{ + Traversal traverse; + WeightFunction weight; + return Viterbi(hg,result,traverse,weight); +} + +template +typename WeightFunction::Weight Viterbi(const Hypergraph& hg, + typename Traversal::Result* result, + Traversal const& traverse=Traversal() + ) +{ + WeightFunction weight; + return Viterbi(hg,result,traverse,weight); +} +*/ + +//spec for EdgeProb +template +prob_t Viterbi(const Hypergraph& hg, + typename Traversal::Result* result, + Traversal const& traverse=Traversal() + ) +{ + EdgeProb weight; + return Viterbi(hg,result,traverse,weight); +} + struct PathLengthTraversal { + typedef int Result; void operator()(const Hypergraph::Edge& edge, const std::vector& ants, int* result) const { @@ -62,14 +104,16 @@ struct PathLengthTraversal { }; struct ESentenceTraversal { + typedef std::vector Result; void operator()(const Hypergraph::Edge& edge, - const std::vector*>& ants, - std::vector* result) const { + const std::vector& ants, + Result* result) const { edge.rule_->ESubstitute(ants, result); } }; struct ELengthTraversal { + typedef int Result; void operator()(const Hypergraph::Edge& edge, const std::vector& ants, int* result) const { @@ -79,9 +123,10 @@ struct ELengthTraversal { }; struct FSentenceTraversal { + typedef std::vector Result; void operator()(const Hypergraph::Edge& edge, - const std::vector*>& ants, - std::vector* result) const { + const std::vector& ants, + Result* result) const { edge.rule_->FSubstitute(ants, result); } }; @@ -92,10 +137,11 @@ struct ETreeTraversal { const std::string left; const std::string space; const std::string right; + typedef std::vector Result; void operator()(const Hypergraph::Edge& edge, - const std::vector*>& ants, - std::vector* result) const { - std::vector tmp; + const std::vector& ants, + Result* result) const { + Result tmp; edge.rule_->ESubstitute(ants, &tmp); const std::string cat = TD::Convert(edge.rule_->GetLHS() * -1); if (cat == "Goal") @@ -111,10 +157,11 @@ struct FTreeTraversal { const std::string left; const std::string space; const std::string right; + typedef std::vector Result; void operator()(const Hypergraph::Edge& edge, - const std::vector*>& ants, - std::vector* result) const { - std::vector tmp; + const std::vector& ants, + Result* result) const { + Result tmp; edge.rule_->FSubstitute(ants, &tmp); const std::string cat = TD::Convert(edge.rule_->GetLHS() * -1); if (cat == "Goal") @@ -126,10 +173,10 @@ struct FTreeTraversal { }; struct ViterbiPathTraversal { + typedef std::vector Result; void operator()(const Hypergraph::Edge& edge, - const std::vector* >& ants, - std::vector* result) const { - result->clear(); + std::vector const& ants, + Result* result) const { for (int i = 0; i < ants.size(); ++i) for (int j = 0; j < ants[i]->size(); ++j) result->push_back((*ants[i])[j]); @@ -137,6 +184,18 @@ struct ViterbiPathTraversal { } }; +struct FeatureVectorTraversal { + typedef FeatureVector Result; + void operator()(Hypergraph::Edge const& edge, + std::vector const& ants, + Result* result) const { + for (int i = 0; i < ants.size(); ++i) + *result+=*ants[i]; + *result+=edge.feature_values_; + } +}; + + std::string JoshuaVisualizationString(const Hypergraph& hg); prob_t ViterbiESentence(const Hypergraph& hg, std::vector* result); std::string ViterbiETree(const Hypergraph& hg); @@ -145,4 +204,7 @@ std::string ViterbiFTree(const Hypergraph& hg); int ViterbiELength(const Hypergraph& hg); int ViterbiPathLength(const Hypergraph& hg); +/// if weights supplied, assert viterbi prob = features.dot(*weights). return features (sum over all edges in viterbi derivation) +FeatureVector ViterbiFeatures(Hypergraph const& hg,FeatureWeights const* weights=0); + #endif -- cgit v1.2.3