// Wrappers that call Viterbi() on a Hypergraph and turn the result into
// strings, scores or lengths.
//
// NOTE(review): this text appears to have lost its line breaks and whatever
// sat between '<' and '>' (header names after #include, template arguments
// such as the element type of `vector` and the arguments of `Viterbi`, and
// the middle of viterbi_stats after "o<"). Code tokens below are left exactly
// as found; only comments and whitespace were added.
#include "viterbi.h"
#include
#include
#include "hg.h"

using namespace std;

// Builds a textual summary of `hg`, labelled with `name`.
// The visible part writes hg.stats(name) into a string stream and, when
// `estring` is true, calls ViterbiESentence(hg, &trans) and keeps the
// returned prob_t in `vs`.
// NOTE(review): the body breaks off after "o<", so how `etree` and
// `show_derivation` are used cannot be seen here. The trailing
// "tmp; Viterbi(hg, &tmp); return TD::GetString(tmp); }" looks like the end
// of a different function whose header was lost -- confirm against the
// original file.
std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool estring, bool etree,bool show_derivation) {
  ostringstream o;
  o << hg.stats(name);
  if (estring) {
    vector trans;
    const prob_t vs = ViterbiESentence(hg, &trans);
    o< tmp; Viterbi(hg, &tmp); return TD::GetString(tmp); }

// Runs Viterbi(hg, &tmp) and returns TD::GetString(tmp).
string ViterbiFTree(const Hypergraph& hg) {
  vector tmp;
  Viterbi(hg, &tmp);
  return TD::GetString(tmp);
}

// Forwards `hg` and `result` to Viterbi() and returns the prob_t it yields.
prob_t ViterbiESentence(const Hypergraph& hg, vector* result) {
  return Viterbi(hg, result);
}

// Forwards `hg` and `result` to Viterbi() and returns the prob_t it yields.
// NOTE(review): as shown this is identical to ViterbiESentence; presumably
// the two differed in a template argument that is no longer visible.
prob_t ViterbiFSentence(const Hypergraph& hg, vector* result) {
  return Viterbi(hg, result);
}

// Returns the int that Viterbi(hg, &len) leaves in `len`. `len` starts at
// -1, so -1 is returned if Viterbi() does not write to it.
int ViterbiELength(const Hypergraph& hg) {
  int len = -1;
  Viterbi(hg, &len);
  return len;
}

// Same shape as ViterbiELength: seeds `len` with -1, passes &len to
// Viterbi(), and returns `len`.
int ViterbiPathLength(const Hypergraph& hg) {
  int len = -1;
  Viterbi(hg, &len);
  return len;
}

// The long line below holds JoshuaVisTraversal, JoshuaVisualizationString and
// the opening of a block comment about floating-point comparison.
//
// JoshuaVisTraversal: functor with three constant strings, left "(",
// space " " and right ")". Its operator():
//   1. calls edge.rule_->ESubstitute(ants, &tmp);
//   2. obtains `cat` via TD::Convert(edge.rule_->GetLHS() * -1);
//   3. if cat == "Goal", swaps tmp into *result;
//   4. otherwise streams left, cat, '{', edge.i_, '-', edge.j_, '}', space,
//      TD::GetString(tmp), right into a string and passes it to
//      TD::ConvertSentence(..., result).
//
// JoshuaVisualizationString: runs Viterbi(hg, &tmp) and returns
// TD::GetString(tmp).
//
// NOTE(review): as stored, everything after the "//" on the next line is on
// the same physical line as that comment marker; it is kept on one line here
// so that no text changes between comment and code.
// create a string of the form (S (X the man) (X said (X he (X would (X go))))) struct JoshuaVisTraversal { JoshuaVisTraversal() : left("("), space(" "), right(")") {} const std::string left; const std::string space; const std::string right; typedef std::vector Result; void operator()(const Hypergraph::Edge& edge, const std::vector& ants, Result* result) const { Result tmp; edge.rule_->ESubstitute(ants, &tmp); const std::string cat = TD::Convert(edge.rule_->GetLHS() * -1); if (cat == "Goal") result->swap(tmp); else { ostringstream os; os << left << cat << '{' << edge.i_ << '-' << edge.j_ << '}' << space << TD::GetString(tmp) << right; TD::ConvertSentence(os.str(), result); } } }; string JoshuaVisualizationString(const Hypergraph& hg) { vector tmp; Viterbi(hg, &tmp); return TD::GetString(tmp); } //TODO: move to appropriate header if useful elsewhere /* The simple solution like abs(f1-f2) <= e does not work for very small or very big values. This floating-point comparison algorithm is based on the more confident solution presented by Knuth in [1]. 
For a given floating point values u and v and a tolerance e: | u - v | <= e * |u| and | u - v | <= e * |v| defines a "very close with tolerance e" relationship between u and v (1) | u - v | <= e * |u| or | u - v | <= e * |v| defines a "close enough with tolerance e" relationship between u and v (2) Both relationships are commutative but are not transitive. The relationship defined by inequations (1) is stronger that the relationship defined by inequations (2) (i.e. (1) => (2) ). Because of the multiplication in the right side of inequations, that could cause an unwanted underflow condition, the implementation is using modified version of the inequations (1) and (2) where all underflow, overflow conditions could be guarded safely: | u - v | / |u| <= e and | u - v | / |v| <= e | u - v | / |u| <= e or | u - v | / |v| <= e (1`) (2`) */ #include #include #include
// close_enough(a, b, epsilon): relative-tolerance comparison of two doubles.
//
// Computes diff = |a - b| and returns true when diff <= epsilon * |a| or
// diff <= epsilon * |b|. That is the "close enough" relationship (2) from the
// text preceding this function, evaluated by multiplication; the
// division-based variants that text mentions are not what is computed here.
//
// NOTE(review): assuming IEEE-754 doubles and epsilon > 0, an infinite `a`
// or `b` paired with a finite value gives diff = inf and inf <= inf, so the
// pair is reported as close. Two equal infinities, or any NaN, give a NaN
// diff and the result is false. Confirm this is acceptable for callers that
// pass log() of a probability.
inline bool close_enough(double a,double b,double epsilon) {
  using std::fabs;  // lets the unqualified fabs calls below pick the std overloads
  double diff=fabs(a-b);
  return diff<=epsilon*fabs(a) || diff<=epsilon*fabs(b);
}
// NOTE(review): the definition below runs past the end of the visible text
// (it stops at "cerr<"); it is reproduced untouched.
FeatureVector ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights,bool fatal_dotprod_disagreement) { FeatureVector r; const prob_t p = Viterbi(hg, &r); if (weights) { double logp=log(p); double fv=r.dot(*weights); const double EPSILON=1e-5; if (!close_enough(logp,fv,EPSILON)) { string complaint="ViterbiFeatures log prob disagrees with features.dot(weights)"+boost::lexical_cast(logp)+"!="+boost::lexical_cast(fv); if (fatal_dotprod_disagreement) throw std::runtime_error(complaint); else cerr<