summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-08 16:44:20 +0000
committergraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-08 16:44:20 +0000
commit337b647baac15609a0a493902d58c473d25d2ed8 (patch)
tree3827f59a180164878ee217f27f445c259c9e5bab
parent7386574a2c70c7ed6e937eb94e3add8023cd7327 (diff)
--show_features
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@184 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r--decoder/cdec.cc29
-rw-r--r--decoder/hg.cc18
-rw-r--r--decoder/hg.h8
-rw-r--r--decoder/logval.h6
-rw-r--r--decoder/sparse_vector.h39
-rw-r--r--decoder/trule.cc5
-rw-r--r--decoder/trule.h4
-rw-r--r--decoder/viterbi.cc64
-rw-r--r--decoder/viterbi.h102
9 files changed, 216 insertions, 59 deletions
diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index b43b7826..85412d6a 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -86,7 +86,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
("show_expected_length", "Show the expected translation length under the model")
("show_partition,z", "Compute and show the partition (inside score)")
("show_cfg_search_space", "Show the search space as a CFG")
- ("show_viterbi_features","Show the feature vector for the viterbi translation")
+ ("show_features","Show the feature vector for the viterbi translation")
("prelm_density_prune", po::value<double>(), "Applied to -LM forest just before final LM rescoring: keep no more than this many times the number of edges used in the best derivation tree (>=1.0)")
("density_prune", po::value<double>(), "Keep no more than this many times the number of edges used in the best derivation tree (>=1.0)")
("prelm_beam_prune", po::value<double>(), "Prune paths from -LM forest before LM rescoring, keeping paths within exp(alpha>=0)")
@@ -266,6 +266,23 @@ bool prelm_weights_string(po::variables_map const& conf,string &s)
return false;
}
+
+
+void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,FeatureWeights *weights=0) {
+ cerr << viterbi_stats(forest,name,true,show_tree);
+ if (show_features) {
+ cerr << name<<" features: ";
+/* Hypergraph::Edge const* best=forest.ViterbiGoalEdge();
+ if (!best)
+ cerr << name<<" has no goal edge.";
+ else
+ cerr<<best->feature_values_;
+*/
+ cerr << ViterbiFeatures(forest,weights);
+ cerr << endl;
+ }
+}
+
void maybe_prune(Hypergraph &forest,po::variables_map const& conf,string nbeam,string ndensity,string forestname,double srclen) {
double beam_prune,density_prune;
bool use_beam_prune=beam_param(conf,nbeam,&beam_prune,conf.count("scale_prune_srclen"),srclen);
@@ -283,12 +300,13 @@ void maybe_prune(Hypergraph &forest,po::variables_map const& conf,string nbeam,s
if (use_density_prune)
forest.DensityPruneInsideOutside(1.0 ,false, density_prune, pm);
if (!forestname.empty()) forestname=" "+forestname;
- cerr << viterbi_stats(forest," Pruned "+forestname+" forest",false,false);
+ forest_stats(forest," Pruned "+forestname+" forest",false,false);
cerr << " Pruned "<<forestname<<" forest portion of edges kept: "<<forest.edges_.size()/presize<<endl;
}
}
+
int main(int argc, char** argv) {
global_ff_registry.reset(new FFRegistry);
register_feature_functions();
@@ -470,7 +488,8 @@ int main(int argc, char** argv) {
continue;
}
const bool show_tree_structure=conf.count("show_tree_structure");
- cerr << viterbi_stats(forest," -LM forest",true,show_tree_structure);
+ const bool show_features=conf.count("show_features");
+ forest_stats(forest," -LM forest",show_tree_structure,show_features);
if (conf.count("show_expected_length")) {
const PRPair<double, double> res =
Inside<PRPair<double, double>,
@@ -498,7 +517,7 @@ int main(int argc, char** argv) {
&prelm_forest);
forest.swap(prelm_forest);
forest.Reweight(prelm_feature_weights);
- cerr << viterbi_stats(forest," prelm forest",true,show_tree_structure);
+ forest_stats(forest," prelm forest",show_tree_structure,show_features);
}
maybe_prune(forest,conf,"prelm_beam_prune","prelm_density_prune","-LM",srclen);
@@ -517,7 +536,7 @@ int main(int argc, char** argv) {
&lm_forest);
forest.swap(lm_forest);
forest.Reweight(feature_weights);
- cerr << viterbi_stats(forest," +LM forest",true,show_tree_structure);
+ forest_stats(forest," +LM forest",show_tree_structure,show_features);
}
maybe_prune(forest,conf,"beam_prune","density_prune","+LM",srclen);
diff --git a/decoder/hg.cc b/decoder/hg.cc
index 70831d3d..41b954df 100644
--- a/decoder/hg.cc
+++ b/decoder/hg.cc
@@ -16,6 +16,17 @@
using namespace std;
+Hypergraph::Edge const* Hypergraph::ViterbiGoalEdge() const
+{
+ Edge const* r=0;
+ for (unsigned i=0,e=edges_.size();i<e;++i) {
+ Edge const& e=edges_[i];
+ if (e.rule_ && e.rule_->IsGoal() && (!r || e.edge_prob_ > r->edge_prob_))
+ r=&e;
+ }
+ return r;
+}
+
std::string Hypergraph::stats(std::string const& name) const
{
ostringstream o;
@@ -568,12 +579,13 @@ void Hypergraph::SortInEdgesByEdgeWeights() {
}
Hypergraph* Hypergraph::CreateViterbiHypergraph(const vector<bool>* edges) const {
- vector<const Edge*> vit_edges;
+ typedef ViterbiPathTraversal::Result VE;
+ VE vit_edges;
if (edges) {
assert(edges->size() == edges_.size());
- Viterbi<vector<const Edge*>, ViterbiPathTraversal, prob_t, EdgeSelectEdgeWeightFunction>(*this, &vit_edges, ViterbiPathTraversal(), EdgeSelectEdgeWeightFunction(*edges));
+ Viterbi(*this, &vit_edges, ViterbiPathTraversal(), EdgeSelectEdgeWeightFunction(*edges));
} else {
- Viterbi<vector<const Edge*>, ViterbiPathTraversal, prob_t, EdgeProb>(*this, &vit_edges);
+ Viterbi(*this, &vit_edges, ViterbiPathTraversal() ,EdgeProb());
}
map<int, int> old2new_node;
int num_new_nodes = 0;
diff --git a/decoder/hg.h b/decoder/hg.h
index ab90650c..0f8d21fd 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -59,6 +59,9 @@ class Hypergraph {
short int prev_j_;
};
+ // returns edge with rule_.IsGoal, returns 0 if none found. otherwise gives best edge_prob_ - note: I don't think edge_prob_ is viterbi cumulative, so this would just be the best local probability.
+ Edge const* ViterbiGoalEdge() const;
+
void swap(Hypergraph& other) {
other.nodes_.swap(nodes_);
std::swap(is_linear_chain_, other.is_linear_chain_);
@@ -218,10 +221,12 @@ class Hypergraph {
// common WeightFunctions, map an edge -> WeightType
// for generic Viterbi/Inside algorithms
struct EdgeProb {
+ typedef prob_t Weight;
inline const prob_t& operator()(const Hypergraph::Edge& e) const { return e.edge_prob_; }
};
struct EdgeSelectEdgeWeightFunction {
+ typedef prob_t Weight;
typedef std::vector<bool> EdgeMask;
EdgeSelectEdgeWeightFunction(const EdgeMask& v) : v_(v) {}
inline prob_t operator()(const Hypergraph::Edge& e) const {
@@ -236,10 +241,12 @@ struct ScaledEdgeProb {
ScaledEdgeProb(const double& alpha) : alpha_(alpha) {}
inline prob_t operator()(const Hypergraph::Edge& e) const { return e.edge_prob_.pow(alpha_); }
const double alpha_;
+ typedef prob_t Weight;
};
// see Li (2010), Section 3.2.2-- this is 'x_e = p_e*r_e'
struct EdgeFeaturesAndProbWeightFunction {
+ typedef const SparseVector<prob_t> Weight;
inline const SparseVector<prob_t> operator()(const Hypergraph::Edge& e) const {
SparseVector<prob_t> res;
for (SparseVector<double>::const_iterator it = e.feature_values_.begin();
@@ -250,6 +257,7 @@ struct EdgeFeaturesAndProbWeightFunction {
};
struct TransitionCountWeightFunction {
+ typedef double Weight;
inline double operator()(const Hypergraph::Edge& e) const { (void)e; return 1.0; }
};
diff --git a/decoder/logval.h b/decoder/logval.h
index 457818e7..9aaba557 100644
--- a/decoder/logval.h
+++ b/decoder/logval.h
@@ -1,7 +1,7 @@
#ifndef LOGVAL_H_
#define LOGVAL_H_
-#define LOGVAL_CHECK_NEG_POW false
+#define LOGVAL_CHECK_NEG false
#include <iostream>
#include <cstdlib>
@@ -59,7 +59,7 @@ class LogVal {
}
LogVal& poweq(const T& power) {
-#if LOGVAL_CHECK_NEG_POW
+#if LOGVAL_CHECK_NEG
if (s_) {
std::cerr << "poweq(T) not implemented when s_ is true\n";
std::abort();
@@ -116,7 +116,9 @@ LogVal<T> operator-(LogVal<T> o1, const LogVal<T>& o2) {
template<typename T>
T log(const LogVal<T>& o) {
+#ifdef LOGVAL_CHECK_NEG
if (o.s_) return log(-1.0);
+#endif
return o.v_;
}
diff --git a/decoder/sparse_vector.h b/decoder/sparse_vector.h
index 896e8c43..26f5d791 100644
--- a/decoder/sparse_vector.h
+++ b/decoder/sparse_vector.h
@@ -45,7 +45,7 @@ public:
void store(std::valarray<T>* target) const {
(*target) *= 0;
- for (typename MapType::const_iterator
+ for (typename MapType::const_iterator
it = values_.begin(); it != values_.end(); ++it) {
if (it->first >= target->size()) break;
(*target)[it->first] = it->second;
@@ -63,7 +63,7 @@ public:
// as the sparse vector
T dot() const {
T sum = 0;
- for (typename MapType::const_iterator
+ for (typename MapType::const_iterator
it = values_.begin(); it != values_.end(); ++it)
sum += it->second;
return sum;
@@ -72,21 +72,21 @@ public:
template<typename S>
S dot(const SparseVector<S> &vec) const {
S sum = 0;
- for (typename MapType::const_iterator
+ for (typename MapType::const_iterator
it = values_.begin(); it != values_.end(); ++it)
{
- typename MapType::const_iterator
+ typename MapType::const_iterator
found = vec.values_.find(it->first);
if (found != vec.values_.end())
sum += it->second * found->second;
}
return sum;
}
-
+
template<typename S>
S dot(const std::vector<S> &vec) const {
S sum = 0;
- for (typename MapType::const_iterator
+ for (typename MapType::const_iterator
it = values_.begin(); it != values_.end(); ++it)
{
if (it->first < static_cast<int>(vec.size()))
@@ -99,7 +99,7 @@ public:
S dot(const S *vec) const {
// this is not range checked!
S sum = 0;
- for (typename MapType::const_iterator
+ for (typename MapType::const_iterator
it = values_.begin(); it != values_.end(); ++it)
sum += it->second * vec[it->first];
std::cout << "dot(*vec) " << sum << std::endl;
@@ -108,22 +108,22 @@ public:
T l1norm() const {
T sum = 0;
- for (typename MapType::const_iterator
+ for (typename MapType::const_iterator
it = values_.begin(); it != values_.end(); ++it)
sum += fabs(it->second);
return sum;
}
-
+
T l2norm() const {
T sum = 0;
- for (typename MapType::const_iterator
+ for (typename MapType::const_iterator
it = values_.begin(); it != values_.end(); ++it)
sum += it->second * it->second;
return sqrt(sum);
}
-
+
SparseVector<T> &operator+=(const SparseVector<T> &other) {
- for (typename MapType::const_iterator
+ for (typename MapType::const_iterator
it = other.values_.begin(); it != other.values_.end(); ++it)
{
T v = (values_[it->first] += it->second);
@@ -134,7 +134,7 @@ public:
}
SparseVector<T> &operator-=(const SparseVector<T> &other) {
- for (typename MapType::const_iterator
+ for (typename MapType::const_iterator
it = other.values_.begin(); it != other.values_.end(); ++it)
{
T v = (values_[it->first] -= it->second);
@@ -145,28 +145,28 @@ public:
}
SparseVector<T> &operator-=(const double &x) {
- for (typename MapType::iterator
+ for (typename MapType::iterator
it = values_.begin(); it != values_.end(); ++it)
it->second -= x;
return *this;
}
SparseVector<T> &operator+=(const double &x) {
- for (typename MapType::iterator
+ for (typename MapType::iterator
it = values_.begin(); it != values_.end(); ++it)
it->second += x;
return *this;
}
SparseVector<T> &operator/=(const T &x) {
- for (typename MapType::iterator
+ for (typename MapType::iterator
it = values_.begin(); it != values_.end(); ++it)
it->second /= x;
return *this;
}
SparseVector<T> &operator*=(const T& x) {
- for (typename MapType::iterator
+ for (typename MapType::iterator
it = values_.begin(); it != values_.end(); ++it)
it->second *= x;
return *this;
@@ -194,7 +194,7 @@ public:
void Write(const bool with_semi, std::ostream* os) const {
bool first = true;
- for (typename MapType::const_iterator
+ for (typename MapType::const_iterator
it = values_.begin(); it != values_.end(); ++it) {
// by definition feature id 0 is a dummy value
if (it->first == 0) continue;
@@ -244,6 +244,9 @@ private:
MapType values_;
};
+typedef SparseVector<double> FeatureVector;
+typedef std::vector<double> FeatureWeights;
+
template <typename T>
SparseVector<T> operator+(const SparseVector<T>& a, const SparseVector<T>& b) {
SparseVector<T> result = a;
diff --git a/decoder/trule.cc b/decoder/trule.cc
index 505839c7..170e3a95 100644
--- a/decoder/trule.cc
+++ b/decoder/trule.cc
@@ -7,6 +7,11 @@
using namespace std;
+bool TRule::IsGoal() const {
+ static const int kGOAL(TD::Convert("Goal") * -1); // this will happen once, and after static init of trule.cc static dict.
+ return GetLHS() == kGOAL;
+}
+
static WordID ConvertTrgString(const string& w) {
int len = w.size();
WordID id = 0;
diff --git a/decoder/trule.h b/decoder/trule.h
index 7fb92924..defdbeb9 100644
--- a/decoder/trule.h
+++ b/decoder/trule.h
@@ -28,6 +28,8 @@ class TRule {
scores_.set_value(feat_ids[i], feat_vals[i]);
}
+ bool IsGoal() const;
+
explicit TRule(const std::vector<WordID>& e) : e_(e), lhs_(0), prev_i(-1), prev_j(-1) {}
TRule(const std::vector<WordID>& e, const std::vector<WordID>& f, const WordID& lhs) :
e_(e), f_(f), lhs_(lhs), prev_i(-1), prev_j(-1) {}
@@ -126,7 +128,7 @@ class TRule {
std::vector<WordID> f_;
WordID lhs_;
SparseVector<double> scores_;
-
+
char arity_;
TRulePtr parent_rule_; // usually NULL, except when doing constrained decoding
diff --git a/decoder/viterbi.cc b/decoder/viterbi.cc
index 7f52d08c..6a7a970b 100644
--- a/decoder/viterbi.cc
+++ b/decoder/viterbi.cc
@@ -25,33 +25,33 @@ std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool es
string ViterbiETree(const Hypergraph& hg) {
vector<WordID> tmp;
- const prob_t p = Viterbi<vector<WordID>, ETreeTraversal, prob_t, EdgeProb>(hg, &tmp);
+ Viterbi<ETreeTraversal>(hg, &tmp);
return TD::GetString(tmp);
}
string ViterbiFTree(const Hypergraph& hg) {
vector<WordID> tmp;
- const prob_t p = Viterbi<vector<WordID>, FTreeTraversal, prob_t, EdgeProb>(hg, &tmp);
+ Viterbi<FTreeTraversal>(hg, &tmp);
return TD::GetString(tmp);
}
prob_t ViterbiESentence(const Hypergraph& hg, vector<WordID>* result) {
- return Viterbi<vector<WordID>, ESentenceTraversal, prob_t, EdgeProb>(hg, result);
+ return Viterbi<ESentenceTraversal>(hg, result);
}
prob_t ViterbiFSentence(const Hypergraph& hg, vector<WordID>* result) {
- return Viterbi<vector<WordID>, FSentenceTraversal, prob_t, EdgeProb>(hg, result);
+ return Viterbi<FSentenceTraversal>(hg, result);
}
int ViterbiELength(const Hypergraph& hg) {
int len = -1;
- Viterbi<int, ELengthTraversal, prob_t, EdgeProb>(hg, &len);
+ Viterbi<ELengthTraversal>(hg, &len);
return len;
}
int ViterbiPathLength(const Hypergraph& hg) {
int len = -1;
- Viterbi<int, PathLengthTraversal, prob_t, EdgeProb>(hg, &len);
+ Viterbi<PathLengthTraversal>(hg, &len);
return len;
}
@@ -61,10 +61,11 @@ struct JoshuaVisTraversal {
const std::string left;
const std::string space;
const std::string right;
+ typedef std::vector<WordID> Result;
void operator()(const Hypergraph::Edge& edge,
- const std::vector<const std::vector<WordID>*>& ants,
- std::vector<WordID>* result) const {
- std::vector<WordID> tmp;
+ const std::vector<const Result*>& ants,
+ Result* result) const {
+ Result tmp;
edge.rule_->ESubstitute(ants, &tmp);
const std::string cat = TD::Convert(edge.rule_->GetLHS() * -1);
if (cat == "Goal")
@@ -81,7 +82,50 @@ struct JoshuaVisTraversal {
string JoshuaVisualizationString(const Hypergraph& hg) {
vector<WordID> tmp;
- const prob_t p = Viterbi<vector<WordID>, JoshuaVisTraversal, prob_t, EdgeProb>(hg, &tmp);
+ Viterbi<JoshuaVisTraversal>(hg, &tmp);
return TD::GetString(tmp);
}
+
+//TODO: move to appropriate header if useful elsewhere
+/*
+ The simple solution like abs(f1-f2) <= e does not work for very small or very big values. This floating-point comparison algorithm is based on the more confident solution presented by Knuth in [1]. For a given floating point values u and v and a tolerance e:
+
+| u - v | <= e * |u| and | u - v | <= e * |v|
+defines a "very close with tolerance e" relationship between u and v
+ (1)
+
+| u - v | <= e * |u| or | u - v | <= e * |v|
+defines a "close enough with tolerance e" relationship between u and v
+ (2)
+
+Both relationships are commutative but are not transitive. The relationship defined by inequations (1) is stronger that the relationship defined by inequations (2) (i.e. (1) => (2) ). Because of the multiplication in the right side of inequations, that could cause an unwanted underflow condition, the implementation is using modified version of the inequations (1) and (2) where all underflow, overflow conditions could be guarded safely:
+
+| u - v | / |u| <= e and | u - v | / |v| <= e
+| u - v | / |u| <= e or | u - v | / |v| <= e
+ (1`)
+(2`)
+*/
+#include <cmath>
+#include <stdexcept>
+#include <boost/lexical_cast.hpp>
+inline bool close_enough(double a,double b,double epsilon)
+{
+ using std::fabs;
+ double diff=fabs(a-b);
+ return diff<=epsilon*fabs(a) || diff<=epsilon*fabs(b);
+}
+
+FeatureVector ViterbiFeatures(Hypergraph const& hg,FeatureWeights const* weights) {
+ FeatureVector r;
+ const prob_t p = Viterbi<FeatureVectorTraversal>(hg, &r);
+ if (weights) {
+ double logp=log(p);
+ double fv=r.dot(*weights);
+ const double EPSILON=1e-5;
+ if (!close_enough(logp,fv,EPSILON))
+ throw std::runtime_error("ViterbiFeatures log prob disagrees with features.dot(weights)"+boost::lexical_cast<string>(logp)+"!="+boost::lexical_cast<string>(fv));
+ }
+ return r;
+}
+
diff --git a/decoder/viterbi.h b/decoder/viterbi.h
index 6b8bbed1..7e1e2c0e 100644
--- a/decoder/viterbi.h
+++ b/decoder/viterbi.h
@@ -8,13 +8,21 @@
std::string viterbi_stats(Hypergraph const& hg, std::string const& name="forest", bool estring=true, bool etree=false);
-// V must implement:
-// void operator()(const vector<const T*>& ants, T* result);
-template<typename T, typename Traversal, typename WeightType, typename WeightFunction>
-WeightType Viterbi(const Hypergraph& hg,
- T* result,
- const Traversal& traverse = Traversal(),
- const WeightFunction& weight = WeightFunction()) {
+/// computes for each hg node the best (according to WeightType/WeightFunction) derivation, and some homomorphism (bottom up expression tree applied through Traversal) of it. T is the "return type" of Traversal, which is called only once for the best edge for a node's result (i.e. result will start default constructed)
+//TODO: make T a typename inside Traversal and WeightType a typename inside WeightFunction?
+// Traversal must implement:
+// typedef T Result;
+// void operator()(Hypergraph::Edge const& e,const vector<const Result*>& ants, Result* result) const;
+// WeightFunction must implement:
+// typedef prob_t Weight;
+// Weight operator()(Hypergraph::Edge const& e) const;
+template<typename Traversal,typename WeightFunction>
+typename WeightFunction::Weight Viterbi(const Hypergraph& hg,
+ typename Traversal::Result* result,
+ const Traversal& traverse,
+ const WeightFunction& weight) {
+ typedef typename Traversal::Result T;
+ typedef typename WeightFunction::Weight WeightType;
const int num_nodes = hg.nodes_.size();
std::vector<T> vit_result(num_nodes);
std::vector<WeightType> vit_weight(num_nodes, WeightType::Zero());
@@ -51,7 +59,41 @@ WeightType Viterbi(const Hypergraph& hg,
return vit_weight.back();
}
+
+/*
+template<typename Traversal,typename WeightFunction>
+typename WeightFunction::Weight Viterbi(const Hypergraph& hg,
+ typename Traversal::Result* result)
+{
+ Traversal traverse;
+ WeightFunction weight;
+ return Viterbi(hg,result,traverse,weight);
+}
+
+template<class Traversal,class WeightFunction=EdgeProb>
+typename WeightFunction::Weight Viterbi(const Hypergraph& hg,
+ typename Traversal::Result* result,
+ Traversal const& traverse=Traversal()
+ )
+{
+ WeightFunction weight;
+ return Viterbi(hg,result,traverse,weight);
+}
+*/
+
+//spec for EdgeProb
+template<class Traversal>
+prob_t Viterbi(const Hypergraph& hg,
+ typename Traversal::Result* result,
+ Traversal const& traverse=Traversal()
+ )
+{
+ EdgeProb weight;
+ return Viterbi(hg,result,traverse,weight);
+}
+
struct PathLengthTraversal {
+ typedef int Result;
void operator()(const Hypergraph::Edge& edge,
const std::vector<const int*>& ants,
int* result) const {
@@ -62,14 +104,16 @@ struct PathLengthTraversal {
};
struct ESentenceTraversal {
+ typedef std::vector<WordID> Result;
void operator()(const Hypergraph::Edge& edge,
- const std::vector<const std::vector<WordID>*>& ants,
- std::vector<WordID>* result) const {
+ const std::vector<const Result*>& ants,
+ Result* result) const {
edge.rule_->ESubstitute(ants, result);
}
};
struct ELengthTraversal {
+ typedef int Result;
void operator()(const Hypergraph::Edge& edge,
const std::vector<const int*>& ants,
int* result) const {
@@ -79,9 +123,10 @@ struct ELengthTraversal {
};
struct FSentenceTraversal {
+ typedef std::vector<WordID> Result;
void operator()(const Hypergraph::Edge& edge,
- const std::vector<const std::vector<WordID>*>& ants,
- std::vector<WordID>* result) const {
+ const std::vector<const Result*>& ants,
+ Result* result) const {
edge.rule_->FSubstitute(ants, result);
}
};
@@ -92,10 +137,11 @@ struct ETreeTraversal {
const std::string left;
const std::string space;
const std::string right;
+ typedef std::vector<WordID> Result;
void operator()(const Hypergraph::Edge& edge,
- const std::vector<const std::vector<WordID>*>& ants,
- std::vector<WordID>* result) const {
- std::vector<WordID> tmp;
+ const std::vector<const Result*>& ants,
+ Result* result) const {
+ Result tmp;
edge.rule_->ESubstitute(ants, &tmp);
const std::string cat = TD::Convert(edge.rule_->GetLHS() * -1);
if (cat == "Goal")
@@ -111,10 +157,11 @@ struct FTreeTraversal {
const std::string left;
const std::string space;
const std::string right;
+ typedef std::vector<WordID> Result;
void operator()(const Hypergraph::Edge& edge,
- const std::vector<const std::vector<WordID>*>& ants,
- std::vector<WordID>* result) const {
- std::vector<WordID> tmp;
+ const std::vector<const Result*>& ants,
+ Result* result) const {
+ Result tmp;
edge.rule_->FSubstitute(ants, &tmp);
const std::string cat = TD::Convert(edge.rule_->GetLHS() * -1);
if (cat == "Goal")
@@ -126,10 +173,10 @@ struct FTreeTraversal {
};
struct ViterbiPathTraversal {
+ typedef std::vector<Hypergraph::Edge const*> Result;
void operator()(const Hypergraph::Edge& edge,
- const std::vector<const std::vector<const Hypergraph::Edge*>* >& ants,
- std::vector<const Hypergraph::Edge*>* result) const {
- result->clear();
+ std::vector<Result const*> const& ants,
+ Result* result) const {
for (int i = 0; i < ants.size(); ++i)
for (int j = 0; j < ants[i]->size(); ++j)
result->push_back((*ants[i])[j]);
@@ -137,6 +184,18 @@ struct ViterbiPathTraversal {
}
};
+struct FeatureVectorTraversal {
+ typedef FeatureVector Result;
+ void operator()(Hypergraph::Edge const& edge,
+ std::vector<Result const*> const& ants,
+ Result* result) const {
+ for (int i = 0; i < ants.size(); ++i)
+ *result+=*ants[i];
+ *result+=edge.feature_values_;
+ }
+};
+
+
std::string JoshuaVisualizationString(const Hypergraph& hg);
prob_t ViterbiESentence(const Hypergraph& hg, std::vector<WordID>* result);
std::string ViterbiETree(const Hypergraph& hg);
@@ -145,4 +204,7 @@ std::string ViterbiFTree(const Hypergraph& hg);
int ViterbiELength(const Hypergraph& hg);
int ViterbiPathLength(const Hypergraph& hg);
+/// if weights supplied, assert viterbi prob = features.dot(*weights). return features (sum over all edges in viterbi derivation)
+FeatureVector ViterbiFeatures(Hypergraph const& hg,FeatureWeights const* weights=0);
+
#endif