From 61559eee0fcc46192a2bb3ecfcd3d4a526b6f848 Mon Sep 17 00:00:00 2001 From: graehl Date: Tue, 13 Jul 2010 02:01:07 +0000 Subject: vest: combine over-similar search directions, exclude primary directions, skeleton for oracle directions git-svn-id: https://ws10smt.googlecode.com/svn/trunk@227 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/grammar.cc | 9 ++++----- decoder/sparse_vector.h | 32 ++++++++++++++++++++++++-------- 2 files changed, 28 insertions(+), 13 deletions(-) (limited to 'decoder') diff --git a/decoder/grammar.cc b/decoder/grammar.cc index 5eb7887d..499e79fe 100644 --- a/decoder/grammar.cc +++ b/decoder/grammar.cc @@ -71,7 +71,7 @@ struct TGImpl { }; TextGrammar::TextGrammar() : max_span_(10), pimpl_(new TGImpl) {} -TextGrammar::TextGrammar(const string& file) : +TextGrammar::TextGrammar(const string& file) : max_span_(10), pimpl_(new TGImpl) { ReadFromFile(file); @@ -104,7 +104,7 @@ void TextGrammar::ReadFromFile(const string& filename) { RuleLexer::ReadRules(in.stream(), &AddRuleHelper, this); } -bool TextGrammar::HasRuleForSpan(int i, int j, int distance) const { +bool TextGrammar::HasRuleForSpan(int /* i */, int /* j */, int distance) const { return (max_span_ >= distance); } @@ -121,8 +121,7 @@ GlueGrammar::GlueGrammar(const string& goal_nt, const string& default_nt) { //cerr << "GLUE: " << glue->AsString() << endl; } -bool GlueGrammar::HasRuleForSpan(int i, int j, int distance) const { - (void) j; +bool GlueGrammar::HasRuleForSpan(int i, int /* j */, int /* distance */) const { return (i == 0); } @@ -141,7 +140,7 @@ PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat) } } -bool PassThroughGrammar::HasRuleForSpan(int i, int j, int distance) const { +bool PassThroughGrammar::HasRuleForSpan(int i, int j, int /* distance */) const { const set& hr = has_rule_[i]; if (i == j) { return !hr.empty(); } return (hr.find(j) != hr.end()); diff --git a/decoder/sparse_vector.h b/decoder/sparse_vector.h index be91f324..bfdeebcc 100644 --- a/decoder/sparse_vector.h +++ b/decoder/sparse_vector.h @@ -58,7 +58,7 @@ public: } int max_index() const { - if (values_.empty()) return 0; + if (empty()) return 0; typename MapType::const_iterator found =values_.end(); --found; return found->first; @@ -74,6 +74,18 @@ public: return sum; } + template + S cosine_sim(const SparseVector &vec) const { + return dot(vec)/(l2norm()*vec.l2norm()); + } + + // if values are binary, gives |A intersect B|/|A union B| + template + S tanimoto_coef(const SparseVector &vec) const { + S dp=dot(vec); + return dp/(l2norm_sq()*vec.l2norm_sq()-dp); + } + template S dot(const SparseVector &vec) const { S sum = 0; @@ -119,12 +131,16 @@ public: return sum; } - T l2norm() const { + T l2norm_sq() const { T sum = 0; for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) sum += it->second * it->second; - return sqrt(sum); + return sum; + } + + T l2norm() const { + return sqrt(l2norm_sq()); } SparseVector &operator+=(const SparseVector &other) { @@ -149,14 +165,14 @@ public: return *this; } - SparseVector &operator-=(const double &x) { + SparseVector &operator-=(T const& x) { for (typename MapType::iterator it = values_.begin(); it != values_.end(); ++it) it->second -= x; return *this; } - SparseVector &operator+=(const double &x) { + SparseVector &operator+=(T const& x) { for (typename MapType::iterator it = values_.begin(); it != values_.end(); ++it) it->second += x; @@ -177,17 +193,17 @@ public: return *this; } - SparseVector operator+(const double &x) const { + SparseVector operator+(T const& x) const { SparseVector result = *this; return result += x; } - SparseVector operator-(const double &x) const { + SparseVector operator-(T const& x) const { SparseVector result = *this; return result -= x; } - SparseVector operator/(const double &x) const { + SparseVector operator/(T const& x) const { SparseVector result = *this; return result /= x; } -- cgit v1.2.3