few small fixes of alignment tools, add new orthographic similarity feature for word aligner, final naming of directories, libraries in cdec

author: Chris Dyer <redpony@gmail.com> 2009-12-14 20:35:11 -0500
committer: Chris Dyer <redpony@gmail.com> 2009-12-14 20:35:11 -0500
commit: 851e389dffdd6996ea32d70defb8906de80b9edc (patch)
tree: 8c68ee77205badc056b8ab5b332e67e3e98017df /src/kbest.h
parent: dc6930c00b4b276883280cff1ed6dcd9ddef03c7 (diff)
1 files changed, 0 insertions, 207 deletions
diff --git a/src/kbest.h b/src/kbest.h
deleted file mode 100644
index cd9b6c2b..00000000
--- a/src/kbest.h
+++ /dev/null
@@ -1,207 +0,0 @@
-#ifndef _HG_KBEST_H_
-#define _HG_KBEST_H_
-
-#include <vector>
-#include <utility>
-#include <tr1/unordered_set>
-
-#include <boost/shared_ptr.hpp>
-
-#include "wordid.h"
-#include "hg.h"
-
-namespace KBest {
-  // default, don't filter any derivations from the k-best list
-  struct NoFilter {
-    bool operator()(const std::vector<WordID>& yield) {
-      (void) yield;
-      return false;
-    }
-  };
-
-  // optional, filter unique yield strings
-  struct FilterUnique {
-    std::tr1::unordered_set<std::vector<WordID>, boost::hash<std::vector<WordID> > > unique;
-
-    bool operator()(const std::vector<WordID>& yield) {
-      return !unique.insert(yield).second;
-    }
-  };
-
-  // utility class to lazily create the k-best derivations from a forest, uses
-  // the lazy k-best algorithm (Algorithm 3) from Huang and Chiang (IWPT 2005)
-  template<typename T,  // yield type (returned by Traversal)
-           typename Traversal,
-           typename DerivationFilter = NoFilter,
-           typename WeightType = prob_t,
-           typename WeightFunction = EdgeProb>
-  struct KBestDerivations {
-    KBestDerivations(const Hypergraph& hg,
-                     const size_t k,
-                     const Traversal& tf = Traversal(),
-                     const WeightFunction& wf = WeightFunction()) :
-      traverse(tf), w(wf), g(hg), nds(g.nodes_.size()), k_prime(k) {}
-
-    ~KBestDerivations() {
-      for (int i = 0; i < freelist.size(); ++i)
-        delete freelist[i];
-    }
-
-    struct Derivation {
-      Derivation(const Hypergraph::Edge& e,
-                 const SmallVector& jv,
-                 const WeightType& w,
-                 const SparseVector<double>& f) :
-        edge(&e),
-        j(jv),
-        score(w),
-        feature_values(f) {}
-
-      // dummy constructor, just for query
-      Derivation(const Hypergraph::Edge& e,
-                 const SmallVector& jv) : edge(&e), j(jv) {}
-
-      T yield;
-      const Hypergraph::Edge* const edge;
-      const SmallVector j;
-      const WeightType score;
-      const SparseVector<double> feature_values;
-    };
-    struct HeapCompare {
-      bool operator()(const Derivation* a, const Derivation* b) const {
-        return a->score < b->score;
-      }
-    };
-    struct DerivationCompare {
-      bool operator()(const Derivation* a, const Derivation* b) const {
-        return a->score > b->score;
-      }
-    };
-    struct DerivationUniquenessHash {
-      size_t operator()(const Derivation* d) const {
-        size_t x = 5381;
-        x = ((x << 5) + x) ^ d->edge->id_;
-        for (int i = 0; i < d->j.size(); ++i)
-          x = ((x << 5) + x) ^ d->j[i];
-        return x;
-      }
-    };
-    struct DerivationUniquenessEquals {
-      bool operator()(const Derivation* a, const Derivation* b) const {
-        return (a->edge == b->edge) && (a->j == b->j);
-      }
-    };
-    typedef std::vector<Derivation*> CandidateHeap;
-    typedef std::vector<Derivation*> DerivationList;
-    typedef std::tr1::unordered_set<
-       const Derivation*, DerivationUniquenessHash, DerivationUniquenessEquals> UniqueDerivationSet;
-
-    struct NodeDerivationState {
-      CandidateHeap cand;
-      DerivationList D;
-      DerivationFilter filter;
-      UniqueDerivationSet ds;
-      explicit NodeDerivationState(const DerivationFilter& f = DerivationFilter()) : filter(f) {}
-    };
-
-    Derivation* LazyKthBest(int v, int k) {
-      NodeDerivationState& s = GetCandidates(v);
-      CandidateHeap& cand = s.cand;
-      DerivationList& D = s.D;
-      DerivationFilter& filter = s.filter;
-      bool add_next = true;
-      while (D.size() <= k) {
-        if (add_next && D.size() > 0) {
-          const Derivation* d = D.back();
-          LazyNext(d, &cand, &s.ds);
-        }
-        add_next = false;
-
-        if (cand.size() > 0) {
-          std::pop_heap(cand.begin(), cand.end(), HeapCompare());
-          Derivation* d = cand.back();
-          cand.pop_back();
-          std::vector<const T*> ants(d->edge->Arity());
-          for (int j = 0; j < ants.size(); ++j)
-            ants[j] = &LazyKthBest(d->edge->tail_nodes_[j], d->j[j])->yield;
-          traverse(*d->edge, ants, &d->yield);
-          if (!filter(d->yield)) {
-            D.push_back(d);
-            add_next = true;
-          }
-        } else {
-          break;
-        }
-      }
-      if (k < D.size()) return D[k]; else return NULL;
-    }
-
-  private:
-    // creates a derivation object with all fields set but the yield
-    // the yield is computed in LazyKthBest before the derivation is added to D
-    // returns NULL if j refers to derivation numbers larger than the
-    // antecedent structure define
-    Derivation* CreateDerivation(const Hypergraph::Edge& e, const SmallVector& j) {
-      WeightType score = w(e);
-      SparseVector<double> feats = e.feature_values_;
-      for (int i = 0; i < e.Arity(); ++i) {
-        const Derivation* ant = LazyKthBest(e.tail_nodes_[i], j[i]);
-        if (!ant) { return NULL; }
-        score *= ant->score;
-        feats += ant->feature_values;
-      }
-      freelist.push_back(new Derivation(e, j, score, feats));
-      return freelist.back();
-    }
-
-    NodeDerivationState& GetCandidates(int v) {
-      NodeDerivationState& s = nds[v];
-      if (!s.D.empty() || !s.cand.empty()) return s;
-
-      const Hypergraph::Node& node = g.nodes_[v];
-      for (int i = 0; i < node.in_edges_.size(); ++i) {
-        const Hypergraph::Edge& edge = g.edges_[node.in_edges_[i]];
-        SmallVector jv(edge.Arity(), 0);
-        Derivation* d = CreateDerivation(edge, jv);
-        assert(d);
-        s.cand.push_back(d);
-      }
-
-      const int effective_k = std::min(k_prime, s.cand.size());
-      const typename CandidateHeap::iterator kth = s.cand.begin() + effective_k;
-      std::nth_element(s.cand.begin(), kth, s.cand.end(), DerivationCompare());
-      s.cand.resize(effective_k);
-      std::make_heap(s.cand.begin(), s.cand.end(), HeapCompare());
-
-      return s;
-    }
-
-    void LazyNext(const Derivation* d, CandidateHeap* cand, UniqueDerivationSet* ds) {
-      for (int i = 0; i < d->j.size(); ++i) {
-        SmallVector j = d->j;
-        ++j[i];
-        const Derivation* ant = LazyKthBest(d->edge->tail_nodes_[i], j[i]);
-        if (ant) {
-          Derivation query_unique(*d->edge, j);
-          if (ds->count(&query_unique) == 0) {
-            Derivation* new_d = CreateDerivation(*d->edge, j);
-            if (new_d) {
-              cand->push_back(new_d);
-              std::push_heap(cand->begin(), cand->end(), HeapCompare());
-              assert(ds->insert(new_d).second);  // insert into uniqueness set, sanity check
-            }
-          }
-        }
-      }
-    }
-
-    const Traversal traverse;
-    const WeightFunction w;
-    const Hypergraph& g;
-    std::vector<NodeDerivationState> nds;
-    std::vector<Derivation*> freelist;
-    const size_t k_prime;
-  };
-}
-
-#endif
author	Chris Dyer <redpony@gmail.com>	2009-12-14 20:35:11 -0500
committer	Chris Dyer <redpony@gmail.com>	2009-12-14 20:35:11 -0500
commit	851e389dffdd6996ea32d70defb8906de80b9edc (patch)
tree	8c68ee77205badc056b8ab5b332e67e3e98017df /src/kbest.h
parent	dc6930c00b4b276883280cff1ed6dcd9ddef03c7 (diff)