From 485344fc8bceacaeec7272347b1eb2923738014b Mon Sep 17 00:00:00 2001
From: graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>
Date: Sat, 21 Aug 2010 03:57:32 +0000
Subject: intern pool, agenda

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@613 ec762483-ff6d-05da-a07a-a48fb63a330f
---
 decoder/apply_fsa_models.cc |   2 +-
 utils/agenda.h              |  18 +++++---
 utils/hash.h                |   2 +
 utils/intern_pool.h         | 110 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 125 insertions(+), 7 deletions(-)
 create mode 100755 utils/intern_pool.h

diff --git a/decoder/apply_fsa_models.cc b/decoder/apply_fsa_models.cc
index f9c94ec3..c958f7c4 100755
--- a/decoder/apply_fsa_models.cc
+++ b/decoder/apply_fsa_models.cc
@@ -291,7 +291,7 @@ struct ItemP {
 
 struct Chart {
   //Agenda<Item> a;
-  //typedef HASH_MAP(Item,ItemP,boost::hash<Item>) Items;
+  //typedef HASH_MAP<Item,ItemP,boost::hash<Item> > Items;
   //typedef Items::iterator FindItem;
   //typedef std::pair<FindItem,bool> InsertItem;
 //  Items items;
diff --git a/utils/agenda.h b/utils/agenda.h
index 8198f077..639f35a9 100755
--- a/utils/agenda.h
+++ b/utils/agenda.h
@@ -37,7 +37,7 @@
  */
 
 #include "best.h"
-#include "hash.h"
+#include "intern_pool.h"
 #include "d_ary_heap.h"
 #include "lvalue_pmap.h"
 #include <vector>
@@ -76,18 +76,24 @@ PMAP_MEMBER_INDIRECT(LocationMap,unsigned,location)
 PMAP_MEMBER_INDIRECT(PriorityMap,best_t,priority)
 
 // LocMap and PrioMap are boost property maps put(locmap,key,size_t), Better(get(priomap,k1),get(priomap,k2)) means k1 should be above k2 (be popped first).  Locmap and PrioMap may have state; the rest are assumed stateless functors
-template <class Item,class Hash=boost::hash<Item>,class Equal=std::equal_to<Item>,class Better=std::less<Item> >
-struct Agenda {
+template <class Item,class Better=std::less<Item>, /* intern_pool args */ class KeyF=get_key<Item>,class HashKey=boost::hash<typename KeyF::return_type>,class EqKey=std::equal_to<typename KeyF::return_type>, class Pool=boost::object_pool<Item> >
+struct Agenda : intern_pool<Item,KeyF,EqKey,Pool> {
   /* this is less generic than it could be, because I want to use a single hash mapping to intern to canonical mutable object pointers, where the property maps are just lvalue accessors */
+  typedef intern_pool<Item,KeyF,EqKey,Pool> Intern; // inherited because I want to use construct()
+  typedef Item * Handle;
+  typedef LocationMap<Handle> LocMap;
+  typedef PriorityMap<Handle> PrioMap;
+  LocMap locmap;
+  PrioMap priomap;
+  //NOT NEEDED: initialize function object state (there is none)
+
+
 /*
   typedef Item *CanonItem;
   static const std::size_t heap_arity=4; // might be fastest possible (depends on key size probably - cache locality is bad w/ arity=2)
   typedef std::vector<CanonItem> HeapStorage;
   typedef boost::detail::d_ary_heap_indirect<Key,heap_arity,LocMap,PrioMap,Better,HeapStorage> Heap;
   HASH_SET<Key,Hash,Equal> queued;
-  typedef LocationMap<ItemP>
-  LocMap locmap;
-  PrioMap priomap;
   Agenda(LocMap const& lm=LocMap(),PrioMap const& pm=PrioMap()) : locmap(lm), priomap(pm) {  }
 */
 };
diff --git a/utils/hash.h b/utils/hash.h
index e9584dd5..17f150ff 100755
--- a/utils/hash.h
+++ b/utils/hash.h
@@ -8,12 +8,14 @@
 #include "config.h"
 #ifdef HAVE_SPARSEHASH
 # include <google/dense_hash_map>
+# include <google/dense_hash_set>
 # define HASH_MAP google::dense_hash_map
 # define HASH_SET google::dense_hash_set
 # define HASH_MAP_RESERVED(h,empty,deleted) do { h.set_empty_key(empty); h.set_deleted_key(deleted); } while(0)
 # define HASH_MAP_EMPTY(h,empty) do { h.set_empty_key(empty); } while(0)
 #else
 # include <tr1/unordered_map>
+# include <tr1/unordered_set>
 # define HASH_MAP std::tr1::unordered_map
 # define HASH_SET std::tr1::unordered_set
 # define HASH_MAP_RESERVED(h,empty,deleted)
diff --git a/utils/intern_pool.h b/utils/intern_pool.h
new file mode 100755
index 00000000..c85c9881
--- /dev/null
+++ b/utils/intern_pool.h
@@ -0,0 +1,110 @@
+#ifndef INTERN_POOL_H
+#define INTERN_POOL_H
+
+#define DEBUG_INTERN_POOL(x) x
+
+/* to "intern" a string in lisp is to make a symbol from it (a pointer to a canonical copy whose pointer can be equality-compared/hashed directly with other interned things).  we take an Item that has a key part and some mutable parts (that aren't in its identity), and we hash-by-value the key part to map to a canonical on-heap Item - and we use a boost object pool to allocate them */
+
+//FIXME: actually store function object state (assumed stateless so far)
+
+#include <boost/pool/object_pool.hpp>
+#include "hash.h"
+//#include "null_traits.h"
+#include <functional>
+
+template <class I>
+struct get_key { // default accessor for I = like pair<key,val>
+  typedef typename I::first_type const& return_type;
+  typedef I const& argument_type;
+  return_type operator()(I const& i) const {
+    return i.first;
+  }
+};
+
+template <class KeyF,class F,class Arg=typename KeyF::argument_type>
+struct compose_indirect {
+  typedef Arg *argument_type; // also Arg
+  KeyF kf;
+  F f;
+  typedef typename F::return_type return_type;
+  return_type operator()(Arg p) const {
+    return f(kf(p));
+  }
+  template <class V>
+  return_type operator()(Arg *p) const {
+    return f(kf(*p));
+  }
+
+};
+
+/*
+
+template <class F>
+struct indirect_function {
+  F f;
+  explicit indirect_function(F const& f=F()) : f(f) {}
+  typedef typename F::return_type return_type;
+  template <class V>
+  return_type operator()(V *p) const {
+    return f(*p);
+  }
+};
+*/
+
+template <class Item,class KeyF=get_key<Item>,class HashKey=boost::hash<typename KeyF::return_type>,class EqKey=std::equal_to<typename KeyF::return_type>, class Pool=boost::object_pool<Item> >
+struct intern_pool : Pool {
+  KeyF key;
+  typedef typename KeyF::return_type Key;
+  typedef Item *Handle;
+  typedef compose_indirect<KeyF,HashKey,Handle> HashDeep;
+  typedef compose_indirect<KeyF,EqKey,Handle> EqDeep;
+  typedef HASH_SET<Handle,HashDeep,EqDeep> Canonical;
+  typedef typename Canonical::iterator CFind;
+  typedef std::pair<CFind,bool> CInsert;
+  Canonical canonical;
+  void interneq(Handle &i) {
+    i=intern(i);
+  }
+// inherited: Handle construct(...)
+  Handle construct_fresh() { return Pool::construct(); }
+  Handle intern(Handle i) { // (maybe invalidating i, returning a valid canonical handle (pointer)
+    CInsert i_new=canonical.insert(i);
+    if (i_new.second)
+      return i;
+    else {
+      free(i);
+      return *i_new->first;
+    }
+  }
+  void destroy_interned(Handle i) {
+    DEBUG_INTERN_POOL(assert(canonical.find(i)!=canonical.end()));
+    canonical.erase(i);
+    destroy(i);
+  }
+  bool destroy_fresh(Handle i) {
+    DEBUG_INTERN_POOL(assert(canonical.find(i)!=canonical.end()||*canonical.find(i)!=i)); // i is a constructed item not yet interned.
+    destroy(i);
+  }
+  void destroy_both(Handle i) { // i must have come from this pool.  may be interned, or not.  destroy both the noninterned and interned.
+    if (!destroy_if_interned(i)) destroy(i);
+  }
+  // destroy intern(i) if it exists.  return true if it existed AND its address was i.  otherwise return false (whether or not a value-equal item existed and was destroyed)
+  bool destroy_if_interned(Handle i) {
+    CFind f=canonical.find(i);
+    if (f!=canonical.end()) {
+      Handle interned=*f;
+      canonical.erase(f);
+      destroy(f);
+      if (f==i) return true;
+    }
+    return false;
+  }
+
+  intern_pool() {
+    HASH_MAP_EMPTY(canonical,(Handle)0);
+  }
+};
+
+
+
+#endif
-- 
cgit v1.2.3