From 2972dc47d3e55f3785f4854cc86bd951a0dd32d4 Mon Sep 17 00:00:00 2001 From: graehl Date: Sat, 21 Aug 2010 03:57:32 +0000 Subject: intern pool, agenda git-svn-id: https://ws10smt.googlecode.com/svn/trunk@613 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/apply_fsa_models.cc | 2 +- utils/agenda.h | 18 +++++--- utils/hash.h | 2 + utils/intern_pool.h | 110 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 125 insertions(+), 7 deletions(-) create mode 100755 utils/intern_pool.h diff --git a/decoder/apply_fsa_models.cc b/decoder/apply_fsa_models.cc index f9c94ec3..c958f7c4 100755 --- a/decoder/apply_fsa_models.cc +++ b/decoder/apply_fsa_models.cc @@ -291,7 +291,7 @@ struct ItemP { struct Chart { //Agenda a; - //typedef HASH_MAP(Item,ItemP,boost::hash) Items; + //typedef HASH_MAP > Items; //typedef Items::iterator FindItem; //typedef std::pair InsertItem; // Items items; diff --git a/utils/agenda.h b/utils/agenda.h index 8198f077..639f35a9 100755 --- a/utils/agenda.h +++ b/utils/agenda.h @@ -37,7 +37,7 @@ */ #include "best.h" -#include "hash.h" +#include "intern_pool.h" #include "d_ary_heap.h" #include "lvalue_pmap.h" #include @@ -76,18 +76,24 @@ PMAP_MEMBER_INDIRECT(LocationMap,unsigned,location) PMAP_MEMBER_INDIRECT(PriorityMap,best_t,priority) // LocMap and PrioMap are boost property maps put(locmap,key,size_t), Better(get(priomap,k1),get(priomap,k2)) means k1 should be above k2 (be popped first). Locmap and PrioMap may have state; the rest are assumed stateless functors -template ,class Equal=std::equal_to,class Better=std::less > -struct Agenda { +template , /* intern_pool args */ class KeyF=get_key,class HashKey=boost::hash,class EqKey=std::equal_to, class Pool=boost::object_pool > +struct Agenda : intern_pool { /* this is less generic than it could be, because I want to use a single hash mapping to intern to canonical mutable object pointers, where the property maps are just lvalue accessors */ + typedef intern_pool Intern; // inherited because I want to use construct() + typedef Item * Handle; + typedef LocationMap LocMap; + typedef PriorityMap PrioMap; + LocMap locmap; + PrioMap priomap; + //NOT NEEDED: initialize function object state (there is none) + + /* typedef Item *CanonItem; static const std::size_t heap_arity=4; // might be fastest possible (depends on key size probably - cache locality is bad w/ arity=2) typedef std::vector HeapStorage; typedef boost::detail::d_ary_heap_indirect Heap; HASH_SET queued; - typedef LocationMap - LocMap locmap; - PrioMap priomap; Agenda(LocMap const& lm=LocMap(),PrioMap const& pm=PrioMap()) : locmap(lm), priomap(pm) { } */ }; diff --git a/utils/hash.h b/utils/hash.h index e9584dd5..17f150ff 100755 --- a/utils/hash.h +++ b/utils/hash.h @@ -8,12 +8,14 @@ #include "config.h" #ifdef HAVE_SPARSEHASH # include +# include # define HASH_MAP google::dense_hash_map # define HASH_SET google::dense_hash_set # define HASH_MAP_RESERVED(h,empty,deleted) do { h.set_empty_key(empty); h.set_deleted_key(deleted); } while(0) # define HASH_MAP_EMPTY(h,empty) do { h.set_empty_key(empty); } while(0) #else # include +# include # define HASH_MAP std::tr1::unordered_map # define HASH_SET std::tr1::unordered_set # define HASH_MAP_RESERVED(h,empty,deleted) diff --git a/utils/intern_pool.h b/utils/intern_pool.h new file mode 100755 index 00000000..c85c9881 --- /dev/null +++ b/utils/intern_pool.h @@ -0,0 +1,110 @@ +#ifndef INTERN_POOL_H +#define INTERN_POOL_H + +#define DEBUG_INTERN_POOL(x) x + +/* to "intern" a string in lisp is to make a symbol from it (a pointer to a canonical copy whose pointer can be equality-compared/hashed directly with other interned things). we take an Item that has a key part and some mutable parts (that aren't in its identity), and we hash-by-value the key part to map to a canonical on-heap Item - and we use a boost object pool to allocate them */ + +//FIXME: actually store function object state (assumed stateless so far) + +#include +#include "hash.h" +//#include "null_traits.h" +#include + +template +struct get_key { // default accessor for I = like pair + typedef typename I::first_type const& return_type; + typedef I const& argument_type; + return_type operator()(I const& i) const { + return i.first; + } +}; + +template +struct compose_indirect { + typedef Arg *argument_type; // also Arg + KeyF kf; + F f; + typedef typename F::return_type return_type; + return_type operator()(Arg p) const { + return f(kf(p)); + } + template + return_type operator()(Arg *p) const { + return f(kf(*p)); + } + +}; + +/* + +template +struct indirect_function { + F f; + explicit indirect_function(F const& f=F()) : f(f) {} + typedef typename F::return_type return_type; + template + return_type operator()(V *p) const { + return f(*p); + } +}; +*/ + +template ,class HashKey=boost::hash,class EqKey=std::equal_to, class Pool=boost::object_pool > +struct intern_pool : Pool { + KeyF key; + typedef typename KeyF::return_type Key; + typedef Item *Handle; + typedef compose_indirect HashDeep; + typedef compose_indirect EqDeep; + typedef HASH_SET Canonical; + typedef typename Canonical::iterator CFind; + typedef std::pair CInsert; + Canonical canonical; + void interneq(Handle &i) { + i=intern(i); + } +// inherited: Handle construct(...) + Handle construct_fresh() { return Pool::construct(); } + Handle intern(Handle i) { // (maybe invalidating i, returning a valid canonical handle (pointer) + CInsert i_new=canonical.insert(i); + if (i_new.second) + return i; + else { + free(i); + return *i_new->first; + } + } + void destroy_interned(Handle i) { + DEBUG_INTERN_POOL(assert(canonical.find(i)!=canonical.end())); + canonical.erase(i); + destroy(i); + } + bool destroy_fresh(Handle i) { + DEBUG_INTERN_POOL(assert(canonical.find(i)!=canonical.end()||*canonical.find(i)!=i)); // i is a constructed item not yet interned. + destroy(i); + } + void destroy_both(Handle i) { // i must have come from this pool. may be interned, or not. destroy both the noninterned and interned. + if (!destroy_if_interned(i)) destroy(i); + } + // destroy intern(i) if it exists. return true if it existed AND its address was i. otherwise return false (whether or not a value-equal item existed and was destroyed) + bool destroy_if_interned(Handle i) { + CFind f=canonical.find(i); + if (f!=canonical.end()) { + Handle interned=*f; + canonical.erase(f); + destroy(f); + if (f==i) return true; + } + return false; + } + + intern_pool() { + HASH_MAP_EMPTY(canonical,(Handle)0); + } +}; + + + +#endif -- cgit v1.2.3