diff options
author | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-08-13 08:20:47 +0000 |
---|---|---|
committer | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-08-13 08:20:47 +0000 |
commit | 5fd3e970bf9ef0ce243a9c66c791c6b420b2a86a (patch) | |
tree | 33a7995d86e3ba10631c2aa68a3b53dfd9390248 /utils | |
parent | e60301e1be0a0d5bb44f1efd40ff43316121d4d3 (diff) |
(NEEDS TESTING) cfg index rules->nts, sort by prob, remove duplicates keeping highest prob, topo sort (and after binarize topo sort). beginning to apply_fsa_models (PrefixTrieNode)
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@539 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'utils')
-rwxr-xr-x | utils/hash.h | 27 | ||||
-rwxr-xr-x | utils/indices_after.h | 61 | ||||
-rwxr-xr-x | utils/named_enum.h | 5 | ||||
-rw-r--r-- | utils/small_vector.h | 24 |
4 files changed, 110 insertions, 7 deletions
diff --git a/utils/hash.h b/utils/hash.h index e89b1863..b0b1c43e 100755 --- a/utils/hash.h +++ b/utils/hash.h @@ -58,4 +58,31 @@ typename H::mapped_type & get_default(H &ht,K const& k,typename H::mapped_type c return const_cast<typename H::mapped_type &>(ht.insert(typename H::value_type(k,v)).first->second); } +// the below could also return a ref to the mapped max/min. they have the advantage of not falsely claiming an improvement when an equal value already existed. otherwise you could just modify the get_default and if equal assume new. +template <class H,class K> +bool improve_mapped_max(H &ht,K const& k,typename H::mapped_type const& v) { + std::pair<typename H::iterator,bool> inew=ht.insert(typename H::value_type(k,v)); + if (inew.second) return true; + typedef typename H::mapped_type V; + V &oldv=const_cast<V&>(inew.first->second); + if (oldv<v) { + oldv=v; + return true; + } + return false; +} + +template <class H,class K> +bool improve_mapped_min(H &ht,K const& k,typename H::mapped_type const& v) { + std::pair<typename H::iterator,bool> inew=ht.insert(typename H::value_type(k,v)); + if (inew.second) return true; + typedef typename H::mapped_type V; + V &oldv=const_cast<V&>(inew.first->second); + if (v<oldv) { // the only difference from above + oldv=v; + return true; + } + return false; +} + #endif diff --git a/utils/indices_after.h b/utils/indices_after.h index 2b662d5c..2891563c 100755 --- a/utils/indices_after.h +++ b/utils/indices_after.h @@ -1,6 +1,9 @@ #ifndef INDICES_AFTER_REMOVING_H #define INDICES_AFTER_REMOVING_H +//TODO: instead of making REMOVED a constant, make it class state (defaults to -1 as before, of course). + +#include <assert.h> #include <boost/config.hpp> // STATIC_CONSTANT #include <algorithm> //swap #include <iterator> @@ -48,19 +51,50 @@ unsigned new_indices(KEEP keep,O out) { return new_indices(keep.begin(),keep.end(),out); } +template <class V,class Out,class Permi> +void copy_perm_to(Out o,V const& from,Permi i,Permi e) { + for (;i!=e;++i) + *o++=from[*i]; +} + +//to cannot be same as from, for most permutations. for to==from, use indices_after::init_inverse_order instead. +template <class Vto,class Vfrom,class Perm> +void remap_perm_to(Vto &to,Vfrom const& from,Perm const& p) { + to.resize(p.size()); + copy_perm_to(to.begin(),from,p.begin(),p.end()); +} + // given a vector and a parallel sequence of bools where true means keep, keep only the marked elements while maintaining order. // this is done with a parallel sequence to the input, marked with positions the kept items would map into in a destination array, with removed items marked with the index -1. the reverse would be more compact (parallel to destination array, index of input item that goes into it) but would require the input sequence be random access. struct indices_after { BOOST_STATIC_CONSTANT(unsigned,REMOVED=(unsigned)-1); unsigned *map; // map[i] == REMOVED if i is deleted - unsigned n_kept; + unsigned n_kept; // important to init this. unsigned n_mapped; template <class AB,class ABe> indices_after(AB i, ABe end) { init(i,end); } + template <class Order> + void init_inverse_order(unsigned from_sz,Order const& order) { + init_inverse_order(from_sz,order.begin(),order.end()); + } + template <class OrderI> + void init_inverse_order(unsigned from_sz,OrderI i,OrderI end) { + init_alloc(from_sz); + unsigned d=0; + n_kept=0; + for(;i!=end;++i) { + assert(d<from_sz); + map[d++]=*i; + ++n_kept; + } + for(;d<from_sz;++d) + map[d]=REMOVED; + } + template <class Vec,class R> void init_keep_if(Vec v,R const& r) { @@ -69,6 +103,25 @@ struct indices_after map=(unsigned *)::operator new(sizeof(unsigned)*n_mapped); n_kept=new_indices_keep_if_n(n_mapped,r,map); } + // contents uninit. + void init_alloc(unsigned n) { + free(); + n_mapped=n; + map=n_mapped>0 ? + (unsigned *)::operator new(sizeof(unsigned)*n_mapped) + : 0; + } + void init_const(unsigned n,unsigned map_all_to) { + init_alloc(n); + for (unsigned i=0;i<n;++i) + map[i]=map_all_to; + n_kept=(map_all_to==REMOVED)?0:n; + } + + void init_keep_none(unsigned n) { + init_const(n,REMOVED); + n_kept=0; + } template <class AB,class ABe> void init(AB i, ABe end) { @@ -93,9 +146,14 @@ struct indices_after indices_after() : n_mapped(0) {} ~indices_after() { + free(); + } + void free() { if (n_mapped) ::operator delete((void*)map); + n_mapped=0; } + bool removing(unsigned i) const { return map[i] == REMOVED; @@ -127,7 +185,6 @@ struct indices_after { using std::swap; assert(v.size()==n_mapped); - unsigned r=n_mapped; unsigned i=0; for (;i<n_mapped&&keeping(i);++i) ; for(;i<n_mapped;++i) diff --git a/utils/named_enum.h b/utils/named_enum.h index 447dff78..675ec868 100755 --- a/utils/named_enum.h +++ b/utils/named_enum.h @@ -2,13 +2,16 @@ #define NAMED_ENUM_H #ifndef NAMED_ENUM_USE_OPTIONAL -# define NAMED_ENUM_USE_OPTIONAL 1 +# define NAMED_ENUM_USE_OPTIONAL 0 #endif //TODO: efficiency - supply map type (e.g. std::map or tr1::unordered_map) for string->int (int->string already fast w/ switch) - then implement iterators that don't assume contiguous ids. //TODO: hidden (can't convert string->id, but can do reverse) sentinel values. XX (hidden) and XY (can convert to) //TODO: bitfield "A|B" strings - note: slightly complicates int->string, as well. //TODO: option for case-insensitive compare (ctype tolower?) +//TODO: program_options validate method so you can declare po::value<MyEnum> instead of po::value<string>? +//TODO: cout << MyEnum ? +//impossible: (without wrapping in struct) MyEnum(string) /* named enum (string<->int). note: inefficient linear search for string->int diff --git a/utils/small_vector.h b/utils/small_vector.h index 077a524a..8c1c3bfe 100644 --- a/utils/small_vector.h +++ b/utils/small_vector.h @@ -15,6 +15,8 @@ #include <new> #include <stdint.h> #include "swap_pod.h" +#include <boost/functional/hash.hpp> + //sizeof(T)/sizeof(T*)>1?sizeof(T)/sizeof(T*):1 template <class T,int SV_MAX=2> @@ -250,6 +252,15 @@ public: swap_pod(*this,o); } + inline std::size_t hash() const { + using namespace boost; + if (size_==0) return 0; + if (size_==1) return hash_value(data_.vals[0]); + if (size<= SV_MAX) + return hash_range(data_.vals,data_.vals+size_); + return hash_range(data_.ptr,data_.ptr+size_); + } + private: union StorageType { T vals[SV_MAX]; @@ -260,15 +271,20 @@ public: uint16_t capacity_; // only defined when size_ > __SV_MAX_STATIC }; -template <class T,int SV_MAX> -inline void swap(SmallVector<T,SV_MAX> &a,SmallVector<T,SV_MAX> &b) { +template <class T,int M> +std::size_t hash_value(SmallVector<T,M> const& x) { + return x.hash(); +} + +template <class T,int M> +inline void swap(SmallVector<T,M> &a,SmallVector<T,M> &b) { a.swap(b); } typedef SmallVector<int,2> SmallVectorInt; -template <class T,int N> -void memcpy(void *out,SmallVector<T,N> const& v) { +template <class T,int M> +void memcpy(void *out,SmallVector<T,M> const& v) { std::memcpy(out,v.begin(),v.size()*sizeof(T)); } |