From ead8845217c5e6e48f3680ead6f859ec8e110eb2 Mon Sep 17 00:00:00 2001 From: graehl Date: Fri, 13 Aug 2010 08:20:47 +0000 Subject: (NEEDS TESTING) cfg index rules->nts, sort by prob, remove duplicates keeping highest prob, topo sort (and after binarize topo sort). beginning to apply_fsa_models (PrefixTrieNode) git-svn-id: https://ws10smt.googlecode.com/svn/trunk@539 ec762483-ff6d-05da-a07a-a48fb63a330f --- utils/hash.h | 27 +++++++++++++++++++++++ utils/indices_after.h | 61 +++++++++++++++++++++++++++++++++++++++++++++++++-- utils/named_enum.h | 5 ++++- utils/small_vector.h | 24 ++++++++++++++++---- 4 files changed, 110 insertions(+), 7 deletions(-) (limited to 'utils') diff --git a/utils/hash.h b/utils/hash.h index e89b1863..b0b1c43e 100755 --- a/utils/hash.h +++ b/utils/hash.h @@ -58,4 +58,31 @@ typename H::mapped_type & get_default(H &ht,K const& k,typename H::mapped_type c return const_cast(ht.insert(typename H::value_type(k,v)).first->second); } +// the below could also return a ref to the mapped max/min. they have the advantage of not falsely claiming an improvement when an equal value already existed. otherwise you could just modify the get_default and if equal assume new. +template +bool improve_mapped_max(H &ht,K const& k,typename H::mapped_type const& v) { + std::pair inew=ht.insert(typename H::value_type(k,v)); + if (inew.second) return true; + typedef typename H::mapped_type V; + V &oldv=const_cast(inew.first->second); + if (oldv +bool improve_mapped_min(H &ht,K const& k,typename H::mapped_type const& v) { + std::pair inew=ht.insert(typename H::value_type(k,v)); + if (inew.second) return true; + typedef typename H::mapped_type V; + V &oldv=const_cast(inew.first->second); + if (v #include // STATIC_CONSTANT #include //swap #include @@ -48,19 +51,50 @@ unsigned new_indices(KEEP keep,O out) { return new_indices(keep.begin(),keep.end(),out); } +template +void copy_perm_to(Out o,V const& from,Permi i,Permi e) { + for (;i!=e;++i) + *o++=from[*i]; +} + +//to cannot be same as from, for most permutations. for to==from, use indices_after::init_inverse_order instead. +template +void remap_perm_to(Vto &to,Vfrom const& from,Perm const& p) { + to.resize(p.size()); + copy_perm_to(to.begin(),from,p.begin(),p.end()); +} + // given a vector and a parallel sequence of bools where true means keep, keep only the marked elements while maintaining order. // this is done with a parallel sequence to the input, marked with positions the kept items would map into in a destination array, with removed items marked with the index -1. the reverse would be more compact (parallel to destination array, index of input item that goes into it) but would require the input sequence be random access. struct indices_after { BOOST_STATIC_CONSTANT(unsigned,REMOVED=(unsigned)-1); unsigned *map; // map[i] == REMOVED if i is deleted - unsigned n_kept; + unsigned n_kept; // important to init this. unsigned n_mapped; template indices_after(AB i, ABe end) { init(i,end); } + template + void init_inverse_order(unsigned from_sz,Order const& order) { + init_inverse_order(from_sz,order.begin(),order.end()); + } + template + void init_inverse_order(unsigned from_sz,OrderI i,OrderI end) { + init_alloc(from_sz); + unsigned d=0; + n_kept=0; + for(;i!=end;++i) { + assert(d void init_keep_if(Vec v,R const& r) { @@ -69,6 +103,25 @@ struct indices_after map=(unsigned *)::operator new(sizeof(unsigned)*n_mapped); n_kept=new_indices_keep_if_n(n_mapped,r,map); } + // contents uninit. + void init_alloc(unsigned n) { + free(); + n_mapped=n; + map=n_mapped>0 ? + (unsigned *)::operator new(sizeof(unsigned)*n_mapped) + : 0; + } + void init_const(unsigned n,unsigned map_all_to) { + init_alloc(n); + for (unsigned i=0;i void init(AB i, ABe end) { @@ -93,9 +146,14 @@ struct indices_after indices_after() : n_mapped(0) {} ~indices_after() { + free(); + } + void free() { if (n_mapped) ::operator delete((void*)map); + n_mapped=0; } + bool removing(unsigned i) const { return map[i] == REMOVED; @@ -127,7 +185,6 @@ struct indices_after { using std::swap; assert(v.size()==n_mapped); - unsigned r=n_mapped; unsigned i=0; for (;iint (int->string already fast w/ switch) - then implement iterators that don't assume contiguous ids. //TODO: hidden (can't convert string->id, but can do reverse) sentinel values. XX (hidden) and XY (can convert to) //TODO: bitfield "A|B" strings - note: slightly complicates int->string, as well. //TODO: option for case-insensitive compare (ctype tolower?) +//TODO: program_options validate method so you can declare po::value instead of po::value? +//TODO: cout << MyEnum ? +//impossible: (without wrapping in struct) MyEnum(string) /* named enum (string<->int). note: inefficient linear search for string->int diff --git a/utils/small_vector.h b/utils/small_vector.h index 077a524a..8c1c3bfe 100644 --- a/utils/small_vector.h +++ b/utils/small_vector.h @@ -15,6 +15,8 @@ #include #include #include "swap_pod.h" +#include + //sizeof(T)/sizeof(T*)>1?sizeof(T)/sizeof(T*):1 template @@ -250,6 +252,15 @@ public: swap_pod(*this,o); } + inline std::size_t hash() const { + using namespace boost; + if (size_==0) return 0; + if (size_==1) return hash_value(data_.vals[0]); + if (size<= SV_MAX) + return hash_range(data_.vals,data_.vals+size_); + return hash_range(data_.ptr,data_.ptr+size_); + } + private: union StorageType { T vals[SV_MAX]; @@ -260,15 +271,20 @@ public: uint16_t capacity_; // only defined when size_ > __SV_MAX_STATIC }; -template -inline void swap(SmallVector &a,SmallVector &b) { +template +std::size_t hash_value(SmallVector const& x) { + return x.hash(); +} + +template +inline void swap(SmallVector &a,SmallVector &b) { a.swap(b); } typedef SmallVector SmallVectorInt; -template -void memcpy(void *out,SmallVector const& v) { +template +void memcpy(void *out,SmallVector const& v) { std::memcpy(out,v.begin(),v.size()*sizeof(T)); } -- cgit v1.2.3