summaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
authorgraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-08-13 08:20:47 +0000
committergraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-08-13 08:20:47 +0000
commit5fd3e970bf9ef0ce243a9c66c791c6b420b2a86a (patch)
tree33a7995d86e3ba10631c2aa68a3b53dfd9390248 /utils
parente60301e1be0a0d5bb44f1efd40ff43316121d4d3 (diff)
(NEEDS TESTING) cfg index rules->nts, sort by prob, remove duplicates keeping highest prob, topo sort (and after binarize topo sort). beginning to apply_fsa_models (PrefixTrieNode)
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@539 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'utils')
-rwxr-xr-xutils/hash.h27
-rwxr-xr-xutils/indices_after.h61
-rwxr-xr-xutils/named_enum.h5
-rw-r--r--utils/small_vector.h24
4 files changed, 110 insertions, 7 deletions
diff --git a/utils/hash.h b/utils/hash.h
index e89b1863..b0b1c43e 100755
--- a/utils/hash.h
+++ b/utils/hash.h
@@ -58,4 +58,31 @@ typename H::mapped_type & get_default(H &ht,K const& k,typename H::mapped_type c
return const_cast<typename H::mapped_type &>(ht.insert(typename H::value_type(k,v)).first->second);
}
+// the below could also return a ref to the mapped max/min. they have the advantage of not falsely claiming an improvement when an equal value already existed. otherwise you could just modify the get_default and if equal assume new.
+template <class H,class K>
+bool improve_mapped_max(H &ht,K const& k,typename H::mapped_type const& v) {
+ std::pair<typename H::iterator,bool> inew=ht.insert(typename H::value_type(k,v));
+ if (inew.second) return true;
+ typedef typename H::mapped_type V;
+ V &oldv=const_cast<V&>(inew.first->second);
+ if (oldv<v) {
+ oldv=v;
+ return true;
+ }
+ return false;
+}
+
+template <class H,class K>
+bool improve_mapped_min(H &ht,K const& k,typename H::mapped_type const& v) {
+ std::pair<typename H::iterator,bool> inew=ht.insert(typename H::value_type(k,v));
+ if (inew.second) return true;
+ typedef typename H::mapped_type V;
+ V &oldv=const_cast<V&>(inew.first->second);
+ if (v<oldv) { // the only difference from above
+ oldv=v;
+ return true;
+ }
+ return false;
+}
+
#endif
diff --git a/utils/indices_after.h b/utils/indices_after.h
index 2b662d5c..2891563c 100755
--- a/utils/indices_after.h
+++ b/utils/indices_after.h
@@ -1,6 +1,9 @@
#ifndef INDICES_AFTER_REMOVING_H
#define INDICES_AFTER_REMOVING_H
+//TODO: instead of making REMOVED a constant, make it class state (defaults to -1 as before, of course).
+
+#include <assert.h>
#include <boost/config.hpp> // STATIC_CONSTANT
#include <algorithm> //swap
#include <iterator>
@@ -48,19 +51,50 @@ unsigned new_indices(KEEP keep,O out) {
return new_indices(keep.begin(),keep.end(),out);
}
+template <class V,class Out,class Permi>
+void copy_perm_to(Out o,V const& from,Permi i,Permi e) {
+ for (;i!=e;++i)
+ *o++=from[*i];
+}
+
+//to cannot be same as from, for most permutations. for to==from, use indices_after::init_inverse_order instead.
+template <class Vto,class Vfrom,class Perm>
+void remap_perm_to(Vto &to,Vfrom const& from,Perm const& p) {
+ to.resize(p.size());
+ copy_perm_to(to.begin(),from,p.begin(),p.end());
+}
+
// given a vector and a parallel sequence of bools where true means keep, keep only the marked elements while maintaining order.
// this is done with a parallel sequence to the input, marked with positions the kept items would map into in a destination array, with removed items marked with the index -1. the reverse would be more compact (parallel to destination array, index of input item that goes into it) but would require the input sequence be random access.
struct indices_after
{
BOOST_STATIC_CONSTANT(unsigned,REMOVED=(unsigned)-1);
unsigned *map; // map[i] == REMOVED if i is deleted
- unsigned n_kept;
+ unsigned n_kept; // important to init this.
unsigned n_mapped;
template <class AB,class ABe>
indices_after(AB i, ABe end) {
init(i,end);
}
+ template <class Order>
+ void init_inverse_order(unsigned from_sz,Order const& order) {
+ init_inverse_order(from_sz,order.begin(),order.end());
+ }
+ template <class OrderI>
+ void init_inverse_order(unsigned from_sz,OrderI i,OrderI end) {
+ init_alloc(from_sz);
+ unsigned d=0;
+ n_kept=0;
+ for(;i!=end;++i) {
+ assert(d<from_sz);
+ map[d++]=*i;
+ ++n_kept;
+ }
+ for(;d<from_sz;++d)
+ map[d]=REMOVED;
+ }
+
template <class Vec,class R>
void init_keep_if(Vec v,R const& r)
{
@@ -69,6 +103,25 @@ struct indices_after
map=(unsigned *)::operator new(sizeof(unsigned)*n_mapped);
n_kept=new_indices_keep_if_n(n_mapped,r,map);
}
+ // contents uninit.
+ void init_alloc(unsigned n) {
+ free();
+ n_mapped=n;
+ map=n_mapped>0 ?
+ (unsigned *)::operator new(sizeof(unsigned)*n_mapped)
+ : 0;
+ }
+ void init_const(unsigned n,unsigned map_all_to) {
+ init_alloc(n);
+ for (unsigned i=0;i<n;++i)
+ map[i]=map_all_to;
+ n_kept=(map_all_to==REMOVED)?0:n;
+ }
+
+ void init_keep_none(unsigned n) {
+ init_const(n,REMOVED);
+ n_kept=0;
+ }
template <class AB,class ABe>
void init(AB i, ABe end) {
@@ -93,9 +146,14 @@ struct indices_after
indices_after() : n_mapped(0) {}
~indices_after()
{
+ free();
+ }
+ void free() {
if (n_mapped)
::operator delete((void*)map);
+ n_mapped=0;
}
+
bool removing(unsigned i) const
{
return map[i] == REMOVED;
@@ -127,7 +185,6 @@ struct indices_after
{
using std::swap;
assert(v.size()==n_mapped);
- unsigned r=n_mapped;
unsigned i=0;
for (;i<n_mapped&&keeping(i);++i) ;
for(;i<n_mapped;++i)
diff --git a/utils/named_enum.h b/utils/named_enum.h
index 447dff78..675ec868 100755
--- a/utils/named_enum.h
+++ b/utils/named_enum.h
@@ -2,13 +2,16 @@
#define NAMED_ENUM_H
#ifndef NAMED_ENUM_USE_OPTIONAL
-# define NAMED_ENUM_USE_OPTIONAL 1
+# define NAMED_ENUM_USE_OPTIONAL 0
#endif
//TODO: efficiency - supply map type (e.g. std::map or tr1::unordered_map) for string->int (int->string already fast w/ switch) - then implement iterators that don't assume contiguous ids.
//TODO: hidden (can't convert string->id, but can do reverse) sentinel values. XX (hidden) and XY (can convert to)
//TODO: bitfield "A|B" strings - note: slightly complicates int->string, as well.
//TODO: option for case-insensitive compare (ctype tolower?)
+//TODO: program_options validate method so you can declare po::value<MyEnum> instead of po::value<string>?
+//TODO: cout << MyEnum ?
+//impossible: (without wrapping in struct) MyEnum(string)
/* named enum (string<->int). note: inefficient linear search for string->int
diff --git a/utils/small_vector.h b/utils/small_vector.h
index 077a524a..8c1c3bfe 100644
--- a/utils/small_vector.h
+++ b/utils/small_vector.h
@@ -15,6 +15,8 @@
#include <new>
#include <stdint.h>
#include "swap_pod.h"
+#include <boost/functional/hash.hpp>
+
//sizeof(T)/sizeof(T*)>1?sizeof(T)/sizeof(T*):1
template <class T,int SV_MAX=2>
@@ -250,6 +252,15 @@ public:
swap_pod(*this,o);
}
+ inline std::size_t hash() const {
+ using namespace boost;
+ if (size_==0) return 0;
+ if (size_==1) return hash_value(data_.vals[0]);
+ if (size<= SV_MAX)
+ return hash_range(data_.vals,data_.vals+size_);
+ return hash_range(data_.ptr,data_.ptr+size_);
+ }
+
private:
union StorageType {
T vals[SV_MAX];
@@ -260,15 +271,20 @@ public:
uint16_t capacity_; // only defined when size_ > __SV_MAX_STATIC
};
-template <class T,int SV_MAX>
-inline void swap(SmallVector<T,SV_MAX> &a,SmallVector<T,SV_MAX> &b) {
+template <class T,int M>
+std::size_t hash_value(SmallVector<T,M> const& x) {
+ return x.hash();
+}
+
+template <class T,int M>
+inline void swap(SmallVector<T,M> &a,SmallVector<T,M> &b) {
a.swap(b);
}
typedef SmallVector<int,2> SmallVectorInt;
-template <class T,int N>
-void memcpy(void *out,SmallVector<T,N> const& v) {
+template <class T,int M>
+void memcpy(void *out,SmallVector<T,M> const& v) {
std::memcpy(out,v.begin(),v.size()*sizeof(T));
}