diff options
Diffstat (limited to 'gi/pyp-topics/src/pyp.hh')
-rw-r--r-- | gi/pyp-topics/src/pyp.hh | 52 |
1 files changed, 37 insertions, 15 deletions
diff --git a/gi/pyp-topics/src/pyp.hh b/gi/pyp-topics/src/pyp.hh index 7a520d6a..dc47244b 100644 --- a/gi/pyp-topics/src/pyp.hh +++ b/gi/pyp-topics/src/pyp.hh @@ -4,6 +4,7 @@ #include <math.h> #include <map> #include <tr1/unordered_map> +//#include <google/sparse_hash_map> #include <boost/random/uniform_real.hpp> #include <boost/random/variate_generator.hpp> @@ -11,6 +12,7 @@ #include "log_add.h" #include "slice-sampler.h" +#include "mt19937ar.h" // // Pitman-Yor process with customer and table tracking @@ -18,12 +20,17 @@ template <typename Dish, typename Hash=std::tr1::hash<Dish> > class PYP : protected std::tr1::unordered_map<Dish, int, Hash> +//class PYP : protected google::sparse_hash_map<Dish, int, Hash> { public: using std::tr1::unordered_map<Dish,int>::const_iterator; using std::tr1::unordered_map<Dish,int>::iterator; using std::tr1::unordered_map<Dish,int>::begin; using std::tr1::unordered_map<Dish,int>::end; +// using google::sparse_hash_map<Dish,int>::const_iterator; +// using google::sparse_hash_map<Dish,int>::iterator; +// using google::sparse_hash_map<Dish,int>::begin; +// using google::sparse_hash_map<Dish,int>::end; PYP(double a, double b, unsigned long seed = 0, Hash hash=Hash()); @@ -39,6 +46,7 @@ public: int num_customers() const { return _total_customers; } int num_types() const { return std::tr1::unordered_map<Dish,int>::size(); } + //int num_types() const { return google::sparse_hash_map<Dish,int>::size(); } bool empty() const { return _total_customers == 0; } double log_prob(Dish dish, double log_p0) const; @@ -79,6 +87,7 @@ private: std::map<int, int> table_histogram; // num customers at table -> number tables }; typedef std::tr1::unordered_map<Dish, TableCounter, Hash> DishTableType; + //typedef google::sparse_hash_map<Dish, TableCounter, Hash> DishTableType; DishTableType _dish_tables; int _total_customers, _total_tables; @@ -86,11 +95,10 @@ private: typedef boost::uniform_real<> uni_dist_type; typedef boost::variate_generator<base_generator_type&, uni_dist_type> gen_type; - uni_dist_type uni_dist; - base_generator_type rng; //this gets the seed - gen_type rnd; //instantiate: rnd(rng, uni_dist) +// uni_dist_type uni_dist; +// base_generator_type rng; //this gets the seed +// gen_type rnd; //instantiate: rnd(rng, uni_dist) //call: rnd() generates uniform on [0,1) - // Function objects for calculating the parts of the log_prob for // the parameters a and b @@ -132,12 +140,12 @@ private: } }; - /* lbetadist() returns the log probability density of x under a Beta(alpha,beta) + /* lbetadist() returns the log probability density of x under a Beta(alpha,beta) * distribution. - copied from Mark Johnson's gammadist.c */ - static long double lbetadist(long double x, long double alpha, long double beta); + static long double lbetadist(long double x, long double alpha, long double beta); - /* lgammadist() returns the log probability density of x under a Gamma(alpha,beta) + /* lgammadist() returns the log probability density of x under a Gamma(alpha,beta) * distribution - copied from Mark Johnson's gammadist.c */ static long double lgammadist(long double x, long double alpha, long double beta); @@ -146,13 +154,15 @@ private: template <typename Dish, typename Hash> PYP<Dish,Hash>::PYP(double a, double b, unsigned long seed, Hash) -: std::tr1::unordered_map<Dish, int, Hash>(), _a(a), _b(b), +: std::tr1::unordered_map<Dish, int, Hash>(10), _a(a), _b(b), +//: google::sparse_hash_map<Dish, int, Hash>(10), _a(a), _b(b), _a_beta_a(1), _a_beta_b(1), _b_gamma_s(1), _b_gamma_c(1), //_a_beta_a(1), _a_beta_b(1), _b_gamma_s(10), _b_gamma_c(0.1), - _total_customers(0), _total_tables(0), - uni_dist(0,1), rng(seed == 0 ? (unsigned long)this : seed), rnd(rng, uni_dist) + _total_customers(0), _total_tables(0)//, + //uni_dist(0,1), rng(seed == 0 ? (unsigned long)this : seed), rnd(rng, uni_dist) { // std::cerr << "\t##PYP<Dish,Hash>::PYP(a=" << _a << ",b=" << _b << ")" << std::endl; + //set_deleted_key(-std::numeric_limits<Dish>::max()); } template <typename Dish, typename Hash> @@ -235,7 +245,8 @@ PYP<Dish,Hash>::increment(Dish dish, double p0) { assert (pshare >= 0.0); //assert (pnew > 0.0); - if (rnd() < pnew / (pshare + pnew)) { + //if (rnd() < pnew / (pshare + pnew)) { + if (mt_genrand_res53() < pnew / (pshare + pnew)) { // assign to a new table tc.tables += 1; tc.table_histogram[1] += 1; @@ -245,7 +256,8 @@ PYP<Dish,Hash>::increment(Dish dish, double p0) { else { // randomly assign to an existing table // remove constant denominator from inner loop - double r = rnd() * (c - _a*t); + //double r = rnd() * (c - _a*t); + double r = mt_genrand_res53() * (c - _a*t); for (std::map<int,int>::iterator hit = tc.table_histogram.begin(); hit != tc.table_histogram.end(); ++hit) { @@ -266,6 +278,7 @@ PYP<Dish,Hash>::increment(Dish dish, double p0) { } std::tr1::unordered_map<Dish,int,Hash>::operator[](dish) += 1; + //google::sparse_hash_map<Dish,int,Hash>::operator[](dish) += 1; _total_customers += 1; return delta; @@ -276,6 +289,7 @@ int PYP<Dish,Hash>::count(Dish dish) const { typename std::tr1::unordered_map<Dish, int>::const_iterator + //typename google::sparse_hash_map<Dish, int>::const_iterator dcit = find(dish); if (dcit != end()) return dcit->second; @@ -288,6 +302,7 @@ int PYP<Dish,Hash>::decrement(Dish dish) { typename std::tr1::unordered_map<Dish, int>::iterator dcit = find(dish); + //typename google::sparse_hash_map<Dish, int>::iterator dcit = find(dish); if (dcit == end()) { std::cerr << dish << std::endl; assert(false); @@ -296,6 +311,7 @@ PYP<Dish,Hash>::decrement(Dish dish) int delta = 0; typename std::tr1::unordered_map<Dish, TableCounter>::iterator dtit = _dish_tables.find(dish); + //typename google::sparse_hash_map<Dish, TableCounter>::iterator dtit = _dish_tables.find(dish); if (dtit == _dish_tables.end()) { std::cerr << dish << std::endl; assert(false); @@ -307,7 +323,8 @@ PYP<Dish,Hash>::decrement(Dish dish) //std::cerr << "count: " << count(dish) << " "; //std::cerr << "tables: " << tc.tables << "\n"; - double r = rnd() * count(dish); + //double r = rnd() * count(dish); + double r = mt_genrand_res53() * count(dish); for (std::map<int,int>::iterator hit = tc.table_histogram.begin(); hit != tc.table_histogram.end(); ++hit) { @@ -357,6 +374,7 @@ int PYP<Dish,Hash>::num_tables(Dish dish) const { typename std::tr1::unordered_map<Dish, TableCounter, Hash>::const_iterator + //typename google::sparse_hash_map<Dish, TableCounter, Hash>::const_iterator dtit = _dish_tables.find(dish); //assert(dtit != _dish_tables.end()); @@ -379,6 +397,7 @@ PYP<Dish,Hash>::debug_info(std::ostream& os) const { int hists = 0, tables = 0; for (typename std::tr1::unordered_map<Dish, TableCounter, Hash>::const_iterator + //for (typename google::sparse_hash_map<Dish, TableCounter, Hash>::const_iterator dtit = _dish_tables.begin(); dtit != _dish_tables.end(); ++dtit) { hists += dtit->second.table_histogram.size(); @@ -409,6 +428,7 @@ void PYP<Dish,Hash>::clear() { this->std::tr1::unordered_map<Dish,int,Hash>::clear(); + //this->google::sparse_hash_map<Dish,int,Hash>::clear(); _dish_tables.clear(); _total_tables = _total_customers = 0; } @@ -509,7 +529,8 @@ PYP<Dish,Hash>::resample_prior_b() { int niterations = 10; // number of resampling iterations //std::cerr << "\n## resample_prior_b(), initial a = " << _a << ", b = " << _b << std::endl; resample_b_type b_log_prob(_total_customers, _total_tables, _a, _b_gamma_c, _b_gamma_s); - _b = slice_sampler1d(b_log_prob, _b, rnd, (double) 0.0, std::numeric_limits<double>::infinity(), + //_b = slice_sampler1d(b_log_prob, _b, rnd, (double) 0.0, std::numeric_limits<double>::infinity(), + _b = slice_sampler1d(b_log_prob, _b, random, (double) 0.0, std::numeric_limits<double>::infinity(), (double) 0.0, niterations, 100*niterations); //std::cerr << "\n## resample_prior_b(), final a = " << _a << ", b = " << _b << std::endl; } @@ -523,7 +544,8 @@ PYP<Dish,Hash>::resample_prior_a() { int niterations = 10; //std::cerr << "\n## Initial a = " << _a << ", b = " << _b << std::endl; resample_a_type a_log_prob(_total_customers, _total_tables, _b, _a_beta_a, _a_beta_b, _dish_tables); - _a = slice_sampler1d(a_log_prob, _a, rnd, std::numeric_limits<double>::min(), + //_a = slice_sampler1d(a_log_prob, _a, rnd, std::numeric_limits<double>::min(), + _a = slice_sampler1d(a_log_prob, _a, random, std::numeric_limits<double>::min(), (double) 1.0, (double) 0.0, niterations, 100*niterations); } |