summaryrefslogtreecommitdiff
path: root/gi/clda/src
diff options
context:
space:
mode:
authorKenneth Heafield <github@kheafield.com>2012-10-22 12:07:20 +0100
committerKenneth Heafield <github@kheafield.com>2012-10-22 12:07:20 +0100
commit5f98fe5c4f2a2090eeb9d30c030305a70a8347d1 (patch)
tree9b6002f850e6dea1e3400c6b19bb31a9cdf3067f /gi/clda/src
parentcf9994131993b40be62e90e213b1e11e6b550143 (diff)
parent21825a09d97c2e0afd20512f306fb25fed55e529 (diff)
Merge remote branch 'upstream/master'
Conflicts: Jamroot bjam decoder/Jamfile decoder/cdec.cc dpmert/Jamfile jam-files/sanity.jam klm/lm/Jamfile klm/util/Jamfile mira/Jamfile
Diffstat (limited to 'gi/clda/src')
-rw-r--r--gi/clda/src/Makefile.am6
-rw-r--r--gi/clda/src/ccrp.h291
-rw-r--r--gi/clda/src/clda.cc148
-rw-r--r--gi/clda/src/crp.h50
-rw-r--r--gi/clda/src/slice_sampler.h191
-rw-r--r--gi/clda/src/timer.h20
6 files changed, 0 insertions, 706 deletions
diff --git a/gi/clda/src/Makefile.am b/gi/clda/src/Makefile.am
deleted file mode 100644
index cdca1f97..00000000
--- a/gi/clda/src/Makefile.am
+++ /dev/null
@@ -1,6 +0,0 @@
-bin_PROGRAMS = clda
-
-clda_SOURCES = clda.cc
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I$(top_srcdir)/utils $(GTEST_CPPFLAGS)
-AM_LDFLAGS = $(top_srcdir)/utils/libutils.a -lz
diff --git a/gi/clda/src/ccrp.h b/gi/clda/src/ccrp.h
deleted file mode 100644
index a7c2825c..00000000
--- a/gi/clda/src/ccrp.h
+++ /dev/null
@@ -1,291 +0,0 @@
-#ifndef _CCRP_H_
-#define _CCRP_H_
-
-#include <numeric>
-#include <cassert>
-#include <cmath>
-#include <list>
-#include <iostream>
-#include <vector>
-#include <tr1/unordered_map>
-#include <boost/functional/hash.hpp>
-#include "sampler.h"
-#include "slice_sampler.h"
-
-// Chinese restaurant process (Pitman-Yor parameters) with table tracking.
-
-template <typename Dish, typename DishHash = boost::hash<Dish> >
-class CCRP {
- public:
- CCRP(double disc, double conc) :
- num_tables_(),
- num_customers_(),
- discount_(disc),
- concentration_(conc),
- discount_prior_alpha_(std::numeric_limits<double>::quiet_NaN()),
- discount_prior_beta_(std::numeric_limits<double>::quiet_NaN()),
- concentration_prior_shape_(std::numeric_limits<double>::quiet_NaN()),
- concentration_prior_rate_(std::numeric_limits<double>::quiet_NaN()) {}
-
- CCRP(double d_alpha, double d_beta, double c_shape, double c_rate, double d = 0.1, double c = 10.0) :
- num_tables_(),
- num_customers_(),
- discount_(d),
- concentration_(c),
- discount_prior_alpha_(d_alpha),
- discount_prior_beta_(d_beta),
- concentration_prior_shape_(c_shape),
- concentration_prior_rate_(c_rate) {}
-
- double discount() const { return discount_; }
- double concentration() const { return concentration_; }
-
- bool has_discount_prior() const {
- return !std::isnan(discount_prior_alpha_);
- }
-
- bool has_concentration_prior() const {
- return !std::isnan(concentration_prior_shape_);
- }
-
- void clear() {
- num_tables_ = 0;
- num_customers_ = 0;
- dish_locs_.clear();
- }
-
- unsigned num_tables(const Dish& dish) const {
- const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
- if (it == dish_locs_.end()) return 0;
- return it->second.table_counts_.size();
- }
-
- unsigned num_customers() const {
- return num_customers_;
- }
-
- unsigned num_customers(const Dish& dish) const {
- const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
- if (it == dish_locs_.end()) return 0;
- return it->total_dish_count_;
- }
-
- // returns +1 or 0 indicating whether a new table was opened
- int increment(const Dish& dish, const double& p0, MT19937* rng) {
- DishLocations& loc = dish_locs_[dish];
- bool share_table = false;
- if (loc.total_dish_count_) {
- const double p_empty = (concentration_ + num_tables_ * discount_) * p0;
- const double p_share = (loc.total_dish_count_ - loc.table_counts_.size() * discount_);
- share_table = rng->SelectSample(p_empty, p_share);
- }
- if (share_table) {
- double r = rng->next() * (loc.total_dish_count_ - loc.table_counts_.size() * discount_);
- for (typename std::list<unsigned>::iterator ti = loc.table_counts_.begin();
- ti != loc.table_counts_.end(); ++ti) {
- r -= (*ti - discount_);
- if (r <= 0.0) {
- ++(*ti);
- break;
- }
- }
- if (r > 0.0) {
- std::cerr << "Serious error: r=" << r << std::endl;
- Print(&std::cerr);
- assert(r <= 0.0);
- }
- } else {
- loc.table_counts_.push_back(1u);
- ++num_tables_;
- }
- ++loc.total_dish_count_;
- ++num_customers_;
- return (share_table ? 0 : 1);
- }
-
- // returns -1 or 0, indicating whether a table was closed
- int decrement(const Dish& dish, MT19937* rng) {
- DishLocations& loc = dish_locs_[dish];
- assert(loc.total_dish_count_);
- if (loc.total_dish_count_ == 1) {
- dish_locs_.erase(dish);
- --num_tables_;
- --num_customers_;
- return -1;
- } else {
- int delta = 0;
- // sample customer to remove UNIFORMLY. that is, do NOT use the discount
- // here. if you do, it will introduce (unwanted) bias!
- double r = rng->next() * loc.total_dish_count_;
- --loc.total_dish_count_;
- for (typename std::list<unsigned>::iterator ti = loc.table_counts_.begin();
- ti != loc.table_counts_.end(); ++ti) {
- r -= *ti;
- if (r <= 0.0) {
- if ((--(*ti)) == 0) {
- --num_tables_;
- delta = -1;
- loc.table_counts_.erase(ti);
- }
- break;
- }
- }
- if (r > 0.0) {
- std::cerr << "Serious error: r=" << r << std::endl;
- Print(&std::cerr);
- assert(r <= 0.0);
- }
- --num_customers_;
- return delta;
- }
- }
-
- double prob(const Dish& dish, const double& p0) const {
- const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
- const double r = num_tables_ * discount_ + concentration_;
- if (it == dish_locs_.end()) {
- return r * p0 / (num_customers_ + concentration_);
- } else {
- return (it->second.total_dish_count_ - discount_ * it->second.table_counts_.size() + r * p0) /
- (num_customers_ + concentration_);
- }
- }
-
- double log_crp_prob() const {
- return log_crp_prob(discount_, concentration_);
- }
-
- static double log_beta_density(const double& x, const double& alpha, const double& beta) {
- assert(x > 0.0);
- assert(x < 1.0);
- assert(alpha > 0.0);
- assert(beta > 0.0);
- const double lp = (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta);
- return lp;
- }
-
- static double log_gamma_density(const double& x, const double& shape, const double& rate) {
- assert(x >= 0.0);
- assert(shape > 0.0);
- assert(rate > 0.0);
- const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape);
- return lp;
- }
-
- // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process
- // does not include P_0's
- double log_crp_prob(const double& discount, const double& concentration) const {
- double lp = 0.0;
- if (has_discount_prior())
- lp = log_beta_density(discount, discount_prior_alpha_, discount_prior_beta_);
- if (has_concentration_prior())
- lp += log_gamma_density(concentration, concentration_prior_shape_, concentration_prior_rate_);
- assert(lp <= 0.0);
- if (num_customers_) {
- if (discount > 0.0) {
- const double r = lgamma(1.0 - discount);
- lp += lgamma(concentration) - lgamma(concentration + num_customers_)
- + num_tables_ * log(discount) + lgamma(concentration / discount + num_tables_)
- - lgamma(concentration / discount);
- assert(std::isfinite(lp));
- for (typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.begin();
- it != dish_locs_.end(); ++it) {
- const DishLocations& cur = it->second;
- for (std::list<unsigned>::const_iterator ti = cur.table_counts_.begin(); ti != cur.table_counts_.end(); ++ti) {
- lp += lgamma(*ti - discount) - r;
- }
- }
- } else {
- assert(!"not implemented yet");
- }
- }
- assert(std::isfinite(lp));
- return lp;
- }
-
- void resample_hyperparameters(MT19937* rng) {
- assert(has_discount_prior() || has_concentration_prior());
- DiscountResampler dr(*this);
- ConcentrationResampler cr(*this);
- const int niterations = 10;
- double gamma_upper = std::numeric_limits<double>::infinity();
- for (int iter = 0; iter < 5; ++iter) {
- if (has_concentration_prior()) {
- concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0,
- gamma_upper, 0.0, niterations, 100*niterations);
- }
- if (has_discount_prior()) {
- discount_ = slice_sampler1d(dr, discount_, *rng, std::numeric_limits<double>::min(),
- 1.0, 0.0, niterations, 100*niterations);
- }
- }
- concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0,
- gamma_upper, 0.0, niterations, 100*niterations);
- }
-
- struct DiscountResampler {
- DiscountResampler(const CCRP& crp) : crp_(crp) {}
- const CCRP& crp_;
- double operator()(const double& proposed_discount) const {
- return crp_.log_crp_prob(proposed_discount, crp_.concentration_);
- }
- };
-
- struct ConcentrationResampler {
- ConcentrationResampler(const CCRP& crp) : crp_(crp) {}
- const CCRP& crp_;
- double operator()(const double& proposed_concentration) const {
- return crp_.log_crp_prob(crp_.discount_, proposed_concentration);
- }
- };
-
- struct DishLocations {
- DishLocations() : total_dish_count_() {}
- unsigned total_dish_count_; // customers at all tables with this dish
- std::list<unsigned> table_counts_; // list<> gives O(1) deletion and insertion, which we want
- // .size() is the number of tables for this dish
- };
-
- void Print(std::ostream* out) const {
- for (typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.begin();
- it != dish_locs_.end(); ++it) {
- (*out) << it->first << " (" << it->second.total_dish_count_ << " on " << it->second.table_counts_.size() << " tables): ";
- for (typename std::list<unsigned>::const_iterator i = it->second.table_counts_.begin();
- i != it->second.table_counts_.end(); ++i) {
- (*out) << " " << *i;
- }
- (*out) << std::endl;
- }
- }
-
- typedef typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator const_iterator;
- const_iterator begin() const {
- return dish_locs_.begin();
- }
- const_iterator end() const {
- return dish_locs_.end();
- }
-
- unsigned num_tables_;
- unsigned num_customers_;
- std::tr1::unordered_map<Dish, DishLocations, DishHash> dish_locs_;
-
- double discount_;
- double concentration_;
-
- // optional beta prior on discount_ (NaN if no prior)
- double discount_prior_alpha_;
- double discount_prior_beta_;
-
- // optional gamma prior on concentration_ (NaN if no prior)
- double concentration_prior_shape_;
- double concentration_prior_rate_;
-};
-
-template <typename T,typename H>
-std::ostream& operator<<(std::ostream& o, const CCRP<T,H>& c) {
- c.Print(&o);
- return o;
-}
-
-#endif
diff --git a/gi/clda/src/clda.cc b/gi/clda/src/clda.cc
deleted file mode 100644
index f548997f..00000000
--- a/gi/clda/src/clda.cc
+++ /dev/null
@@ -1,148 +0,0 @@
-#include <iostream>
-#include <vector>
-#include <map>
-#include <string>
-
-#include "timer.h"
-#include "crp.h"
-#include "ccrp.h"
-#include "sampler.h"
-#include "tdict.h"
-const size_t MAX_DOC_LEN_CHARS = 10000000;
-
-using namespace std;
-
-void ShowTopWordsForTopic(const map<WordID, int>& counts) {
- multimap<int, WordID> ms;
- for (map<WordID,int>::const_iterator it = counts.begin(); it != counts.end(); ++it)
- ms.insert(make_pair(it->second, it->first));
- int cc = 0;
- for (multimap<int, WordID>::reverse_iterator it = ms.rbegin(); it != ms.rend(); ++it) {
- cerr << it->first << ':' << TD::Convert(it->second) << " ";
- ++cc;
- if (cc==20) break;
- }
- cerr << endl;
-}
-
-int main(int argc, char** argv) {
- if (argc != 3) {
- cerr << "Usage: " << argv[0] << " num-classes num-samples\n";
- return 1;
- }
- const int num_classes = atoi(argv[1]);
- const int num_iterations = atoi(argv[2]);
- const int burnin_size = num_iterations * 0.9;
- if (num_classes < 2) {
- cerr << "Must request more than 1 class\n";
- return 1;
- }
- if (num_iterations < 5) {
- cerr << "Must request more than 5 iterations\n";
- return 1;
- }
- cerr << "CLASSES: " << num_classes << endl;
- char* buf = new char[MAX_DOC_LEN_CHARS];
- vector<vector<int> > wji; // w[j][i] - observed word i of doc j
- vector<vector<int> > zji; // z[j][i] - topic assignment for word i of doc j
- cerr << "READING DOCUMENTS\n";
- while(cin) {
- cin.getline(buf, MAX_DOC_LEN_CHARS);
- if (buf[0] == 0) continue;
- wji.push_back(vector<WordID>());
- TD::ConvertSentence(buf, &wji.back());
- }
- cerr << "READ " << wji.size() << " DOCUMENTS\n";
- MT19937 rng;
- cerr << "INITIALIZING RANDOM TOPIC ASSIGNMENTS\n";
- zji.resize(wji.size());
- double disc = 0.1;
- double beta = 10.0;
- double alpha = 50.0;
- const double uniform_topic = 1.0 / num_classes;
- const double uniform_word = 1.0 / TD::NumWords();
- vector<CCRP<int> > dr(zji.size(), CCRP<int>(1,1,1,1,disc, beta)); // dr[i] describes the probability of using a topic in document i
- vector<CCRP<int> > wr(num_classes, CCRP<int>(1,1,1,1,disc, alpha)); // wr[k] describes the probability of generating a word in topic k
- for (int j = 0; j < zji.size(); ++j) {
- const size_t num_words = wji[j].size();
- vector<int>& zj = zji[j];
- const vector<int>& wj = wji[j];
- zj.resize(num_words);
- for (int i = 0; i < num_words; ++i) {
- int random_topic = rng.next() * num_classes;
- if (random_topic == num_classes) { --random_topic; }
- zj[i] = random_topic;
- const int word = wj[i];
- dr[j].increment(random_topic, uniform_topic, &rng);
- wr[random_topic].increment(word, uniform_word, &rng);
- }
- }
- cerr << "SAMPLING\n";
- vector<map<WordID, int> > t2w(num_classes);
- Timer timer;
- SampleSet<double> ss;
- ss.resize(num_classes);
- double total_time = 0;
- for (int iter = 0; iter < num_iterations; ++iter) {
- cerr << '.';
- if (iter && iter % 10 == 0) {
- total_time += timer.Elapsed();
- timer.Reset();
- double llh = 0;
-#if 1
- for (int j = 0; j < dr.size(); ++j)
- dr[j].resample_hyperparameters(&rng);
- for (int j = 0; j < wr.size(); ++j)
- wr[j].resample_hyperparameters(&rng);
-#endif
-
- for (int j = 0; j < dr.size(); ++j)
- llh += dr[j].log_crp_prob();
- for (int j = 0; j < wr.size(); ++j)
- llh += wr[j].log_crp_prob();
- cerr << " [LLH=" << llh << " I=" << iter << "]\n";
- }
- for (int j = 0; j < zji.size(); ++j) {
- const size_t num_words = wji[j].size();
- vector<int>& zj = zji[j];
- const vector<int>& wj = wji[j];
- for (int i = 0; i < num_words; ++i) {
- const int word = wj[i];
- const int cur_topic = zj[i];
- dr[j].decrement(cur_topic, &rng);
- wr[cur_topic].decrement(word, &rng);
-
- for (int k = 0; k < num_classes; ++k) {
- ss[k]= dr[j].prob(k, uniform_topic) * wr[k].prob(word, uniform_word);
- }
- const int new_topic = rng.SelectSample(ss);
- dr[j].increment(new_topic, uniform_topic, &rng);
- wr[new_topic].increment(word, uniform_word, &rng);
- zj[i] = new_topic;
- if (iter > burnin_size) {
- ++t2w[cur_topic][word];
- }
- }
- }
- }
- for (int i = 0; i < num_classes; ++i) {
- cerr << "---------------------------------\n";
- cerr << " final PYP(" << wr[i].discount() << "," << wr[i].concentration() << ")\n";
- ShowTopWordsForTopic(t2w[i]);
- }
- cerr << "-------------\n";
-#if 0
- for (int j = 0; j < zji.size(); ++j) {
- const size_t num_words = wji[j].size();
- vector<int>& zj = zji[j];
- const vector<int>& wj = wji[j];
- zj.resize(num_words);
- for (int i = 0; i < num_words; ++i) {
- cerr << TD::Convert(wji[j][i]) << '(' << zj[i] << ") ";
- }
- cerr << endl;
- }
-#endif
- return 0;
-}
-
diff --git a/gi/clda/src/crp.h b/gi/clda/src/crp.h
deleted file mode 100644
index 9d35857e..00000000
--- a/gi/clda/src/crp.h
+++ /dev/null
@@ -1,50 +0,0 @@
-#ifndef _CRP_H_
-#define _CRP_H_
-
-// shamelessly adapted from code by Phil Blunsom and Trevor Cohn
-
-#include <boost/functional/hash.hpp>
-#include <tr1/unordered_map>
-
-#include "prob.h"
-
-template <typename DishType, typename Hash = boost::hash<DishType> >
-class CRP {
- public:
- CRP(double alpha) : alpha_(alpha), palpha_(alpha), total_customers_() {}
- void increment(const DishType& dish);
- void decrement(const DishType& dish);
- void erase(const DishType& dish) {
- counts_.erase(dish);
- }
- inline int count(const DishType& dish) const {
- const typename MapType::const_iterator i = counts_.find(dish);
- if (i == counts_.end()) return 0; else return i->second;
- }
- inline prob_t prob(const DishType& dish, const prob_t& p0) const {
- return (prob_t(count(dish)) + palpha_ * p0) / prob_t(total_customers_ + alpha_);
- }
- private:
- typedef std::tr1::unordered_map<DishType, int, Hash> MapType;
- MapType counts_;
- const double alpha_;
- const prob_t palpha_;
- int total_customers_;
-};
-
-template <typename Dish, typename Hash>
-void CRP<Dish,Hash>::increment(const Dish& dish) {
- ++counts_[dish];
- ++total_customers_;
-}
-
-template <typename Dish, typename Hash>
-void CRP<Dish,Hash>::decrement(const Dish& dish) {
- typename MapType::iterator i = counts_.find(dish);
- assert(i != counts_.end());
- if (--i->second == 0)
- counts_.erase(i);
- --total_customers_;
-}
-
-#endif
diff --git a/gi/clda/src/slice_sampler.h b/gi/clda/src/slice_sampler.h
deleted file mode 100644
index aa48a169..00000000
--- a/gi/clda/src/slice_sampler.h
+++ /dev/null
@@ -1,191 +0,0 @@
-//! slice-sampler.h is an MCMC slice sampler
-//!
-//! Mark Johnson, 1st August 2008
-
-#ifndef SLICE_SAMPLER_H
-#define SLICE_SAMPLER_H
-
-#include <algorithm>
-#include <cassert>
-#include <cmath>
-#include <iostream>
-#include <limits>
-
-//! slice_sampler_rfc_type{} returns the value of a user-specified
-//! function if the argument is within range, or - infinity otherwise
-//
-template <typename F, typename Fn, typename U>
-struct slice_sampler_rfc_type {
- F min_x, max_x;
- const Fn& f;
- U max_nfeval, nfeval;
- slice_sampler_rfc_type(F min_x, F max_x, const Fn& f, U max_nfeval)
- : min_x(min_x), max_x(max_x), f(f), max_nfeval(max_nfeval), nfeval(0) { }
-
- F operator() (F x) {
- if (min_x < x && x < max_x) {
- assert(++nfeval <= max_nfeval);
- F fx = f(x);
- assert(std::isfinite(fx));
- return fx;
- }
- return -std::numeric_limits<F>::infinity();
- }
-}; // slice_sampler_rfc_type{}
-
-//! slice_sampler1d() implements the univariate "range doubling" slice sampler
-//! described in Neal (2003) "Slice Sampling", The Annals of Statistics 31(3), 705-767.
-//
-template <typename F, typename LogF, typename Uniform01>
-F slice_sampler1d(const LogF& logF0, //!< log of function to sample
- F x, //!< starting point
- Uniform01& u01, //!< uniform [0,1) random number generator
- F min_x = -std::numeric_limits<F>::infinity(), //!< minimum value of support
- F max_x = std::numeric_limits<F>::infinity(), //!< maximum value of support
- F w = 0.0, //!< guess at initial width
- unsigned nsamples=1, //!< number of samples to draw
- unsigned max_nfeval=200) //!< max number of function evaluations
-{
- typedef unsigned U;
- slice_sampler_rfc_type<F,LogF,U> logF(min_x, max_x, logF0, max_nfeval);
-
- assert(std::isfinite(x));
-
- if (w <= 0.0) { // set w to a default width
- if (min_x > -std::numeric_limits<F>::infinity() && max_x < std::numeric_limits<F>::infinity())
- w = (max_x - min_x)/4;
- else
- w = std::max(((x < 0.0) ? -x : x)/4, (F) 0.1);
- }
- assert(std::isfinite(w));
-
- F logFx = logF(x);
- for (U sample = 0; sample < nsamples; ++sample) {
- F logY = logFx + log(u01()+1e-100); //! slice logFx at this value
- assert(std::isfinite(logY));
-
- F xl = x - w*u01(); //! lower bound on slice interval
- F logFxl = logF(xl);
- F xr = xl + w; //! upper bound on slice interval
- F logFxr = logF(xr);
-
- while (logY < logFxl || logY < logFxr) // doubling procedure
- if (u01() < 0.5)
- logFxl = logF(xl -= xr - xl);
- else
- logFxr = logF(xr += xr - xl);
-
- F xl1 = xl;
- F xr1 = xr;
- while (true) { // shrinking procedure
- F x1 = xl1 + u01()*(xr1 - xl1);
- if (logY < logF(x1)) {
- F xl2 = xl; // acceptance procedure
- F xr2 = xr;
- bool d = false;
- while (xr2 - xl2 > 1.1*w) {
- F xm = (xl2 + xr2)/2;
- if ((x < xm && x1 >= xm) || (x >= xm && x1 < xm))
- d = true;
- if (x1 < xm)
- xr2 = xm;
- else
- xl2 = xm;
- if (d && logY >= logF(xl2) && logY >= logF(xr2))
- goto unacceptable;
- }
- x = x1;
- goto acceptable;
- }
- goto acceptable;
- unacceptable:
- if (x1 < x) // rest of shrinking procedure
- xl1 = x1;
- else
- xr1 = x1;
- }
- acceptable:
- w = (4*w + (xr1 - xl1))/5; // update width estimate
- }
- return x;
-}
-
-/*
-//! slice_sampler1d() implements a 1-d MCMC slice sampler.
-//! It should be correct for unimodal distributions, but
-//! not for multimodal ones.
-//
-template <typename F, typename LogP, typename Uniform01>
-F slice_sampler1d(const LogP& logP, //!< log of distribution to sample
- F x, //!< initial sample
- Uniform01& u01, //!< uniform random number generator
- F min_x = -std::numeric_limits<F>::infinity(), //!< minimum value of support
- F max_x = std::numeric_limits<F>::infinity(), //!< maximum value of support
- F w = 0.0, //!< guess at initial width
- unsigned nsamples=1, //!< number of samples to draw
- unsigned max_nfeval=200) //!< max number of function evaluations
-{
- typedef unsigned U;
- assert(std::isfinite(x));
- if (w <= 0.0) {
- if (min_x > -std::numeric_limits<F>::infinity() && max_x < std::numeric_limits<F>::infinity())
- w = (max_x - min_x)/4;
- else
- w = std::max(((x < 0.0) ? -x : x)/4, 0.1);
- }
- // TRACE4(x, min_x, max_x, w);
- F logPx = logP(x);
- assert(std::isfinite(logPx));
- U nfeval = 1;
- for (U sample = 0; sample < nsamples; ++sample) {
- F x0 = x;
- F logU = logPx + log(u01()+1e-100);
- assert(std::isfinite(logU));
- F r = u01();
- F xl = std::max(min_x, x - r*w);
- F xr = std::min(max_x, x + (1-r)*w);
- // TRACE3(x, logPx, logU);
- while (xl > min_x && logP(xl) > logU) {
- xl -= w;
- w *= 2;
- ++nfeval;
- if (nfeval >= max_nfeval)
- std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << std::endl;
- assert(nfeval < max_nfeval);
- }
- xl = std::max(xl, min_x);
- while (xr < max_x && logP(xr) > logU) {
- xr += w;
- w *= 2;
- ++nfeval;
- if (nfeval >= max_nfeval)
- std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xr = " << xr << std::endl;
- assert(nfeval < max_nfeval);
- }
- xr = std::min(xr, max_x);
- while (true) {
- r = u01();
- x = r*xl + (1-r)*xr;
- assert(std::isfinite(x));
- logPx = logP(x);
- // TRACE4(logPx, x, xl, xr);
- assert(std::isfinite(logPx));
- ++nfeval;
- if (nfeval >= max_nfeval)
- std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << ", xr = " << xr << ", x = " << x << std::endl;
- assert(nfeval < max_nfeval);
- if (logPx > logU)
- break;
- else if (x > x0)
- xr = x;
- else
- xl = x;
- }
- // w = (4*w + (xr-xl))/5; // gradually adjust w
- }
- // TRACE2(logPx, x);
- return x;
-} // slice_sampler1d()
-*/
-
-#endif // SLICE_SAMPLER_H
diff --git a/gi/clda/src/timer.h b/gi/clda/src/timer.h
deleted file mode 100644
index 123d9a94..00000000
--- a/gi/clda/src/timer.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef _TIMER_STATS_H_
-#define _TIMER_STATS_H_
-
-#include <ctime>
-
-struct Timer {
- Timer() { Reset(); }
- void Reset() {
- start_t = clock();
- }
- double Elapsed() const {
- const clock_t end_t = clock();
- const double elapsed = (end_t - start_t) / 1000000.0;
- return elapsed;
- }
- private:
- std::clock_t start_t;
-};
-
-#endif