Diffstat (limited to 'gi')
234 files changed, 0 insertions, 36886 deletions
diff --git a/gi/clda/src/Makefile.am b/gi/clda/src/Makefile.am
deleted file mode 100644
index cdca1f97..00000000
--- a/gi/clda/src/Makefile.am
+++ /dev/null
@@ -1,6 +0,0 @@
-bin_PROGRAMS = clda
-
-clda_SOURCES = clda.cc
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I$(top_srcdir)/utils $(GTEST_CPPFLAGS)
-AM_LDFLAGS = $(top_srcdir)/utils/libutils.a -lz
diff --git a/gi/clda/src/ccrp.h b/gi/clda/src/ccrp.h
deleted file mode 100644
index a7c2825c..00000000
--- a/gi/clda/src/ccrp.h
+++ /dev/null
@@ -1,291 +0,0 @@
-#ifndef _CCRP_H_
-#define _CCRP_H_
-
-#include <numeric>
-#include <cassert>
-#include <cmath>
-#include <list>
-#include <iostream>
-#include <vector>
-#include <tr1/unordered_map>
-#include <boost/functional/hash.hpp>
-#include "sampler.h"
-#include "slice_sampler.h"
-
-// Chinese restaurant process (Pitman-Yor parameters) with table tracking.
-
-template <typename Dish, typename DishHash = boost::hash<Dish> >
-class CCRP {
- public:
-  CCRP(double disc, double conc) :
-    num_tables_(),
-    num_customers_(),
-    discount_(disc),
-    concentration_(conc),
-    discount_prior_alpha_(std::numeric_limits<double>::quiet_NaN()),
-    discount_prior_beta_(std::numeric_limits<double>::quiet_NaN()),
-    concentration_prior_shape_(std::numeric_limits<double>::quiet_NaN()),
-    concentration_prior_rate_(std::numeric_limits<double>::quiet_NaN()) {}
-
-  CCRP(double d_alpha, double d_beta, double c_shape, double c_rate, double d = 0.1, double c = 10.0) :
-    num_tables_(),
-    num_customers_(),
-    discount_(d),
-    concentration_(c),
-    discount_prior_alpha_(d_alpha),
-    discount_prior_beta_(d_beta),
-    concentration_prior_shape_(c_shape),
-    concentration_prior_rate_(c_rate) {}
-
-  double discount() const { return discount_; }
-  double concentration() const { return concentration_; }
-
-  bool has_discount_prior() const {
-    return !std::isnan(discount_prior_alpha_);
-  }
-
-  bool has_concentration_prior() const {
-    return !std::isnan(concentration_prior_shape_);
-  }
-
-  void clear() {
-    num_tables_ = 0;
-    num_customers_ = 0;
-    dish_locs_.clear();
-  }
-
-  unsigned num_tables(const Dish& dish) const {
-    const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
-    if (it == dish_locs_.end()) return 0;
-    return it->second.table_counts_.size();
-  }
-
-  unsigned num_customers() const {
-    return num_customers_;
-  }
-
-  unsigned num_customers(const Dish& dish) const {
-    const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
-    if (it == dish_locs_.end()) return 0;
-    return it->total_dish_count_;
-  }
-
-  // returns +1 or 0 indicating whether a new table was opened
-  int increment(const Dish& dish, const double& p0, MT19937* rng) {
-    DishLocations& loc = dish_locs_[dish];
-    bool share_table = false;
-    if (loc.total_dish_count_) {
-      const double p_empty = (concentration_ + num_tables_ * discount_) * p0;
-      const double p_share = (loc.total_dish_count_ - loc.table_counts_.size() * discount_);
-      share_table = rng->SelectSample(p_empty, p_share);
-    }
-    if (share_table) {
-      double r = rng->next() * (loc.total_dish_count_ - loc.table_counts_.size() * discount_);
-      for (typename std::list<unsigned>::iterator ti = loc.table_counts_.begin();
-           ti != loc.table_counts_.end(); ++ti) {
-        r -= (*ti - discount_);
-        if (r <= 0.0) {
-          ++(*ti);
-          break;
-        }
-      }
-      if (r > 0.0) {
-        std::cerr << "Serious error: r=" << r << std::endl;
-        Print(&std::cerr);
-        assert(r <= 0.0);
-      }
-    } else {
-      loc.table_counts_.push_back(1u);
-      ++num_tables_;
-    }
-    ++loc.total_dish_count_;
-    ++num_customers_;
-    return (share_table ? 0 : 1);
-  }
-
-  // returns -1 or 0, indicating whether a table was closed
-  int decrement(const Dish& dish, MT19937* rng) {
-    DishLocations& loc = dish_locs_[dish];
-    assert(loc.total_dish_count_);
-    if (loc.total_dish_count_ == 1) {
-      dish_locs_.erase(dish);
-      --num_tables_;
-      --num_customers_;
-      return -1;
-    } else {
-      int delta = 0;
-      // sample customer to remove UNIFORMLY. that is, do NOT use the discount
-      // here. if you do, it will introduce (unwanted) bias!
-      double r = rng->next() * loc.total_dish_count_;
-      --loc.total_dish_count_;
-      for (typename std::list<unsigned>::iterator ti = loc.table_counts_.begin();
-           ti != loc.table_counts_.end(); ++ti) {
-        r -= *ti;
-        if (r <= 0.0) {
-          if ((--(*ti)) == 0) {
-            --num_tables_;
-            delta = -1;
-            loc.table_counts_.erase(ti);
-          }
-          break;
-        }
-      }
-      if (r > 0.0) {
-        std::cerr << "Serious error: r=" << r << std::endl;
-        Print(&std::cerr);
-        assert(r <= 0.0);
-      }
-      --num_customers_;
-      return delta;
-    }
-  }
-
-  double prob(const Dish& dish, const double& p0) const {
-    const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
-    const double r = num_tables_ * discount_ + concentration_;
-    if (it == dish_locs_.end()) {
-      return r * p0 / (num_customers_ + concentration_);
-    } else {
-      return (it->second.total_dish_count_ - discount_ * it->second.table_counts_.size() + r * p0) /
-               (num_customers_ + concentration_);
-    }
-  }
-
-  double log_crp_prob() const {
-    return log_crp_prob(discount_, concentration_);
-  }
-
-  static double log_beta_density(const double& x, const double& alpha, const double& beta) {
-    assert(x > 0.0);
-    assert(x < 1.0);
-    assert(alpha > 0.0);
-    assert(beta > 0.0);
-    const double lp = (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta);
-    return lp;
-  }
-
-  static double log_gamma_density(const double& x, const double& shape, const double& rate) {
-    assert(x >= 0.0);
-    assert(shape > 0.0);
-    assert(rate > 0.0);
-    const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape);
-    return lp;
-  }
-
-  // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process
-  // does not include P_0's
-  double log_crp_prob(const double& discount, const double& concentration) const {
-    double lp = 0.0;
-    if (has_discount_prior())
-      lp = log_beta_density(discount, discount_prior_alpha_, discount_prior_beta_);
-    if (has_concentration_prior())
-      lp += log_gamma_density(concentration, concentration_prior_shape_, concentration_prior_rate_);
-    assert(lp <= 0.0);
-    if (num_customers_) {
-      if (discount > 0.0) {
-        const double r = lgamma(1.0 - discount);
-        lp += lgamma(concentration) - lgamma(concentration + num_customers_)
-             + num_tables_ * log(discount) + lgamma(concentration / discount + num_tables_)
-             - lgamma(concentration / discount);
-        assert(std::isfinite(lp));
-        for (typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.begin();
-             it != dish_locs_.end(); ++it) {
-          const DishLocations& cur = it->second;
-          for (std::list<unsigned>::const_iterator ti = cur.table_counts_.begin(); ti != cur.table_counts_.end(); ++ti) {
-            lp += lgamma(*ti - discount) - r;
-          }
-        }
-      } else {
-        assert(!"not implemented yet");
-      }
-    }
-    assert(std::isfinite(lp));
-    return lp;
-  }
-
-  void resample_hyperparameters(MT19937* rng) {
-    assert(has_discount_prior() || has_concentration_prior());
-    DiscountResampler dr(*this);
-    ConcentrationResampler cr(*this);
-    const int niterations = 10;
-    double gamma_upper = std::numeric_limits<double>::infinity();
-    for (int iter = 0; iter < 5; ++iter) {
-      if (has_concentration_prior()) {
-        concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0,
-                               gamma_upper, 0.0, niterations, 100*niterations);
-      }
-      if (has_discount_prior()) {
-        discount_ = slice_sampler1d(dr, discount_, *rng, std::numeric_limits<double>::min(),
-                               1.0, 0.0, niterations, 100*niterations);
-      }
-    }
-    concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0,
-                             gamma_upper, 0.0, niterations, 100*niterations);
-  }
-
-  struct DiscountResampler {
-    DiscountResampler(const CCRP& crp) : crp_(crp) {}
-    const CCRP& crp_;
-    double operator()(const double& proposed_discount) const {
-      return crp_.log_crp_prob(proposed_discount, crp_.concentration_);
-    }
-  };
-
-  struct ConcentrationResampler {
-    ConcentrationResampler(const CCRP& crp) : crp_(crp) {}
-    const CCRP& crp_;
-    double operator()(const double& proposed_concentration) const {
-      return crp_.log_crp_prob(crp_.discount_, proposed_concentration);
-    }
-  };
-
-  struct DishLocations {
-    DishLocations() : total_dish_count_() {}
-    unsigned total_dish_count_;        // customers at all tables with this dish
-    std::list<unsigned> table_counts_; // list<> gives O(1) deletion and insertion, which we want
-                                       // .size() is the number of tables for this dish
-  };
-
-  void Print(std::ostream* out) const {
-    for (typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.begin();
-         it != dish_locs_.end(); ++it) {
-      (*out) << it->first << " (" << it->second.total_dish_count_ << " on " << it->second.table_counts_.size() << " tables): ";
-      for (typename std::list<unsigned>::const_iterator i = it->second.table_counts_.begin();
-           i != it->second.table_counts_.end(); ++i) {
-        (*out) << " " << *i;
-      }
-      (*out) << std::endl;
-    }
-  }
-
-  typedef typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator const_iterator;
-  const_iterator begin() const {
-    return dish_locs_.begin();
-  }
-  const_iterator end() const {
-    return dish_locs_.end();
-  }
-
-  unsigned num_tables_;
-  unsigned num_customers_;
-  std::tr1::unordered_map<Dish, DishLocations, DishHash> dish_locs_;
-
-  double discount_;
-  double concentration_;
-
-  // optional beta prior on discount_ (NaN if no prior)
-  double discount_prior_alpha_;
-  double discount_prior_beta_;
-
-  // optional gamma prior on concentration_ (NaN if no prior)
-  double concentration_prior_shape_;
-  double concentration_prior_rate_;
-};
-
-template <typename T,typename H>
-std::ostream& operator<<(std::ostream& o, const CCRP<T,H>& c) {
-  c.Print(&o);
-  return o;
-}
-
-#endif
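Aside (not part of the commit): the CCRP class deleted above implements the standard Pitman-Yor predictive distribution. For a dish w seated c_w times at t_w tables, with n customers and T tables overall, discount d, concentration c, and base probability p0, its prob() method computes

    p(w) = (c_w - d*t_w + (c + d*T) * p0) / (n + c)

(and (c + d*T)*p0 / (n + c) for an unseen dish), while increment() opens a new table with probability proportional to (c + d*T)*p0 against (c_w - d*t_w) for joining an existing one. This reading follows directly from the code of prob() and increment().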
diff --git a/gi/clda/src/clda.cc b/gi/clda/src/clda.cc
deleted file mode 100644
index f548997f..00000000
--- a/gi/clda/src/clda.cc
+++ /dev/null
@@ -1,148 +0,0 @@
-#include <iostream>
-#include <vector>
-#include <map>
-#include <string>
-
-#include "timer.h"
-#include "crp.h"
-#include "ccrp.h"
-#include "sampler.h"
-#include "tdict.h"
-const size_t MAX_DOC_LEN_CHARS = 10000000;
-
-using namespace std;
-
-void ShowTopWordsForTopic(const map<WordID, int>& counts) {
-  multimap<int, WordID> ms;
-  for (map<WordID,int>::const_iterator it = counts.begin(); it != counts.end(); ++it)
-    ms.insert(make_pair(it->second, it->first));
-  int cc = 0;
-  for (multimap<int, WordID>::reverse_iterator it = ms.rbegin(); it != ms.rend(); ++it) {
-    cerr << it->first << ':' << TD::Convert(it->second) << " ";
-    ++cc;
-    if (cc==20) break;
-  }
-  cerr << endl;
-}
-
-int main(int argc, char** argv) {
-  if (argc != 3) {
-    cerr << "Usage: " << argv[0] << " num-classes num-samples\n";
-    return 1;
-  }
-  const int num_classes = atoi(argv[1]);
-  const int num_iterations = atoi(argv[2]);
-  const int burnin_size = num_iterations * 0.9;
-  if (num_classes < 2) {
-    cerr << "Must request more than 1 class\n";
-    return 1;
-  }
-  if (num_iterations < 5) {
-    cerr << "Must request more than 5 iterations\n";
-    return 1;
-  }
-  cerr << "CLASSES: " << num_classes << endl;
-  char* buf = new char[MAX_DOC_LEN_CHARS];
-  vector<vector<int> > wji;   // w[j][i] - observed word i of doc j
-  vector<vector<int> > zji;   // z[j][i] - topic assignment for word i of doc j
-  cerr << "READING DOCUMENTS\n";
-  while(cin) {
-    cin.getline(buf, MAX_DOC_LEN_CHARS);
-    if (buf[0] == 0) continue;
-    wji.push_back(vector<WordID>());
-    TD::ConvertSentence(buf, &wji.back());
-  }
-  cerr << "READ " << wji.size() << " DOCUMENTS\n";
-  MT19937 rng;
-  cerr << "INITIALIZING RANDOM TOPIC ASSIGNMENTS\n";
-  zji.resize(wji.size());
-  double disc = 0.1;
-  double beta = 10.0;
-  double alpha = 50.0;
-  const double uniform_topic = 1.0 / num_classes;
-  const double uniform_word = 1.0 / TD::NumWords();
-  vector<CCRP<int> > dr(zji.size(), CCRP<int>(1,1,1,1,disc, beta)); // dr[i] describes the probability of using a topic in document i
-  vector<CCRP<int> > wr(num_classes, CCRP<int>(1,1,1,1,disc, alpha)); // wr[k] describes the probability of generating a word in topic k
-  for (int j = 0; j < zji.size(); ++j) {
-    const size_t num_words = wji[j].size();
-    vector<int>& zj = zji[j];
-    const vector<int>& wj = wji[j];
-    zj.resize(num_words);
-    for (int i = 0; i < num_words; ++i) {
-      int random_topic = rng.next() * num_classes;
-      if (random_topic == num_classes) { --random_topic; }
-      zj[i] = random_topic;
-      const int word = wj[i];
-      dr[j].increment(random_topic, uniform_topic, &rng);
-      wr[random_topic].increment(word, uniform_word, &rng);
-    }
-  }
-  cerr << "SAMPLING\n";
-  vector<map<WordID, int> > t2w(num_classes);
-  Timer timer;
-  SampleSet<double> ss;
-  ss.resize(num_classes);
-  double total_time = 0;
-  for (int iter = 0; iter < num_iterations; ++iter) {
-    cerr << '.';
-    if (iter && iter % 10 == 0) {
-      total_time += timer.Elapsed();
-      timer.Reset();
-      double llh = 0;
-#if 1
-      for (int j = 0; j < dr.size(); ++j)
-        dr[j].resample_hyperparameters(&rng);
-      for (int j = 0; j < wr.size(); ++j)
-        wr[j].resample_hyperparameters(&rng);
-#endif
-
-      for (int j = 0; j < dr.size(); ++j)
-        llh += dr[j].log_crp_prob();
-      for (int j = 0; j < wr.size(); ++j)
-        llh += wr[j].log_crp_prob();
-      cerr << " [LLH=" << llh << " I=" << iter << "]\n";
-    }
-    for (int j = 0; j < zji.size(); ++j) {
-      const size_t num_words = wji[j].size();
-      vector<int>& zj = zji[j];
-      const vector<int>& wj = wji[j];
-      for (int i = 0; i < num_words; ++i) {
-        const int word = wj[i];
-        const int cur_topic = zj[i];
-        dr[j].decrement(cur_topic, &rng);
-        wr[cur_topic].decrement(word, &rng);
-
-        for (int k = 0; k < num_classes; ++k) {
-          ss[k]= dr[j].prob(k, uniform_topic) * wr[k].prob(word, uniform_word);
-        }
-        const int new_topic = rng.SelectSample(ss);
-        dr[j].increment(new_topic, uniform_topic, &rng);
-        wr[new_topic].increment(word, uniform_word, &rng);
-        zj[i] = new_topic;
-        if (iter > burnin_size) {
-          ++t2w[cur_topic][word];
-        }
-      }
-    }
-  }
-  for (int i = 0; i < num_classes; ++i) {
-    cerr << "---------------------------------\n";
-    cerr << " final PYP(" << wr[i].discount() << "," << wr[i].concentration() << ")\n";
-    ShowTopWordsForTopic(t2w[i]);
-  }
-  cerr << "-------------\n";
-#if 0
-  for (int j = 0; j < zji.size(); ++j) {
-    const size_t num_words = wji[j].size();
-    vector<int>& zj = zji[j];
-    const vector<int>& wj = wji[j];
-    zj.resize(num_words);
-    for (int i = 0; i < num_words; ++i) {
-      cerr << TD::Convert(wji[j][i]) << '(' << zj[i] << ") ";
-    }
-    cerr << endl;
-  }
-#endif
-  return 0;
-}
-
diff --git a/gi/clda/src/crp.h b/gi/clda/src/crp.h
deleted file mode 100644
index 9d35857e..00000000
--- a/gi/clda/src/crp.h
+++ /dev/null
@@ -1,50 +0,0 @@
-#ifndef _CRP_H_
-#define _CRP_H_
-
-// shamelessly adapted from code by Phil Blunsom and Trevor Cohn
-
-#include <boost/functional/hash.hpp>
-#include <tr1/unordered_map>
-
-#include "prob.h"
-
-template <typename DishType, typename Hash = boost::hash<DishType> >
-class CRP {
- public:
-  CRP(double alpha) : alpha_(alpha), palpha_(alpha), total_customers_() {}
-  void increment(const DishType& dish);
-  void decrement(const DishType& dish);
-  void erase(const DishType& dish) {
-    counts_.erase(dish);
-  }
-  inline int count(const DishType& dish) const {
-    const typename MapType::const_iterator i = counts_.find(dish);
-    if (i == counts_.end()) return 0; else return i->second;
-  }
-  inline prob_t prob(const DishType& dish, const prob_t& p0) const {
-    return (prob_t(count(dish)) + palpha_ * p0) / prob_t(total_customers_ + alpha_);
-  }
- private:
-  typedef std::tr1::unordered_map<DishType, int, Hash> MapType;
-  MapType counts_;
-  const double alpha_;
-  const prob_t palpha_;
-  int total_customers_;
-};
-
-template <typename Dish, typename Hash>
-void CRP<Dish,Hash>::increment(const Dish& dish) {
-  ++counts_[dish];
-  ++total_customers_;
-}
-
-template <typename Dish, typename Hash>
-void CRP<Dish,Hash>::decrement(const Dish& dish) {
-  typename MapType::iterator i = counts_.find(dish);
-  assert(i != counts_.end());
-  if (--i->second == 0)
-    counts_.erase(i);
-  --total_customers_;
-}
-
-#endif
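Aside (not part of the commit): a minimal sketch of how the single-parameter CRP<Dish> above was driven, assuming only the interface shown and cdec's prob_t log-space type from utils/prob.h.

#include <string>
#include "prob.h"   // cdec's log-space probability type (prob_t)
#include "crp.h"

int main() {
  CRP<std::string> crp(1.0);        // concentration alpha = 1.0
  crp.increment("dog");             // seat two customers for "dog"
  crp.increment("dog");
  crp.increment("cat");             // and one for "cat"
  const prob_t p0(1e-5);            // base probability of any dish
  // predictive probability: (count + alpha*p0) / (total + alpha)
  const prob_t p = crp.prob("dog", p0);
  (void)p;
  return 0;
}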
diff --git a/gi/clda/src/slice_sampler.h b/gi/clda/src/slice_sampler.h
deleted file mode 100644
index aa48a169..00000000
--- a/gi/clda/src/slice_sampler.h
+++ /dev/null
@@ -1,191 +0,0 @@
-//! slice-sampler.h is an MCMC slice sampler
-//!
-//! Mark Johnson, 1st August 2008
-
-#ifndef SLICE_SAMPLER_H
-#define SLICE_SAMPLER_H
-
-#include <algorithm>
-#include <cassert>
-#include <cmath>
-#include <iostream>
-#include <limits>
-
-//! slice_sampler_rfc_type{} returns the value of a user-specified
-//! function if the argument is within range, or - infinity otherwise
-//
-template <typename F, typename Fn, typename U>
-struct slice_sampler_rfc_type {
-  F min_x, max_x;
-  const Fn& f;
-  U max_nfeval, nfeval;
-  slice_sampler_rfc_type(F min_x, F max_x, const Fn& f, U max_nfeval)
-    : min_x(min_x), max_x(max_x), f(f), max_nfeval(max_nfeval), nfeval(0) { }
-
-  F operator() (F x) {
-    if (min_x < x && x < max_x) {
-      assert(++nfeval <= max_nfeval);
-      F fx = f(x);
-      assert(std::isfinite(fx));
-      return fx;
-    }
-      return -std::numeric_limits<F>::infinity();
-  }
-};  // slice_sampler_rfc_type{}
-
-//! slice_sampler1d() implements the univariate "range doubling" slice sampler
-//! described in Neal (2003) "Slice Sampling", The Annals of Statistics 31(3), 705-767.
-//
-template <typename F, typename LogF, typename Uniform01>
-F slice_sampler1d(const LogF& logF0,               //!< log of function to sample
-		  F x,                             //!< starting point
-		  Uniform01& u01,                  //!< uniform [0,1) random number generator
-		  F min_x = -std::numeric_limits<F>::infinity(),  //!< minimum value of support
-		  F max_x = std::numeric_limits<F>::infinity(),   //!< maximum value of support
-		  F w = 0.0,                       //!< guess at initial width
-		  unsigned nsamples=1,             //!< number of samples to draw
-		  unsigned max_nfeval=200)         //!< max number of function evaluations
-{
-  typedef unsigned U;
-  slice_sampler_rfc_type<F,LogF,U> logF(min_x, max_x, logF0, max_nfeval);
-
-  assert(std::isfinite(x));
-
-  if (w <= 0.0) {                           // set w to a default width
-    if (min_x > -std::numeric_limits<F>::infinity() && max_x < std::numeric_limits<F>::infinity())
-      w = (max_x - min_x)/4;
-    else
-      w = std::max(((x < 0.0) ? -x : x)/4, (F) 0.1);
-  }
-  assert(std::isfinite(w));
-
-  F logFx = logF(x);
-  for (U sample = 0; sample < nsamples; ++sample) {
-    F logY = logFx + log(u01()+1e-100);     //! slice logFx at this value
-    assert(std::isfinite(logY));
-
-    F xl = x - w*u01();                     //! lower bound on slice interval
-    F logFxl = logF(xl);
-    F xr = xl + w;                          //! upper bound on slice interval
-    F logFxr = logF(xr);
-
-    while (logY < logFxl || logY < logFxr)  // doubling procedure
-      if (u01() < 0.5)
-	logFxl = logF(xl -= xr - xl);
-      else
-	logFxr = logF(xr += xr - xl);
-
-    F xl1 = xl;
-    F xr1 = xr;
-    while (true) {                          // shrinking procedure
-      F x1 = xl1 + u01()*(xr1 - xl1);
-      if (logY < logF(x1)) {
-	F xl2 = xl;                         // acceptance procedure
-	F xr2 = xr;
-	bool d = false;
-	while (xr2 - xl2 > 1.1*w) {
-	  F xm = (xl2 + xr2)/2;
-	  if ((x < xm && x1 >= xm) || (x >= xm && x1 < xm))
-	    d = true;
-	  if (x1 < xm)
-	    xr2 = xm;
-	  else
-	    xl2 = xm;
-	  if (d && logY >= logF(xl2) && logY >= logF(xr2))
-	    goto unacceptable;
-	}
-	x = x1;
-	goto acceptable;
-      }
-      goto acceptable;
-    unacceptable:
-      if (x1 < x)                           // rest of shrinking procedure
-	xl1 = x1;
-      else
-	xr1 = x1;
-    }
-  acceptable:
-    w = (4*w + (xr1 - xl1))/5;              // update width estimate
-  }
-  return x;
-}
-
-/*
-//! slice_sampler1d() implements a 1-d MCMC slice sampler.
-//! It should be correct for unimodal distributions, but
-//! not for multimodal ones.
-//
-template <typename F, typename LogP, typename Uniform01>
-F slice_sampler1d(const LogP& logP,     //!< log of distribution to sample
-		  F x,                  //!< initial sample
-		  Uniform01& u01,       //!< uniform random number generator
-		  F min_x = -std::numeric_limits<F>::infinity(),  //!< minimum value of support
-		  F max_x = std::numeric_limits<F>::infinity(),   //!< maximum value of support
-		  F w = 0.0,            //!< guess at initial width
-		  unsigned nsamples=1,  //!< number of samples to draw
-		  unsigned max_nfeval=200)  //!< max number of function evaluations
-{
-  typedef unsigned U;
-  assert(std::isfinite(x));
-  if (w <= 0.0) {
-    if (min_x > -std::numeric_limits<F>::infinity() && max_x < std::numeric_limits<F>::infinity())
-      w = (max_x - min_x)/4;
-    else
-      w = std::max(((x < 0.0) ? -x : x)/4, 0.1);
-  }
-  // TRACE4(x, min_x, max_x, w);
-  F logPx = logP(x);
-  assert(std::isfinite(logPx));
-  U nfeval = 1;
-  for (U sample = 0; sample < nsamples; ++sample) {
-    F x0 = x;
-    F logU = logPx + log(u01()+1e-100);
-    assert(std::isfinite(logU));
-    F r = u01();
-    F xl = std::max(min_x, x - r*w);
-    F xr = std::min(max_x, x + (1-r)*w);
-    // TRACE3(x, logPx, logU);
-    while (xl > min_x && logP(xl) > logU) {
-      xl -= w;
-      w *= 2;
-      ++nfeval;
-      if (nfeval >= max_nfeval)
-	std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << std::endl;
-      assert(nfeval < max_nfeval);
-    }
-    xl = std::max(xl, min_x);
-    while (xr < max_x && logP(xr) > logU) {
-      xr += w;
-      w *= 2;
-      ++nfeval;
-      if (nfeval >= max_nfeval)
-	std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xr = " << xr << std::endl;
-      assert(nfeval < max_nfeval);
-    }
-    xr = std::min(xr, max_x);
-    while (true) {
-      r = u01();
-      x = r*xl + (1-r)*xr;
-      assert(std::isfinite(x));
-      logPx = logP(x);
-      // TRACE4(logPx, x, xl, xr);
-      assert(std::isfinite(logPx));
-      ++nfeval;
-      if (nfeval >= max_nfeval)
-	std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << ", xr = " << xr << ", x = " << x << std::endl;
-      assert(nfeval < max_nfeval);
-      if (logPx > logU)
-        break;
-      else if (x > x0)
-          xr = x;
-        else
-          xl = x;
-    }
-    // w = (4*w + (xr-xl))/5;   // gradually adjust w
-  }
-  // TRACE2(logPx, x);
-  return x;
-}  // slice_sampler1d()
-*/
-
-#endif  // SLICE_SAMPLER_H
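Aside (not part of the commit): a minimal sketch of calling the deleted slice_sampler1d, in the same style as resample_hyperparameters in ccrp.h above. That call site passes a dereferenced MT19937* as the Uniform01 argument, so MT19937-as-u01 is assumed here too; the Gamma(2,1) target is purely illustrative.

#include <cmath>
#include <limits>
#include "sampler.h"        // cdec's MT19937, used as the Uniform01 generator
#include "slice_sampler.h"

// Unnormalized log-density of Gamma(shape=2, rate=1), defined for x > 0.
struct LogGamma21 {
  double operator()(const double& x) const { return std::log(x) - x; }
};

double draw_gamma21(MT19937* rng) {
  LogGamma21 ld;
  return slice_sampler1d(ld, 1.0 /* start */, *rng,
                         0.0 /* min_x */,
                         std::numeric_limits<double>::infinity() /* max_x */,
                         0.0 /* let the sampler choose a width */,
                         10 /* nsamples */, 1000 /* max_nfeval */);
}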
diff --git a/gi/clda/src/timer.h b/gi/clda/src/timer.h
deleted file mode 100644
index 123d9a94..00000000
--- a/gi/clda/src/timer.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef _TIMER_STATS_H_
-#define _TIMER_STATS_H_
-
-#include <ctime>
-
-struct Timer {
-  Timer() { Reset(); }
-  void Reset() {
-    start_t = clock();
-  }
-  double Elapsed() const {
-    const clock_t end_t = clock();
-    const double elapsed = (end_t - start_t) / 1000000.0;
-    return elapsed;
-  }
- private:
-  std::clock_t start_t;
-};
-
-#endif
diff --git a/gi/evaluation/conditional_entropy.py b/gi/evaluation/conditional_entropy.py
deleted file mode 100644
index 356d3b1d..00000000
--- a/gi/evaluation/conditional_entropy.py
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/env python
-
-import sys, math, itertools, getopt
-
-def usage():
-    print >>sys.stderr, 'Usage:', sys.argv[0], '[-s slash_threshold] input-1 input-2'
-    sys.exit(0)
-
-optlist, args = getopt.getopt(sys.argv[1:], 'hs:')
-slash_threshold = None
-for opt, arg in optlist:
-    if opt == '-s':
-        slash_threshold = int(arg)
-    else:
-        usage()
-if len(args) != 2:
-    usage()
-
-ginfile = open(args[0])
-pinfile = open(args[1])
-
-# evaluating: H(G | P) = sum_{g,p} p(g,p) log { p(p) / p(g,p) }
-#                      = sum_{g,p} c(g,p)/N { log c(p) - log N - log c(g,p) + log N }
-#                      = 1/N sum_{g,p} c(g,p) { log c(p) - log c(g,p) }
-# where G = gold, P = predicted, N = number of events
-
-N = 0
-gold_frequencies = {}
-predict_frequencies = {}
-joint_frequencies = {}
-
-for gline, pline in itertools.izip(ginfile, pinfile):
-    gparts = gline.split('||| ')[1].split()
-    pparts = pline.split('||| ')[1].split()
-    assert len(gparts) == len(pparts)
-
-    for gpart, ppart in zip(gparts, pparts):
-        gtag = gpart.split(':',1)[1]
-        ptag = ppart.split(':',1)[1]
-
-        if slash_threshold == None or gtag.count('/') + gtag.count('\\') <= slash_threshold:
-            joint_frequencies.setdefault((gtag, ptag), 0)
-            joint_frequencies[gtag,ptag] += 1
-
-            predict_frequencies.setdefault(ptag, 0)
-            predict_frequencies[ptag] += 1
-
-            gold_frequencies.setdefault(gtag, 0)
-            gold_frequencies[gtag] += 1
-
-            N += 1
-
-hg2p = 0
-hp2g = 0
-for (gtag, ptag), cgp in joint_frequencies.items():
-    hp2g += cgp * (math.log(predict_frequencies[ptag], 2) - math.log(cgp, 2))
-    hg2p += cgp * (math.log(gold_frequencies[gtag], 2) - math.log(cgp, 2))
-hg2p /= N
-hp2g /= N
-
-print 'H(P|G)', hg2p, 'H(G|P)', hp2g, 'VI', hg2p + hp2g
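Aside (not part of the commit): a tiny worked instance of the identity in the script's comment, H(G|P) = (1/N) * sum_{g,p} c(g,p) * (log2 c(p) - log2 c(g,p)). Take N = 4 events with joint counts c(g1,p1) = 2, c(g2,p1) = 1, c(g2,p2) = 1, so the predicted-tag marginals are c(p1) = 3 and c(p2) = 1. Then

    H(G|P) = [ 2*(log2 3 - log2 2) + 1*(log2 3 - log2 1) + 1*(log2 1 - log2 1) ] / 4
           = (2*0.585 + 1.585 + 0) / 4
           ≈ 0.689 bits,

and the variation of information reported on the last line is VI = H(G|P) + H(P|G).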
diff --git a/gi/evaluation/confusion_matrix.py b/gi/evaluation/confusion_matrix.py
deleted file mode 100644
index 2dd7aa47..00000000
--- a/gi/evaluation/confusion_matrix.py
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env python
-
-import sys, math, itertools, getopt
-
-def usage():
-    print >>sys.stderr, 'Usage:', sys.argv[0], '[-s slash_threshold] [-p output] [-m] input-1 input-2'
-    sys.exit(0)
-
-optlist, args = getopt.getopt(sys.argv[1:], 'hs:mp:')
-slash_threshold = None
-output_fname = None
-show_matrix = False
-for opt, arg in optlist:
-    if opt == '-s':
-        slash_threshold = int(arg)
-    elif opt == '-p':
-        output_fname = arg
-    elif opt == '-m':
-        show_matrix = True
-    else:
-        usage()
-if len(args) != 2 or (not show_matrix and not output_fname):
-    usage()
-
-ginfile = open(args[0])
-pinfile = open(args[1])
-
-if output_fname:
-    try:
-        import Image, ImageDraw
-    except ImportError:
-        print >>sys.stderr, "Error: Python Image Library not available. Did you forget to set your PYTHONPATH environment variable?"
-        sys.exit(1)
-
-N = 0
-gold_frequencies = {}
-predict_frequencies = {}
-joint_frequencies = {}
-
-for gline, pline in itertools.izip(ginfile, pinfile):
-    gparts = gline.split('||| ')[1].split()
-    pparts = pline.split('||| ')[1].split()
-    assert len(gparts) == len(pparts)
-
-    for gpart, ppart in zip(gparts, pparts):
-        gtag = gpart.split(':',1)[1]
-        ptag = ppart.split(':',1)[1]
-
-        if slash_threshold == None or gtag.count('/') + gtag.count('\\') <= slash_threshold:
-            joint_frequencies.setdefault((gtag, ptag), 0)
-            joint_frequencies[gtag,ptag] += 1
-
-            predict_frequencies.setdefault(ptag, 0)
-            predict_frequencies[ptag] += 1
-
-            gold_frequencies.setdefault(gtag, 0)
-            gold_frequencies[gtag] += 1
-
-            N += 1
-
-# find top tags
-gtags = gold_frequencies.items()
-gtags.sort(lambda x,y: x[1]-y[1])
-gtags.reverse()
-#gtags = gtags[:50]
-
-preds = predict_frequencies.items()
-preds.sort(lambda x,y: x[1]-y[1])
-preds.reverse()
-
-if show_matrix:
-    print '%7s %7s' % ('pred', 'cnt'),
-    for gtag, gcount in gtags: print '%7s' % gtag,
-    print
-    print '=' * 80
-
-    for ptag, pcount in preds:
-        print '%7s %7d' % (ptag, pcount),
-        for gtag, gcount in gtags:
-            print '%7d' % joint_frequencies.get((gtag, ptag), 0),
-        print
-
-    print '%7s %7d' % ('total', N),
-    for gtag, gcount in gtags: print '%7d' % gcount,
-    print
-
-if output_fname:
-    offset=10
-
-    image = Image.new("RGB", (len(preds), len(gtags)), (255, 255, 255))
-    #hsl(hue, saturation%, lightness%)
-
-    # re-sort preds to get a better diagonal
-    ptags=[]
-    if True:
-        ptags = map(lambda (p,c): p, preds)
-    else:
-        remaining = set(predict_frequencies.keys())
-        for y, (gtag, gcount) in enumerate(gtags):
-            best = (None, 0)
-            for ptag in remaining:
-                #pcount = predict_frequencies[ptag]
-                p = joint_frequencies.get((gtag, ptag), 0)# / float(pcount)
-                if p > best[1]: best = (ptag, p)
-            ptags.append(ptag)
-            remaining.remove(ptag)
-            if not remaining: break
-
-    print 'Predicted tag ordering:', ' '.join(ptags)
-    print 'Gold tag ordering:', ' '.join(map(lambda (t,c): t, gtags))
-
-    draw = ImageDraw.Draw(image)
-    for x, ptag in enumerate(ptags):
-        pcount = predict_frequencies[ptag]
-        minval = math.log(offset)
-        maxval = math.log(pcount + offset)
-        for y, (gtag, gcount) in enumerate(gtags):
-            f = math.log(offset + joint_frequencies.get((gtag, ptag), 0))
-            z = int(240. * (maxval - f) / float(maxval - minval))
-            #print x, y, z, f, maxval
-            draw.point([(x,y)], fill='hsl(%d, 100%%, 50%%)' % z)
-    del draw
-    image.save(output_fname)
diff --git a/gi/evaluation/entropy.py b/gi/evaluation/entropy.py
deleted file mode 100644
index ec1ef502..00000000
--- a/gi/evaluation/entropy.py
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/usr/bin/env python
-
-import sys, math, itertools, getopt
-
-def usage():
-    print >>sys.stderr, 'Usage:', sys.argv[0], '[-s slash_threshold] input file'
-    sys.exit(0)
-
-optlist, args = getopt.getopt(sys.argv[1:], 'hs:')
-slash_threshold = None
-for opt, arg in optlist:
-    if opt == '-s':
-        slash_threshold = int(arg)
-    else:
-        usage()
-if len(args) != 1:
-    usage()
-
-infile = open(args[0])
-N = 0
-frequencies = {}
-
-for line in infile:
-
-    for part in line.split('||| ')[1].split():
-        tag = part.split(':',1)[1]
-
-        if slash_threshold == None or tag.count('/') + tag.count('\\') <= slash_threshold:
-            frequencies.setdefault(tag, 0)
-            frequencies[tag] += 1
-            N += 1
-
-h = 0
-for tag, c in frequencies.items():
-    h -= c * (math.log(c, 2) - math.log(N, 2))
-h /= N
-
-print 'entropy', h
diff --git a/gi/evaluation/extract_ccg_labels.py b/gi/evaluation/extract_ccg_labels.py
deleted file mode 100644
index e0034648..00000000
--- a/gi/evaluation/extract_ccg_labels.py
+++ /dev/null
@@ -1,129 +0,0 @@
-#!/usr/bin/env python
-
-#
-# Takes spans input along with treebank and spits out CG style categories for each span.
-#   spans = output from CDEC's extools/extractor with --base_phrase_spans option
-#   treebank = PTB format, one tree per line
-#
-# Output is in CDEC labelled-span format
-#
-
-import sys, itertools, tree
-
-tinfile = open(sys.argv[1])
-einfile = open(sys.argv[2])
-
-def number_leaves(node, next=0):
-    left, right = None, None
-    for child in node.children:
-        l, r = number_leaves(child, next)
-        next = max(next, r+1)
-        if left == None or l < left:
-            left = l
-        if right == None or r > right:
-            right = r
-
-    #print node, left, right, next
-    if left == None or right == None:
-        assert not node.children
-        left = right = next
-
-    node.left = left
-    node.right = right
-
-    return left, right
-
-def ancestor(node, indices):
-    #print node, node.left, node.right, indices
-    # returns the deepest node covering all the indices
-    if min(indices) >= node.left and max(indices) <= node.right:
-        # try the children
-        for child in node.children:
-            x = ancestor(child, indices)
-            if x: return x
-        return node
-    else:
-        return None
-
-def frontier(node, indices):
-    #print 'frontier for node', node, 'indices', indices
-    if node.left > max(indices) or node.right < min(indices):
-        #print '\toutside'
-        return [node]
-    elif node.children:
-        #print '\tcovering at least part'
-        ns = []
-        for child in node.children:
-            n = frontier(child, indices)
-            ns.extend(n)
-        return ns
-    else:
-        return [node]
-
-def project_heads(node):
-    #print 'project_heads', node
-    is_head = node.data.tag.endswith('-HEAD')
-    if node.children:
-        found = 0
-        for child in node.children:
-            x = project_heads(child)
-            if x:
-                node.data.tag = x
-                found += 1
-        assert found == 1
-    elif is_head:
-        node.data.tag = node.data.tag[:-len('-HEAD')]
-
-    if is_head:
-        return node.data.tag
-    else:
-        return None
-
-for tline, eline in itertools.izip(tinfile, einfile):
-    if tline.strip() != '(())':
-        if tline.startswith('( '):
-            tline = tline[2:-1].strip()
-        tr = tree.parse_PST(tline)
-	if tr != None:
-		number_leaves(tr)
-		#project_heads(tr) # assumes Bikel-style head annotation for the input trees
-    else:
-        tr = None
-
-    parts = eline.strip().split(" ||| ")
-    zh, en = parts[:2]
-    spans = parts[-1]
-    print '|||',
-    for span in spans.split():
-        sps = span.split(":")
-        i, j, x, y = map(int, sps[0].split("-"))
-
-        if tr:
-            a = ancestor(tr, range(x,y))
-	    try:
-		fs = frontier(a, range(x,y))
-	    except:
-		print >>sys.stderr, "problem with line", tline.strip(), "--", eline.strip()
-		raise
-
-            #print x, y
-            #print 'ancestor', a
-            #print 'frontier', fs
-
-            cat = a.data.tag
-            for f in fs:
-                if f.right < x:
-                    cat += '\\' + f.data.tag
-                else:
-                    break
-            fs.reverse()
-            for f in fs:
-                if f.left >= y:
-                    cat += '/' + f.data.tag
-                else:
-                    break
-        else:
-            cat = 'FAIL'
-
-        print '%d-%d:%s' % (x, y, cat),
-    print
diff --git a/gi/evaluation/tree.py b/gi/evaluation/tree.py
deleted file mode 100644
index 702d80b6..00000000
--- a/gi/evaluation/tree.py
+++ /dev/null
@@ -1,485 +0,0 @@
-import re, sys
-
-class Symbol:
-    def __init__(self, nonterm, term=None, var=None):
-        assert not (term != None and var != None)
-        self.tag = nonterm
-        self.token = term
-        self.variable = var
-
-    def is_variable(self):
-        return self.variable != None
-
-    def __eq__(self, other):
-        return self.tag == other.tag and self.token == other.token and self.variable == other.variable
-
-    def __ne__(self, other):
-        return not (self == other)
-
-    def __hash__(self):
-        return hash((self.tag, self.token, self.variable))
-
-    def __repr__(self):
-        return str(self)
-
-    def __cmp__(self, other):
-        return cmp((self.tag, self.token, self.variable),
-                   (other.tag, other.token, other.variable))
-
-    def __str__(self):
-        parts = []
-	if False: # DEPENDENCY
-	    if self.token:
-		parts.append(str(self.token))
-	    elif self.variable != None:
-		parts.append('#%d' % self.variable)
-	    if self.tag:
-		parts.append(str(self.tag))
-	    return '/'.join(parts)
-	else:
-	    if self.tag:
-		parts.append(str(self.tag))
-	    if self.token:
-		parts.append(str(self.token))
-	    elif self.variable != None:
-		parts.append('#%d' % self.variable)
-	    return ' '.join(parts)
-
-class TreeNode:
-    def __init__(self, data, children=None, order=-1):
-        self.data = data
-        self.children = []
-        self.order = order
-        self.parent = None
-        if children: self.children = children
-
-    def insert(self, child):
-        self.children.append(child)
-        child.parent = self
-
-    def leaves(self):
-        ls = []
-        for node in self.xtraversal():
-            if not node.children:
-                ls.append(node.data)
-        return ls
-
-    def leaf_nodes(self):
-        ls = []
-        for node in self.xtraversal():
-            if not node.children:
-                ls.append(node)
-        return ls
-
-    def max_depth(self):
-        d = 1
-        for child in self.children:
-            d = max(d, 1 + child.max_depth())
-        if not self.children and self.data.token:
-            d = 2
-        return d
-
-    def max_width(self):
-        w = 0
-        for child in self.children:
-           w += child.max_width()
-        return max(1, w)
-
-    def num_internal_nodes(self):
-        if self.children:
-            n = 1
-            for child in self.children:
-                n += child.num_internal_nodes()
-            return n
-        elif self.data.token:
-            return 1
-        else:
-            return 0
-
-    def postorder_traversal(self, visit):
-        """
-	Postorder traversal; no guarantee that terminals will be read in the
-	correct order for dep. trees.
-        """
-        for child in self.children:
-            child.traversal(visit)
-	visit(self)
-
-    def traversal(self, visit):
-        """
-        Preorder for phrase structure trees, and inorder for dependency trees.
-        In both cases the terminals will be read off in the correct order.
-        """
-        visited_self = False
-        if self.order <= 0:
-            visited_self = True
-            visit(self)
-
-        for i, child in enumerate(self.children):
-            child.traversal(visit)
-            if i + 1 == self.order:
-                visited_self = True
-                visit(self)
-
-        assert visited_self
-
-    def xpostorder_traversal(self):
-        for child in self.children:
-            for node in child.xpostorder_traversal():
-                yield node
-        yield self
-
-    def xtraversal(self):
-        visited_self = False
-        if self.order <= 0:
-            visited_self = True
-            yield self
-
-        for i, child in enumerate(self.children):
-            for d in child.xtraversal():
-                yield d
-
-            if i + 1 == self.order:
-                visited_self = True
-                yield self
-
-        assert visited_self
-
-    def xpostorder_traversal(self):
-        for i, child in enumerate(self.children):
-            for d in child.xpostorder_traversal():
-                yield d
-        yield self
-
-    def edges(self):
-        es = []
-        self.traverse_edges(lambda h,c: es.append((h,c)))
-        return es
-
-    def traverse_edges(self, visit):
-        for child in self.children:
-            visit(self.data, child.data)
-            child.traverse_edges(visit)
-
-    def subtrees(self, include_self=False):
-        st = []
-        if include_self:
-            stack = [self]
-        else:
-            stack = self.children[:]
-
-        while stack:
-            node = stack.pop()
-            st.append(node)
-            stack.extend(node.children)
-        return st
-
-    def find_parent(self, node):
-        try:
-            index = self.children.index(node)
-            return self, index
-        except ValueError:
-            for child in self.children:
-                if isinstance(child, TreeNode):
-                    r = child.find_parent(node)
-                    if r: return r
-        return None
-
-    def is_ancestor_of(self, node):
-        if self == node:
-            return True
-        for child in self.children:
-            if child.is_ancestor_of(child):
-                return True
-        return False
-
-    def find(self, node):
-        if self == node:
-            return self
-        for child in self.children:
-            if isinstance(child, TreeNode):
-                r = child.find(node)
-                if r: return r
-            else:
-                if child == node:
-                   return r
-        return None
-
-    def equals_ignorecase(self, other):
-        if not isinstance(other, TreeNode):
-            return False
-        if self.data != other.data:
-            return False
-        if len(self.children) != len(other.children):
-            return False
-        for mc, oc in zip(self.children, other.children):
-            if isinstance(mc, TreeNode):
-                if not mc.equals_ignorecase(oc):
-                    return False
-            else:
-                if mc.lower() != oc.lower():
-                    return False
-        return True
-
-    def node_number(self, numbering, next=0):
-        if self.order <= 0:
-            numbering[id(self)] = next
-            next += 1
-
-        for i, child in enumerate(self.children):
-            next = child.node_number(numbering, next)
-            if i + 1 == self.order:
-                numbering[id(self)] = next
-                next += 1
-
-        return next
-
-    def display_conll(self, out):
-        numbering = {}
-        self.node_number(numbering)
-        next = 0
-        self.children[0].traversal(lambda x: \
-            out.write('%d\t%s\t%s\t%s\t%s\t_\t%d\tLAB\n' \
-             % (numbering[id(x)], x.data.token, x.data.token,
-                x.data.tag, x.data.tag, numbering[id(x.parent)])))
-        out.write('\n')
-
-    def size(self):
-        sz = 1
-        for child in self.children:
-            sz += child.size()
-        return sz
-
-    def __eq__(self, other):
-        if isinstance(other, TreeNode) and self.data == other.data \
-                and self.children == other.children:
-            return True
-        return False
-
-    def __cmp__(self, other):
-        if not isinstance(other, TreeNode): return 1
-        n = cmp(self.data, other.data)
-        if n != 0: return n
-        n = len(self.children) - len(other.children)
-        if n != 0: return n
-        for sc, oc in zip(self.children, other.children):
-            n = cmp(sc, oc)
-            if n != 0: return n
-        return 0
-
-    def __ne__(self, other):
-        return not self.__eq__(other)
-
-    def __hash__(self):
-        return hash((self.data, tuple(self.children)))
-
-    def __repr__(self):
-        return str(self)
-
-    def __str__(self):
-        s = '('
-        space = False
-        if self.order <= 0:
-            s += str(self.data)
-            space = True
-        for i, child in enumerate(self.children):
-            if space: s += ' '
-            s += str(child)
-            space = True
-            if i+1 == self.order:
-                s += ' ' + str(self.data)
-        return s + ')'
-
-def read_PSTs(fname):
-    infile = open(fname)
-    trees = []
-    for line in infile:
-        trees.append(parse_PST(line.strip()))
-    infile.close()
-    return trees
-
-def parse_PST_multiline(infile, hash_is_var=True):
-    buf = ''
-    num_open = 0
-    while True:
-        line = infile.readline()
-        if not line:
-            return None
-        buf += ' ' + line.rstrip()
-        num_open += line.count('(') - line.count(')')
-        if num_open == 0:
-            break
-
-    return parse_PST(buf, hash_is_var)
-
-def parse_PST(line, hash_is_var=True):
-    line = line.rstrip()
-    if not line or line.lower() == 'null':
-        return None
-
-    # allow either (a/DT) or (DT a)
-    #parts_re = re.compile(r'(\(*)([^/)]*)(?:/([^)]*))?(\)*)$')
-
-    # only allow (DT a)
-    parts_re = re.compile(r'(\(*)([^)]*)(\)*)$')
-
-    root = TreeNode(Symbol('TOP'))
-    stack = [root]
-    for part in line.rstrip().split():
-        m = parts_re.match(part)
-        #opening, tok_or_tag, tag, closing = m.groups()
-        opening, tok_or_tag, closing = m.groups()
-	tag = None
-        #print 'token', part, 'bits', m.groups()
-        for i in opening:
-            node = TreeNode(Symbol(None))
-            stack[-1].insert(node)
-            stack.append(node)
-
-        if tag:
-            stack[-1].data.tag = tag
-            if hash_is_var and tok_or_tag.startswith('#'):
-                stack[-1].data.variable = int(tok_or_tag[1:])
-            else:
-                stack[-1].data.token = tok_or_tag
-        else:
-            if stack[-1].data.tag == None:
-                stack[-1].data.tag = tok_or_tag
-            else:
-                if hash_is_var and tok_or_tag.startswith('#'):
-                    try:
-                        stack[-1].data.variable = int(tok_or_tag[1:])
-                    except ValueError: # it's really a token!
-                        #print >>sys.stderr, 'Warning: # used for token:', tok_or_tag
-                        stack[-1].data.token = tok_or_tag
-                else:
-                    stack[-1].data.token = tok_or_tag
-
-        for i in closing:
-            stack.pop()
-
-    #assert str(root.children[0]) == line
-    return root.children[0]
-
-def read_DTs(fname):
-    infile = open(fname)
-    trees = []
-    while True:
-        t = parse_DT(infile)
-        if t: trees.append(t)
-        else: break
-    infile.close()
-    return trees
-
-def read_bracketed_DTs(fname):
-    infile = open(fname)
-    trees = []
-    for line in infile:
-        trees.append(parse_bracketed_DT(line))
-    infile.close()
-    return trees
-
-def parse_DT(infile):
-    tokens = [Symbol('ROOT')]
-    children = {}
-
-    for line in infile:
-        parts = line.rstrip().split()
-        #print parts
-        if not parts: break
-        index = len(tokens)
-        token = parts[1]
-        tag = parts[3]
-        parent = int(parts[6])
-        if token.startswith('#'):
-            tokens.append(Symbol(tag, var=int(token[1:])))
-        else:
-            tokens.append(Symbol(tag, token))
-        children.setdefault(parent, set()).add(index)
-
-    if len(tokens) == 1: return None
-
-    root = TreeNode(Symbol('ROOT'), [], 0)
-    schedule = []
-    for child in sorted(children[0]):
-        schedule.append((root, child))
-
-    while schedule:
-        parent, index = schedule[0]
-        del schedule[0]
-
-        node = TreeNode(tokens[index])
-        node.order = 0
-        parent.insert(node)
-
-        for child in sorted(children.get(index, [])):
-            schedule.append((node, child))
-            if child < index:
-                node.order += 1
-
-    return root
-
-_bracket_split_re = re.compile(r'([(]*)([^)/]*)(?:/([^)]*))?([)]*)')
-
-def parse_bracketed_DT(line, insert_root=True):
-    line = line.rstrip()
-    if not line or line == 'NULL': return None
-    #print line
-
-    root = TreeNode(Symbol('ROOT'))
-    stack = [root]
-    for part in line.rstrip().split():
-        m = _bracket_split_re.match(part)
-
-        for c in m.group(1):
-            node = TreeNode(Symbol(None))
-            stack[-1].insert(node)
-            stack.append(node)
-
-        if m.group(3) != None:
-            if m.group(2).startswith('#'):
-                stack[-1].data.variable = int(m.group(2)[1:])
-            else:
-                stack[-1].data.token = m.group(2)
-            stack[-1].data.tag = m.group(3)
-        else:
-            stack[-1].data.tag = m.group(2)
-        stack[-1].order = len(stack[-1].children)
-        # FIXME: also check for vars
-
-        for c in m.group(4):
-            stack.pop()
-
-    assert len(stack) == 1
-    if not insert_root or root.children[0].data.tag == 'ROOT':
-        return root.children[0]
-    else:
-        return root
-
-_bracket_split_notag_re = re.compile(r'([(]*)([^)/]*)([)]*)')
-
-def parse_bracketed_untagged_DT(line):
-    line = line.rstrip()
-    if not line or line == 'NULL': return None
-
-    root = TreeNode(Symbol('TOP'))
-    stack = [root]
-    for part in line.rstrip().split():
-        m = _bracket_split_notag_re.match(part)
-
-        for c in m.group(1):
-            node = TreeNode(Symbol(None))
-            stack[-1].insert(node)
-            stack.append(node)
-
-        if stack[-1].data.token == None:
-            stack[-1].data.token = m.group(2)
-            stack[-1].order = len(stack[-1].children)
-        else:
-            child = TreeNode(Symbol(nonterm=None, term=m.group(2)))
-            stack[-1].insert(child)
-
-        for c in m.group(3):
-            stack.pop()
-
-    return root.children[0]
diff --git a/gi/markov_al/Makefile.am b/gi/markov_al/Makefile.am
deleted file mode 100644
index fe3e3349..00000000
--- a/gi/markov_al/Makefile.am
+++ /dev/null
@@ -1,6 +0,0 @@
-bin_PROGRAMS = ml
-
-ml_SOURCES = ml.cc
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I$(top_srcdir)/utils $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder
-AM_LDFLAGS = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
diff --git a/gi/markov_al/README b/gi/markov_al/README
deleted file mode 100644
index 9c10f7cd..00000000
--- a/gi/markov_al/README
+++ /dev/null
@@ -1,2 +0,0 @@
-Experimental translation models with Markovian dependencies.
- diff --git a/gi/markov_al/ml.cc b/gi/markov_al/ml.cc deleted file mode 100644 index 1e71edd6..00000000 --- a/gi/markov_al/ml.cc +++ /dev/null @@ -1,470 +0,0 @@ -#include <iostream> -#include <tr1/unordered_map> - -#include <boost/shared_ptr.hpp> -#include <boost/functional.hpp> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "tdict.h" -#include "filelib.h" -#include "sampler.h" -#include "ccrp_onetable.h" -#include "array2d.h" - -using namespace std; -using namespace std::tr1; -namespace po = boost::program_options; - -void PrintTopCustomers(const CCRP_OneTable<WordID>& crp) { -  for (CCRP_OneTable<WordID>::const_iterator it = crp.begin(); it != crp.end(); ++it) { -    cerr << "  " << TD::Convert(it->first) << " = " << it->second << endl; -  } -} - -void PrintAlignment(const vector<WordID>& src, const vector<WordID>& trg, const vector<unsigned char>& a) { -  cerr << TD::GetString(src) << endl << TD::GetString(trg) << endl; -  Array2D<bool> al(src.size(), trg.size()); -  for (int i = 0; i < a.size(); ++i) -    if (a[i] != 255) al(a[i], i) = true; -  cerr << al << endl; -} - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { -  po::options_description opts("Configuration options"); -  opts.add_options() -        ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples") -        ("input,i",po::value<string>(),"Read parallel data from") -        ("random_seed,S",po::value<uint32_t>(), "Random seed"); -  po::options_description clo("Command line options"); -  clo.add_options() -        ("config", po::value<string>(), "Configuration file") -        ("help,h", "Print this help message and exit"); -  po::options_description dconfig_options, dcmdline_options; -  dconfig_options.add(opts); -  dcmdline_options.add(opts).add(clo); -   -  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); -  if (conf->count("config")) { -    ifstream config((*conf)["config"].as<string>().c_str()); -    po::store(po::parse_config_file(config, dconfig_options), *conf); -  } -  po::notify(*conf); - -  if (conf->count("help") || (conf->count("input") == 0)) { -    cerr << dcmdline_options << endl; -    exit(1); -  } -} - -struct Unigram; -struct Bigram { -  Bigram() : trg(), cond() {} -  Bigram(WordID prev, WordID cur, WordID t) : trg(t) { cond.first = prev; cond.second = cur; } -  const pair<WordID,WordID>& ConditioningPair() const { -    return cond; -  } -  WordID& prev_src() { return cond.first; } -  WordID& cur_src() { return cond.second; } -  const WordID& prev_src() const { return cond.first; } -  const WordID& cur_src() const { return cond.second; } -  WordID trg; - private: -  pair<WordID, WordID> cond; -}; - -struct Unigram { -  Unigram() : cur_src(), trg() {} -  Unigram(WordID s, WordID t) : cur_src(s), trg(t) {} -  WordID cur_src; -  WordID trg; -}; - -ostream& operator<<(ostream& os, const Bigram& b) { -  os << "( " << TD::Convert(b.trg) << " | " << TD::Convert(b.prev_src()) << " , " << TD::Convert(b.cur_src()) << " )"; -  return os; -} - -ostream& operator<<(ostream& os, const Unigram& u) { -  os << "( " << TD::Convert(u.trg) << " | " << TD::Convert(u.cur_src) << " )"; -  return os; -} - -bool operator==(const Bigram& a, const Bigram& b) { -  return a.trg == b.trg && a.cur_src() == b.cur_src() && a.prev_src() == b.prev_src(); -} - -bool operator==(const Unigram& a, const Unigram& b) { -  return a.trg == b.trg && a.cur_src == b.cur_src; -} - -size_t hash_value(const Bigram& b) { -  size_t h = 
boost::hash_value(b.prev_src()); -  boost::hash_combine(h, boost::hash_value(b.cur_src())); -  boost::hash_combine(h, boost::hash_value(b.trg)); -  return h; -} - -size_t hash_value(const Unigram& u) { -  size_t h = boost::hash_value(u.cur_src); -  boost::hash_combine(h, boost::hash_value(u.trg)); -  return h; -} - -void ReadParallelCorpus(const string& filename, -                vector<vector<WordID> >* f, -                vector<vector<WordID> >* e, -                set<WordID>* vocab_f, -                set<WordID>* vocab_e) { -  f->clear(); -  e->clear(); -  vocab_f->clear(); -  vocab_e->clear(); -  istream* in; -  if (filename == "-") -    in = &cin; -  else -    in = new ifstream(filename.c_str()); -  assert(*in); -  string line; -  const WordID kDIV = TD::Convert("|||"); -  vector<WordID> tmp; -  while(*in) { -    getline(*in, line); -    if (line.empty() && !*in) break; -    e->push_back(vector<int>()); -    f->push_back(vector<int>()); -    vector<int>& le = e->back(); -    vector<int>& lf = f->back(); -    tmp.clear(); -    TD::ConvertSentence(line, &tmp); -    bool isf = true; -    for (unsigned i = 0; i < tmp.size(); ++i) { -      const int cur = tmp[i]; -      if (isf) { -        if (kDIV == cur) { isf = false; } else { -          lf.push_back(cur); -          vocab_f->insert(cur); -        } -      } else { -        assert(cur != kDIV); -        le.push_back(cur); -        vocab_e->insert(cur); -      } -    } -    assert(isf == false); -  } -  if (in != &cin) delete in; -} - -struct UnigramModel { -  UnigramModel(size_t src_voc_size, size_t trg_voc_size) : -    unigrams(TD::NumWords() + 1, CCRP_OneTable<WordID>(1,1,1,1)), -    p0(1.0 / trg_voc_size) {} - -  void increment(const Bigram& b) { -    unigrams[b.cur_src()].increment(b.trg); -  } - -  void decrement(const Bigram& b) { -    unigrams[b.cur_src()].decrement(b.trg); -  } - -  double prob(const Bigram& b) const { -    const double q0 = unigrams[b.cur_src()].prob(b.trg, p0); -    return q0; -  } - -  double LogLikelihood() const { -    double llh = 0; -    for (unsigned i = 0; i < unigrams.size(); ++i) { -      const CCRP_OneTable<WordID>& crp = unigrams[i]; -      if (crp.num_customers() > 0) { -        llh += crp.log_crp_prob(); -        llh += crp.num_tables() * log(p0); -      } -    } -    return llh; -  } - -  void ResampleHyperparameters(MT19937* rng) { -    for (unsigned i = 0; i < unigrams.size(); ++i) -      unigrams[i].resample_hyperparameters(rng); -  } - -  vector<CCRP_OneTable<WordID> > unigrams;  // unigrams[src].prob(trg, p0) = p(trg|src) - -  const double p0; -}; - -struct BigramModel { -  BigramModel(size_t src_voc_size, size_t trg_voc_size) : -    unigrams(TD::NumWords() + 1, CCRP_OneTable<WordID>(1,1,1,1)), -    p0(1.0 / trg_voc_size) {} - -  void increment(const Bigram& b) { -    BigramMap::iterator it = bigrams.find(b.ConditioningPair()); -    if (it == bigrams.end()) { -      it = bigrams.insert(make_pair(b.ConditioningPair(), CCRP_OneTable<WordID>(1,1,1,1))).first; -    } -    if (it->second.increment(b.trg)) -      unigrams[b.cur_src()].increment(b.trg); -  } - -  void decrement(const Bigram& b) { -    BigramMap::iterator it = bigrams.find(b.ConditioningPair()); -    assert(it != bigrams.end()); -    if (it->second.decrement(b.trg)) { -      unigrams[b.cur_src()].decrement(b.trg); -      if (it->second.num_customers() == 0) -        bigrams.erase(it); -    } -  } - -  double prob(const Bigram& b) const { -    const double q0 = unigrams[b.cur_src()].prob(b.trg, p0); -    const 
BigramMap::const_iterator it = bigrams.find(b.ConditioningPair()); -    if (it == bigrams.end()) return q0; -    return it->second.prob(b.trg, q0); -  } - -  double LogLikelihood() const { -    double llh = 0; -    for (unsigned i = 0; i < unigrams.size(); ++i) { -      const CCRP_OneTable<WordID>& crp = unigrams[i]; -      if (crp.num_customers() > 0) { -        llh += crp.log_crp_prob(); -        llh += crp.num_tables() * log(p0); -      } -    } -    for (BigramMap::const_iterator it = bigrams.begin(); it != bigrams.end(); ++it) { -      const CCRP_OneTable<WordID>& crp = it->second; -      const WordID cur_src = it->first.second; -      llh += crp.log_crp_prob(); -      for (CCRP_OneTable<WordID>::const_iterator bit = crp.begin(); bit != crp.end(); ++bit) { -        llh += log(unigrams[cur_src].prob(bit->second, p0)); -      } -    } -    return llh; -  } - -  void ResampleHyperparameters(MT19937* rng) { -    for (unsigned i = 0; i < unigrams.size(); ++i) -      unigrams[i].resample_hyperparameters(rng); -    for (BigramMap::iterator it = bigrams.begin(); it != bigrams.end(); ++it) -      it->second.resample_hyperparameters(rng); -  } - -  typedef unordered_map<pair<WordID,WordID>, CCRP_OneTable<WordID>, boost::hash<pair<WordID,WordID> > > BigramMap; -  BigramMap bigrams;  // bigrams[(src-1,src)].prob(trg, q0) = p(trg|src,src-1) -  vector<CCRP_OneTable<WordID> > unigrams;  // unigrams[src].prob(trg, p0) = p(trg|src) - -  const double p0; -}; - -struct BigramAlignmentModel { -  BigramAlignmentModel(size_t src_voc_size, size_t trg_voc_size) : bigrams(TD::NumWords() + 1, CCRP_OneTable<WordID>(1,1,1,1)), p0(1.0 / src_voc_size) {} -  void increment(WordID prev, WordID next) { -    bigrams[prev].increment(next);  // hierarchy? -  } -  void decrement(WordID prev, WordID next) { -    bigrams[prev].decrement(next);  // hierarchy? 
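// NOTE on the "hierarchy?" comments above: as written, each prev-word CRP
// backs off directly to the uniform p0 over source words. A hierarchical
// variant would presumably back off to a shared unigram CRP instead, e.g.
// (sketch only): bigrams[prev].prob(next, shared_unigram.prob(next, p0)).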
-  } -  double prob(WordID prev, WordID next) { -    return bigrams[prev].prob(next, p0); -  } -  double LogLikelihood() const { -    double llh = 0; -    for (unsigned i = 0; i < bigrams.size(); ++i) { -      const CCRP_OneTable<WordID>& crp = bigrams[i]; -      if (crp.num_customers() > 0) { -        llh += crp.log_crp_prob(); -        llh += crp.num_tables() * log(p0); -      } -    } -    return llh; -  } - -  vector<CCRP_OneTable<WordID> > bigrams;  // bigrams[prev].prob(next, p0) = p(next|prev) -  const double p0; -}; - -struct Alignment { -  vector<unsigned char> a; -}; - -int main(int argc, char** argv) { -  po::variables_map conf; -  InitCommandLine(argc, argv, &conf); -  const unsigned samples = conf["samples"].as<unsigned>(); - -  boost::shared_ptr<MT19937> prng; -  if (conf.count("random_seed")) -    prng.reset(new MT19937(conf["random_seed"].as<uint32_t>())); -  else -    prng.reset(new MT19937); -  MT19937& rng = *prng; - -  vector<vector<WordID> > corpuse, corpusf; -  set<WordID> vocabe, vocabf; -  cerr << "Reading corpus...\n"; -  ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe); -  cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n"; -  cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; -  assert(corpusf.size() == corpuse.size()); -  const size_t corpus_len = corpusf.size(); -  const WordID kNULL = TD::Convert("<eps>"); -  const WordID kBOS = TD::Convert("<s>"); -  const WordID kEOS = TD::Convert("</s>"); -  Bigram TT(kBOS, TD::Convert("我"), TD::Convert("i")); -  Bigram TT2(kBOS, TD::Convert("要"), TD::Convert("i")); - -  UnigramModel model(vocabf.size(), vocabe.size()); -  vector<Alignment> alignments(corpus_len); -  for (unsigned ci = 0; ci < corpus_len; ++ci) { -    const vector<WordID>& src = corpusf[ci]; -    const vector<WordID>& trg = corpuse[ci]; -    vector<unsigned char>& alg = alignments[ci].a; -    alg.resize(trg.size()); -    int lenp1 = src.size() + 1; -    WordID prev_src = kBOS; -    for (int j = 0; j < trg.size(); ++j) { -      int samp = lenp1 * rng.next(); -      --samp; -      if (samp < 0) samp = 255; -      alg[j] = samp; -      WordID cur_src = (samp == 255 ? kNULL : src[alg[j]]); -      Bigram b(prev_src, cur_src, trg[j]); -      model.increment(b); -      prev_src = cur_src; -    } -    Bigram b(prev_src, kEOS, kEOS); -    model.increment(b); -  } -  cerr << "Initial LLH: " << model.LogLikelihood() << endl; - -  SampleSet<double> ss; -  for (unsigned si = 0; si < 50; ++si) { -    for (unsigned ci = 0; ci < corpus_len; ++ci) { -      const vector<WordID>& src = corpusf[ci]; -      const vector<WordID>& trg = corpuse[ci]; -      vector<unsigned char>& alg = alignments[ci].a; -      WordID prev_src = kBOS; -      for (unsigned j = 0; j < trg.size(); ++j) { -        unsigned char& a_j = alg[j]; -        WordID cur_e_a_j = (a_j == 255 ? kNULL : src[a_j]); -        Bigram b(prev_src, cur_e_a_j, trg[j]); -        //cerr << "DEC: " << b << "\t" << nextb << endl; -        model.decrement(b); -        ss.clear(); -        for (unsigned i = 0; i <= src.size(); ++i) { -          const WordID cur_src = (i ? src[i-1] : kNULL); -          b.cur_src() = cur_src; -          ss.add(model.prob(b)); -        } -        int sampled_a_j = rng.SelectSample(ss); -        a_j = (sampled_a_j ? sampled_a_j - 1 : 255); -        cur_e_a_j = (a_j == 255 ? 
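// NOTE: the conditional continues below. Alignments are stored as unsigned
// chars with 255 reserved as the NULL-alignment code; the sampler draws an
// index in [0, |src|] and maps 0 to 255 (NULL) and i > 0 to source position
// i - 1. This encoding caps usable source sentence length at 255 tokens.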
kNULL : src[a_j]); -        b.cur_src() = cur_e_a_j; -        //cerr << "INC: " << b << "\t" << nextb << endl; -        model.increment(b); -        prev_src = cur_e_a_j; -      } -    } -    cerr << '.' << flush; -    if (si % 10 == 9) { -      cerr << "[LLH prev=" << model.LogLikelihood(); -      //model.ResampleHyperparameters(&rng); -      cerr << " new=" << model.LogLikelihood() << "]\n"; -      //pair<WordID,WordID> xx = make_pair(kBOS, TD::Convert("我")); -      //PrintTopCustomers(model.bigrams.find(xx)->second); -      cerr << "p(" << TT << ") = " << model.prob(TT) << endl; -      cerr << "p(" << TT2 << ") = " << model.prob(TT2) << endl; -      PrintAlignment(corpusf[0], corpuse[0], alignments[0].a); -    } -  } -  { -  // MODEL 2 -  BigramModel model(vocabf.size(), vocabe.size()); -  BigramAlignmentModel amodel(vocabf.size(), vocabe.size()); -  for (unsigned ci = 0; ci < corpus_len; ++ci) { -    const vector<WordID>& src = corpusf[ci]; -    const vector<WordID>& trg = corpuse[ci]; -    vector<unsigned char>& alg = alignments[ci].a; -    WordID prev_src = kBOS; -    for (int j = 0; j < trg.size(); ++j) { -      WordID cur_src = (alg[j] == 255 ? kNULL : src[alg[j]]); -      Bigram b(prev_src, cur_src, trg[j]); -      model.increment(b); -      amodel.increment(prev_src, cur_src); -      prev_src = cur_src; -    } -    amodel.increment(prev_src, kEOS); -    Bigram b(prev_src, kEOS, kEOS); -    model.increment(b); -  } -  cerr << "Initial LLH: " << model.LogLikelihood() << " " << amodel.LogLikelihood() << endl; - -  SampleSet<double> ss; -  for (unsigned si = 0; si < samples; ++si) { -    for (unsigned ci = 0; ci < corpus_len; ++ci) { -      const vector<WordID>& src = corpusf[ci]; -      const vector<WordID>& trg = corpuse[ci]; -      vector<unsigned char>& alg = alignments[ci].a; -      WordID prev_src = kBOS; -      for (unsigned j = 0; j < trg.size(); ++j) { -        unsigned char& a_j = alg[j]; -        WordID cur_e_a_j = (a_j == 255 ? kNULL : src[a_j]); -        Bigram b(prev_src, cur_e_a_j, trg[j]); -        WordID next_src = kEOS; -        WordID next_trg = kEOS; -        if (j < (trg.size() - 1)) { -          next_src = (alg[j+1] == 255 ? kNULL : src[alg[j + 1]]); -          next_trg = trg[j + 1]; -        } -        Bigram nextb(cur_e_a_j, next_src, next_trg); -        //cerr << "DEC: " << b << "\t" << nextb << endl; -        model.decrement(b); -        model.decrement(nextb); -        amodel.decrement(prev_src, cur_e_a_j); -        amodel.decrement(cur_e_a_j, next_src); -        ss.clear(); -        for (unsigned i = 0; i <= src.size(); ++i) { -          const WordID cur_src = (i ? src[i-1] : kNULL); -          b.cur_src() = cur_src; -          ss.add(model.prob(b) * model.prob(nextb) * amodel.prob(prev_src, cur_src) * amodel.prob(cur_src, next_src)); -          //cerr << log(ss[ss.size() - 1]) << "\t" << b << endl; -        } -        int sampled_a_j = rng.SelectSample(ss); -        a_j = (sampled_a_j ? sampled_a_j - 1 : 255); -        cur_e_a_j = (a_j == 255 ? kNULL : src[a_j]); -        b.cur_src() = cur_e_a_j; -        nextb.prev_src() = cur_e_a_j; -        //cerr << "INC: " << b << "\t" << nextb << endl; -        //exit(1); -        model.increment(b); -        model.increment(nextb); -        amodel.increment(prev_src, cur_e_a_j); -        amodel.increment(cur_e_a_j, next_src); -        prev_src = cur_e_a_j; -      } -    } -    cerr << '.' 
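// NOTE: the progress dot continues below. In this bigram (Model 2) sampler,
// changing a_j touches two translation bigrams and two alignment
// transitions, so each proposal i is weighted by four factors:
//   w(i) = p(trg_j | prev_src, src_i) * p(trg_{j+1} | src_i, next_src)
//        * p(src_i | prev_src) * p(next_src | src_i)
// which is why both b/nextb and both amodel transitions are decremented
// before scoring and re-incremented after sampling.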
<< flush; -    if (si % 10 == 9) { -      cerr << "[LLH prev=" << (model.LogLikelihood() + amodel.LogLikelihood()); -      //model.ResampleHyperparameters(&rng); -      cerr << " new=" << model.LogLikelihood() << "]\n"; -      pair<WordID,WordID> xx = make_pair(kBOS, TD::Convert("我")); -      cerr << "p(" << TT << ") = " << model.prob(TT) << endl; -      cerr << "p(" << TT2 << ") = " << model.prob(TT2) << endl; -      pair<WordID,WordID> xx2 = make_pair(kBOS, TD::Convert("要")); -      PrintTopCustomers(model.bigrams.find(xx)->second); -      //PrintTopCustomers(amodel.bigrams[TD::Convert("<s>")]); -      //PrintTopCustomers(model.unigrams[TD::Convert("<eps>")]); -      PrintAlignment(corpusf[0], corpuse[0], alignments[0].a); -    } -  } -  } -  return 0; -} - diff --git a/gi/morf-segmentation/filter_docs.pl b/gi/morf-segmentation/filter_docs.pl deleted file mode 100755 index a78575da..00000000 --- a/gi/morf-segmentation/filter_docs.pl +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/perl - -#Filters the phrase&cluster document set to retain only documents that correspond to words or morphs, i.e. not crossing word boundaries. - -#Usage: filter_docs.pl [mark] -#  STDIN: data in the doc.txt format (i.e. phrase\t blahblah ), most likely from cdec extractor -#  STDOUT: the matching subset, same format - -use utf8; -my $letter=qr/\p{L}\p{M}*/; # see http://www.regular-expressions.info/unicode.html - -my $morph=qr/$letter+/; - -my $m = "##"; # marker used to indicate morphemes -if ((scalar @ARGV) >= 1) { -   $m = $ARGV[0]; -   shift; -} -print STDERR "Using $m to filter for morphemes\n"; - -my $expr = qr/^($morph\Q$m\E)? ?(\Q$m\E$morph\Q$m\E)* ?(\Q$m\E$morph)?\t/; #\Q and \E bounded sections are escaped -while(<>) { -   /$expr/ && print; -} diff --git a/gi/morf-segmentation/invalid_vocab.patterns b/gi/morf-segmentation/invalid_vocab.patterns deleted file mode 100644 index 473ce1b1..00000000 --- a/gi/morf-segmentation/invalid_vocab.patterns +++ /dev/null @@ -1,6 +0,0 @@ -[[:digit:]] -[] !"#$%&()*+,./:;<=>?@[\^_`{|}~] -^'$ --$ -^- -^$ diff --git a/gi/morf-segmentation/linestripper.py b/gi/morf-segmentation/linestripper.py deleted file mode 100755 index 04e9044a..00000000 --- a/gi/morf-segmentation/linestripper.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/python - -import sys - -#linestripper   file file maxlen [numlines] - -if len(sys.argv) < 3: -  print "linestripper   file1 file2 maxlen [numlines]"  -  print " outputs subset of file1 to stdout, ..of file2 to stderr" -  sys.exit(1) - - -f1 = open(sys.argv[1],'r') -f2 = open(sys.argv[2],'r') - -maxlen=int(sys.argv[3]) -numlines = 0 - -if len(sys.argv) > 4: -  numlines = int(sys.argv[4]) - -count=0 -for line1 in f1: -  line2 = f2.readline() -   -  w1 = len(line1.strip().split()) -  w2 = len(line2.strip().split()) - -  if w1 <= maxlen and w2 <= maxlen: -    count = count + 1 -    sys.stdout.write(line1) -    sys.stderr.write(line2) -  -  if numlines > 0 and count >= numlines: -    break - -f1.close() -f2.close() -   - diff --git a/gi/morf-segmentation/morf-pipeline.pl b/gi/morf-segmentation/morf-pipeline.pl deleted file mode 100755 index 46eb5b46..00000000 --- a/gi/morf-segmentation/morf-pipeline.pl +++ /dev/null @@ -1,486 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use File::Copy; - - -# Preprocessing pipeline to take care of word segmentation -# Learns a segmentation model for each/either side of the parallel corpus using all train/dev/test data -# Applies the segmentation where necessary. -# Learns word alignments on the preprocessed training data. 
-# Outputs script files used later to score output. - - -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; } - -use Getopt::Long "GetOptions"; - -my $GZIP = 'gzip'; -my $ZCAT = 'gunzip -c'; -my $SED = 'sed -e'; - -my $MORF_TRAIN = "$SCRIPT_DIR/morftrain.sh"; -my $MORF_SEGMENT = "$SCRIPT_DIR/morfsegment.py"; - -my $LINESTRIPPER = "$SCRIPT_DIR/linestripper.py"; -my $ALIGNER = "/export/ws10smt/software/berkeleyaligner/berkeleyaligner.jar"; -#java -d64 -Xmx10g -jar $ALIGNER ++word-align.conf >> aligner.log -assert_exec($MORF_TRAIN, $LINESTRIPPER, $MORF_SEGMENT, $ALIGNER); - -my $OUTPUT = './morfwork'; -my $PPL_SRC = 50; -my $PPL_TRG = 50; -my $MARKER = "#"; -my $MAX_WORDS = 40; -my $SENTENCES;# = 100000; -my $SPLIT_TYPE = ""; #possible values: s, t, st, or (empty string) -my $NAME_SHORTCUT; - -usage() unless &GetOptions('max_words=i' => \$MAX_WORDS, -                           'output=s' => \$OUTPUT, -                           'ppl_src=i' => \$PPL_SRC, -                           'ppl_trg=i' => \$PPL_TRG, -                           'sentences=i' => \$SENTENCES, -                           'marker=s' => \$MARKER, -                           'split=s' => \$SPLIT_TYPE, -                           'get_name_only' => \$NAME_SHORTCUT, -                          ); - -usage() unless scalar @ARGV >= 2; - -my %CORPUS; # for (src,trg) it has (orig, name, filtered, final) - -$CORPUS{'src'}{'orig'} = $ARGV[0]; -open F, "<$CORPUS{'src'}{'orig'}" or die "Can't read $CORPUS{'src'}{'orig'}: $!"; close F; -$CORPUS{'src'}{'name'} = get_basename($CORPUS{'src'}{'orig'}); - -$CORPUS{'trg'}{'orig'} = $ARGV[1]; -open F, "<$CORPUS{'trg'}{'orig'}" or die "Can't read $CORPUS{'trg'}{'orig'}: $!"; close F; -$CORPUS{'trg'}{'name'} = get_basename($CORPUS{'trg'}{'orig'}); - -my %DEV; # for (src,trg) has (orig, final.split, final.unsplit) -if (@ARGV >= 4) { -  $DEV{'src'}{'orig'} = $ARGV[2]; -  open F, "<$DEV{'src'}{'orig'}" or die "Can't read $DEV{'src'}{'orig'}: $!"; close F; -  $DEV{'src'}{'name'} = get_basename($DEV{'src'}{'orig'}); -  $DEV{'trg'}{'orig'} = $ARGV[3]; -  open F, "<$DEV{'trg'}{'orig'}" or die "Can't read $DEV{'trg'}{'orig'}: $!"; close F; -  $DEV{'trg'}{'name'} = get_basename($DEV{'trg'}{'orig'}); -} - -my %TEST; # for (src,trg) has (orig, name)  -if (@ARGV >= 6) { -  $TEST{'src'}{'orig'} = $ARGV[4]; -  open F, "<$TEST{'src'}{'orig'}" or die "Can't read $TEST{'src'}{'orig'}: $!"; close F; -  $TEST{'src'}{'name'} = get_basename($TEST{'src'}{'orig'}); -  $TEST{'trg'}{'orig'} = $ARGV[5]; -  open F, "<$TEST{'trg'}{'orig'}" or die "Can't read $TEST{'trg'}{'orig'}: $!"; close F; -  $TEST{'trg'}{'name'} = get_basename($TEST{'trg'}{'orig'}); -} - -my $SPLIT_SRC; #use these to check whether that part is being split -my $SPLIT_TRG; - -#OUTPUT WILL GO IN THESE -my $CORPUS_DIR = $OUTPUT . '/' . corpus_dir();            #subsampled corpus -my $MODEL_SRC_DIR = $OUTPUT . '/' . model_dir("src"); #splitting.. -my $MODEL_TRG_DIR = $OUTPUT . '/' . model_dir("trg"); # .. models -my $PROCESSED_DIR = $OUTPUT . '/' . processed_dir();      #segmented corpora+alignments -my $ALIGNMENT_DIR = $PROCESSED_DIR . '/alignments'; - -$CORPUS{'src'}{'filtered'} = $CORPUS_DIR . "/$CORPUS{'src'}{'name'}"; -$CORPUS{'trg'}{'filtered'} = $CORPUS_DIR . 
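# NOTE: the assignment continues below. Directory names encode the settings
# via the path functions later in this script: corpus_dir() is
# "s<sentences|_all>.w<max_words>", model_dir() appends ".PPL<ppl>.<side>",
# and processed_dir() appends split_name()'s ".sp_<ppl|0>_<ppl|0>". E.g.
# (hypothetical): --sentences 100000 --split s would place the processed
# data in ./morfwork/s100000.w40.sp_50_0.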
"/$CORPUS{'trg'}{'name'}"; - -print STDERR "Output: $OUTPUT\n"; -print STDERR "Corpus: $CORPUS_DIR\n"; -print STDERR "Model-src: $MODEL_SRC_DIR\n"; -print STDERR "Model-trg: $MODEL_TRG_DIR\n"; -print STDERR "Finaldir: $PROCESSED_DIR\n"; - -safemkdir($OUTPUT) or die "Couldn't create output directory $OUTPUT: $!"; -safemkdir($CORPUS_DIR) or die "Couldn't create output directory $CORPUS_DIR: $!"; -filter_corpus(); - -safemkdir($PROCESSED_DIR); -safemkdir($ALIGNMENT_DIR); - -if ($SPLIT_SRC) { -  safemkdir($MODEL_SRC_DIR) or die "Couldn't create output directory $MODEL_SRC_DIR: $!"; -  learn_segmentation("src"); -  apply_segmentation_side("src", $MODEL_SRC_DIR);   -} - -#assume that unsplit hypotheses will be scored against an aritificially split target test set; thus obtain a target splitting model   -#TODO: add a flag to override this behaviour -safemkdir($MODEL_TRG_DIR) or die "Couldn't create output directory $MODEL_TRG_DIR: $!"; -learn_segmentation("trg"); -$TEST{'trg'}{'finalunsplit'} = "$PROCESSED_DIR/$TEST{'trg'}{'name'}"; -copy($TEST{'trg'}{'orig'}, $TEST{'trg'}{'finalunsplit'}) or die "Could not copy unsegmented test set";   - -if ($SPLIT_TRG) { -  apply_segmentation_side("trg", $MODEL_TRG_DIR);   -  } else { -  $TEST{'trg'}{'finalsplit'} = "$PROCESSED_DIR/$TEST{'trg'}{'name'}.split"; -  apply_segmentation_any($MODEL_TRG_DIR, $TEST{'trg'}{'finalunsplit'}, $TEST{'trg'}{'finalsplit'});   -} - -write_eval_sh("$PROCESSED_DIR/eval-devtest.sh"); - -#copy corpora if they haven't been put in place by splitting operations -place_missing_data_side('src'); -place_missing_data_side('trg'); - -do_align(); - -if ($CORPUS{'src'}{'orig'} && $DEV{'src'}{'orig'} && $TEST{'src'}{'orig'}) { -  print STDERR "Putting the config file entry in $PROCESSED_DIR/exp.config\n"; -#format is: -  # nlfr100k_unsplit /export/ws10smt/jan/nlfr/morfwork/s100k.w40.sp_0 corpus.nl-fr.al fr-3.lm.gz dev.nl dev.fr test2008.nl eval-devtest.sh -  my $line = split_name() . " $PROCESSED_DIR corpus.src-trg.al LMFILE.lm.gz"; -  $line = $line . " $DEV{'src'}{'name'} $DEV{'trg'}{'name'}"; -  $line = $line . " " . get_basename($TEST{'src'}{$SPLIT_SRC ? "finalsplit" : "finalunsplit"}) . " eval-devtest.sh"; -  safesystem("echo '$line' > $PROCESSED_DIR/exp.config"); -} - -system("date"); -print STDERR "All done. You now need to train a language model (if target split), put it in the right dir and update the config file.\n\n"; - -############################## BILINGUAL ################################### - -sub filter_corpus { -  print STDERR "\n!!!FILTERING TRAINING COPRUS!!!\n"; -  if ( -f $CORPUS{'src'}{'filtered'} && -f $CORPUS{'trg'}{'filtered'}) { -    print STDERR "$CORPUS{'src'}{'filtered'} and $CORPUS{'trg'}{'filtered'} exist, reusing...\n"; -    return; -  } -  my $args = "$CORPUS{'src'}{'orig'} $CORPUS{'trg'}{'orig'} $MAX_WORDS"; -  if ($SENTENCES) { $args = $args . 
" $SENTENCES"; }  -  safesystem("$LINESTRIPPER $args 1> $CORPUS{'src'}{'filtered'} 2> $CORPUS{'trg'}{'filtered'}") or die "Failed to filter training corpus for length."; -} - -sub learn_segmentation -{ -  my $WHICH = shift; -  my $corpus; my $dev; my $test; my $moddir;  my $ppl; - -  $corpus = $CORPUS{$WHICH}{'filtered'}; -  $dev = $DEV{$WHICH}{'orig'}; -  $test = $TEST{$WHICH}{'orig'}; - -  if ($WHICH eq "src") { -    $moddir = $MODEL_SRC_DIR; -    $ppl = $PPL_SRC; -  } else { -    $moddir = $MODEL_TRG_DIR; -    $ppl = $PPL_TRG; -  } -  my $cmd = "cat $corpus"; -  if ($dev) { $cmd = "$cmd $dev"; } -  if ($test) { $cmd = "$cmd $test"; } -  my $tmpfile = "$CORPUS_DIR/all.tmp.gz"; -  safesystem("$cmd | $GZIP > $tmpfile") or die "Failed to concatenate data for model learning.."; -  assert_marker($tmpfile); - -  learn_segmentation_side($tmpfile, $moddir, $ppl, $WHICH); -  safesystem("rm $tmpfile"); -} - -sub do_align { -  print STDERR "\n!!!WORD ALIGNMENT!!!\n"; -  system("date"); - -  my $ALIGNMENTS = "$ALIGNMENT_DIR/training.align"; -  if ( -f $ALIGNMENTS ) { -    print STDERR "$ALIGNMENTS  exists, reusing...\n"; -    return; -  }  -  my $conf_file = "$ALIGNMENT_DIR/word-align.conf"; -     -  #decorate training files with identifiers to stop the aligner from training on dev and test when rerun in future. -  safesystem("cd $PROCESSED_DIR && ln -s $CORPUS{'src'}{'name'} corpus.src") or die "Failed to symlink: $!"; -  safesystem("cd $PROCESSED_DIR && ln -s $CORPUS{'trg'}{'name'} corpus.trg") or die "Failed to symlink: $!"; - -  write_wconf($conf_file, $PROCESSED_DIR);   -  system("java -d64 -Xmx24g -jar $ALIGNER ++$conf_file > $ALIGNMENT_DIR/aligner.log"); - -  if (! -f $ALIGNMENTS) { die "Failed to run word alignment.";} - -  my $cmd = "paste $PROCESSED_DIR/corpus.src $PROCESSED_DIR/corpus.trg $ALIGNMENTS"; -  $cmd = $cmd . " | sed 's/\\t/ \|\|\| /g' > $PROCESSED_DIR/corpus.src-trg.al"; -  safesystem($cmd) or die "Failed to paste into aligned corpus file."; - -} - -############################# MONOLINGUAL ################################# - -#copy the necessary data files that weren't place by segmentation -sub place_missing_data_side { -  my $side = shift; - -  ifne_copy($CORPUS{$side}{'filtered'}, "$PROCESSED_DIR/$CORPUS{$side}{'name'}") ; - -  if ($DEV{$side}{'orig'} && ! -f "$PROCESSED_DIR/$DEV{$side}{'name'}") { -    $DEV{$side}{'final'} = "$PROCESSED_DIR/$DEV{$side}{'name'}"; -    copy($DEV{$side}{'orig'}, $DEV{$side}{'final'}) or die "Copy failed: $!"; -  } - -  if ($TEST{$side}{'orig'} && ! -f "$PROCESSED_DIR/$TEST{$side}{'name'}" && ! 
$TEST{$side}{'finalunsplit'}) { -    $TEST{$side}{'finalunsplit'} = "$PROCESSED_DIR/$TEST{$side}{'name'}"; -    copy($TEST{$side}{'orig'}, $TEST{$side}{'finalunsplit'}) or die "Copy failed: $!"; -  } - -} - -sub apply_segmentation_side { -  my ($side, $moddir) = @_; -  -  print STDERR "\n!!!APPLYING SEGMENTATION MODEL ($side)!!!\n"; -  apply_segmentation_any($moddir, $CORPUS{$side}{'filtered'}, "$PROCESSED_DIR/$CORPUS{$side}{'name'}"); -  if ($DEV{$side}{'orig'}) { -     $DEV{$side}{'final'} = "$PROCESSED_DIR/$DEV{$side}{'name'}"; -    apply_segmentation_any($moddir, $DEV{$side}{'orig'}, "$DEV{$side}{'final'}"); -  } -  if ($TEST{$side}{'orig'}) { -    $TEST{$side}{'finalsplit'} = "$PROCESSED_DIR/$TEST{$side}{'name'}.split"; -    apply_segmentation_any($moddir, $TEST{$side}{'orig'}, $TEST{$side}{'finalsplit'} ); -  }  - -} - -sub learn_segmentation_side { -  my($INPUT_FILE, $SEGOUT_DIR, $PPL, $LANG) = @_; - -  print STDERR "\n!!!LEARNING SEGMENTATION MODEL ($LANG)!!!\n"; -  system("date"); -  my $SEG_FILE = $SEGOUT_DIR . "/segmentation.ready"; -   if ( -f $SEG_FILE) { -    print STDERR "$SEG_FILE exists, reusing...\n"; -    return; -  } -  my $cmd = "$MORF_TRAIN $INPUT_FILE $SEGOUT_DIR $PPL \"$MARKER\""; -  safesystem($cmd) or die "Failed to learn segmentation model"; -} - -sub apply_segmentation_any { -  my($moddir, $datfile, $outfile) = @_; -  if ( -f $outfile) { -    print STDERR "$outfile exists, reusing...\n"; -    return; -  } -   -  my $args = "$moddir/inputvocab.gz $moddir/segmentation.ready \"$MARKER\""; -  safesystem("cat $datfile | $MORF_SEGMENT $args &> $outfile") or die "Could not segment $datfile"; -} - -##################### PATH FUNCTIONS ########################## - -sub beautify_numlines { -  return ($SENTENCES ? $SENTENCES : "_all"); -} - -sub corpus_dir { -  return "s" . beautify_numlines() . ".w" . $MAX_WORDS; -} - -sub model_dir { -  my $lang = shift; -  if ($lang eq "src") {  -    return corpus_dir() . ".PPL" . $PPL_SRC . ".src"; -  } elsif ($lang eq "trg") { -    return corpus_dir() .  ".PPL" . $PPL_TRG . ".trg"; -  } else { -    return "PPLundef"; -  }     -} - -sub processed_dir { -  return corpus_dir() . "." . split_name(); -} - -########################## HELPER FUNCTIONS ############################ - -sub ifne_copy { -  my ($src, $dest) = @_; -  if (! -f $dest) { -    copy($src, $dest) or die "Copy failed: $!"; -  } -} - -sub split_name { -  #parses SPLIT_TYPE, which can have the following values -  # t|s|ts|st (last 2 are equiv) -  # or is undefined when no splitting is done -  my $name = ""; -   -  if ($SPLIT_TYPE) {  -    $SPLIT_SRC = lc($SPLIT_TYPE) =~ /s/; -    $SPLIT_TRG = lc($SPLIT_TYPE) =~ /t/; -    $name = $name . ($SPLIT_SRC ? $PPL_SRC : "0"); -    $name = $name . "_" . ($SPLIT_TRG ? $PPL_TRG : "0");  -  } else { -    #no splitting -    $name = "0"; -  } - -  return "sp_" . $name; -   -} - -sub usage { -  print <<EOT; - -Usage: $0 [OPTIONS] corpus.src corpus.trg [dev.src dev.trg [test.src test.trg]] - -Learns a segmentation model and splits up corpora as necessary. Word alignments are trained on a specified subset of the training corpus. - -EOT -  exit 1; -}; - -sub safemkdir { -  my $dir = shift; -  if (-d $dir) { return 1; } -  return mkdir($dir); -} - -sub assert_exec { -  my @files = @_; -  for my $file (@files) { -    die "Can't find $file - did you run make?\n" unless -e $file; -    die "Can't execute $file" unless -x $file; -  } -}; -sub safesystem { -  print STDERR "Executing: @_\n"; -  system(@_); -  if ($? 
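      # NOTE: the condition continues below. Perl packs the child status
      # into $?: -1 means system() itself failed (e.g. command not found),
      # ($? & 127) is the terminating signal if any, and ($? >> 8) is the
      # child's exit code otherwise; the three branches below decode it in
      # that order.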
== -1) { -      print STDERR "ERROR: Failed to execute: @_\n  $!\n"; -      exit(1); -  } -  elsif ($? & 127) { -      printf STDERR "ERROR: Execution of: @_\n  died with signal %d, %s coredump\n", -          ($? & 127),  ($? & 128) ? 'with' : 'without'; -      exit(1); -  } -  else { -    my $exitcode = $? >> 8; -    print STDERR "Exit code: $exitcode\n" if $exitcode; -    return ! $exitcode; -  } -} - -sub get_basename -{ -  my $x = shift; -  $x = `basename $x`; -  $x =~ s/\n//; -  return $x; -} - -sub assert_marker { -  my $file = shift; -  my $result = `zcat $file| grep '$MARKER' | wc -l` or die "Cannot read $file: $!"; -  print $result;  -  if (scalar($result) != 0) { die "Data contains marker '$MARKER'; use something else.";} -} -########################### Dynamic config files ############################## - -sub write_wconf { -  my ($filename, $train_dir) = @_; -  open WCONF, ">$filename" or die "Can't write $filename: $!"; - -  print WCONF <<EOT; -## ---------------------- -## This is an example training script for the Berkeley -## word aligner.  In this configuration it uses two HMM -## alignment models trained jointly and then decoded -## using the competitive thresholding heuristic. - -########################################## -# Training: Defines the training regimen -########################################## -forwardModels   MODEL1 HMM -reverseModels   MODEL1 HMM -mode    JOINT JOINT -iters   5 5 - -############################################### -# Execution: Controls output and program flow -############################################### -execDir $ALIGNMENT_DIR -create -overwriteExecDir -saveParams  true -numThreads  1 -msPerLine   10000 -alignTraining - -################# -# Language/Data -################# -foreignSuffix   src -englishSuffix   trg - -# Choose the training sources, which can either be directories or files that list files/directories -trainSources    $train_dir/ -#trainSources     $train_dir/sources -testSources      -sentences   MAX - -################# -# 1-best output -################# -competitiveThresholding - -EOT -  close WCONF; -} - -sub write_eval_sh -{ -  my ($filename) = @_; -  open EVALFILE, ">$filename" or die "Can't write $filename: $!"; - -  print EVALFILE <<EOT; -#!/bin/bash - -EVAL_MAIN=/export/ws10smt/data/eval.sh -marker="$MARKER" -EOT - -  if ($SPLIT_TRG) { -    print EVALFILE <<EOT; -echo "OUTPUT EVALUATION" -echo "-----------------" -\$EVAL_MAIN "\$1" $TEST{'trg'}{'finalsplit'} - -echo "RECOMBINED OUTPUT EVALUATION" -echo "----------------------------" -cat "\$1" | sed -e "s/\$marker \$marker//g" -e "s/\$marker//g" > "\$1.recombined" - -\$EVAL_MAIN "\$1.recombined" $TEST{'trg'}{'finalunsplit'} -EOT - -  } else { -    print EVALFILE <<EOT; -echo "ARTIFICIAL SPLIT EVALUATION" -echo "--------------------------" - -#split the output translation -cat "\$1" | $MORF_SEGMENT $MODEL_TRG_DIR/inputvocab.gz $MODEL_TRG_DIR/segmentation.ready "\$MARKER" > "\$1.split" - -\$EVAL_MAIN "\$1.split" $TEST{'trg'}{'finalsplit'} - -echo "DIRECT EVALUATION" -echo "--------------------------" -\$EVAL_MAIN "\$1" $TEST{'trg'}{'finalunsplit'} -   -EOT - -  } -  close EVALFILE; - -} - - - - diff --git a/gi/morf-segmentation/morfsegment.py b/gi/morf-segmentation/morfsegment.py deleted file mode 100755 index 85b9d4fb..00000000 --- a/gi/morf-segmentation/morfsegment.py +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/python - -import sys -import gzip - -#usage: morfsegment.py inputvocab.gz segmentation.ready -#  stdin: the data to segment -#  stdout: the segmented 
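# NOTE: the header comment continues below. Sketch of the mapping this
# script builds (hypothetical entry): an inputvocab.gz line "1 verwonder"
# paired with a segmentation.ready line "ver# #wonder" yields
#   split_index["verwonder"] = "ver# #wonder"
# and every occurrence of "verwonder" on stdin is rewritten accordingly;
# words with no entry pass through unchanged.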
data - -if len(sys.argv) < 3: -  print "usage: morfsegment.py inputvocab.gz segmentation.ready [marker]" -  print "  stdin: the data to segment" -  print "  stdout: the segmented data" -  sys.exit() - -#read index: -split_index={} - -marker="##" - -if len(sys.argv) > 3: -  marker=sys.argv[3] - -word_vocab=gzip.open(sys.argv[1], 'rb') #inputvocab.gz -seg_vocab=open(sys.argv[2], 'r') #segm.ready.. - -for seg in seg_vocab: -  #seg = ver# #wonder\n -  #wordline = 1 verwonder\n -  word = word_vocab.readline().strip().split(' ') -  assert(len(word) == 2) -  word = word[1] -  seg=seg.strip() - -  if seg != word: -    split_index[word] = seg - -word_vocab.close() -seg_vocab.close() - -for line in sys.stdin: -  words = line.strip().split() - -  newsent = [] -  for word in words: -    splitword = split_index.get(word, word) -    newsent.append(splitword) - -  print ' '.join(newsent) - diff --git a/gi/morf-segmentation/morftrain.sh b/gi/morf-segmentation/morftrain.sh deleted file mode 100755 index 9004922f..00000000 --- a/gi/morf-segmentation/morftrain.sh +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/bash - -if [[ $# -lt 3 ]]; then -	echo "Trains a morfessor model and places the result in writedir" -	echo -	echo "Usage: `basename $0` corpus_input_file writedir [PPL] [marker] [lines]" -	echo -e "\tcorpus_input_file contains a sentence per line." -	exit 1 -fi - -MORFESSOR_DIR="/export/ws10smt/software/morfessor_catmap0.9.2" -SCRIPT_DIR=$(dirname `readlink -f $0`) - -MORFBINDIR="$MORFESSOR_DIR/bin" -MORFMAKEFILE_TRAIN="$MORFESSOR_DIR/train/Makefile" -VOCABEXT="$SCRIPT_DIR/vocabextractor.sh" - -MARKER="#" - -if [[ ! -f $VOCABEXT ]]; then -  echo "$VOCABEXT doesn't exist!" -  exit 1 -fi -if [[ ! -f $MORFMAKEFILE_TRAIN ]]; then -  echo "$MORFMAKEFILE_TRAIN doesn't exist!" -  exit 1 -fi - - -CORPUS="$1" -WRITETODIR=$2 - -if [[ ! -f $CORPUS ]]; then -  echo "$CORPUS doesn't exist!" -  exit 1 -fi - -PPL=10 -LINES=0 -if [[ $# -gt 2 ]]; then -  PPL=$3 -fi -if [[ $# -gt 3 ]]; then -  MARKER="$4" -fi -if [[ $# -gt 4 ]]; then -  LINES=$5 -fi - -mkdir -p $WRITETODIR - -#extract vocabulary to train on -echo "Extracting vocabulary..." -if [[ -f $WRITETODIR/inputvocab.gz ]]; then -  echo " ....$WRITETODIR/inputvocab.gz exists, reusing." -else -  if [[ $LINES -gt 0 ]]; then -    $VOCABEXT $CORPUS $LINES | gzip > $WRITETODIR/inputvocab.gz -  else -    $VOCABEXT $CORPUS | gzip > $WRITETODIR/inputvocab.gz -  fi -fi - - -#train it -echo "Training morf model..." -if [[ -f $WRITETODIR/segmentation.final.gz ]]; then -  echo " ....$WRITETODIR/segmentation.final.gz exists, reusing.." -else -  OLDPWD=`pwd` -  cd $WRITETODIR -   -  #put the training Makefile in place, with appropriate modifications -  sed -e "s/^GZIPPEDINPUTDATA = .*$/GZIPPEDINPUTDATA = inputvocab.gz/"  \ -    -e "s/^PPLTHRESH = .*$/PPLTHRESH = $PPL/" \ -    -e "s;^BINDIR = .*$;BINDIR = $MORFBINDIR;" \ -    $MORFMAKEFILE_TRAIN > ./Makefile - -  date -  make > ./trainmorf.log 2>&1 -  cd $OLDPWD -   -   -  echo "Post processing..." -  #remove comments, counts and morph types -  #mark morphs -   -  if [[ ! -f $WRITETODIR/segmentation.final.gz ]]; then -     echo "Failed to learn segmentation model: $WRITETODIR/segmentation.final.gz not written" -     exit 1 -  fi - -  zcat $WRITETODIR/segmentation.final.gz | \ -    awk '$1 !~ /^#/ {print}' | \ -    cut -d ' ' --complement -f 1 | \ -    sed -e "s/\/...//g" -e "s/ + /$MARKER $MARKER/g" \ -    > $WRITETODIR/segmentation.ready - -  if [[ ! 
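  # NOTE: the test continues below. The awk/cut/sed pipeline above rewrites
  # Morfessor catmap output into plain marked morphs: comment lines and the
  # leading count field are dropped, the 4-character "/XXX" category tags
  # (e.g. /PRE, /STM, /SUF) are stripped, and each " + " joint becomes
  # "$MARKER $MARKER", so e.g. (hypothetical line) "1 ver/PRE + wonder/STM"
  # becomes "ver# #wonder" with MARKER="#".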
-f $WRITETODIR/segmentation.ready ]]; then -     echo "Failed to learn segmentation model: $WRITETODIR/segmentation.final.gz not written" -     exit 1 -  fi - - - -  echo "Done training." -  date -fi -echo "Segmentation model is $WRITETODIR/segmentation.ready." - diff --git a/gi/morf-segmentation/vocabextractor.sh b/gi/morf-segmentation/vocabextractor.sh deleted file mode 100755 index 00ae7109..00000000 --- a/gi/morf-segmentation/vocabextractor.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash - -d=$(dirname `readlink -f $0`) -if [ $# -lt 1 ]; then -	echo "Extracts unique words and their frequencies from a subset of a corpus." -	echo -	echo "Usage: `basename $0` input_file [number_of_lines] > output_file" -	echo -e "\tinput_file contains a sentence per line." -	echo -	echo "Script also removes words from the vocabulary if they contain a digit or a special character. Output is printed to stdout in a format suitable for use with Morfessor." -	echo -	exit -fi - -srcname=$1 -reallen=0 - -if [[ $# -gt 1 ]]; then -  reallen=$2 -fi - -pattern_file=$d/invalid_vocab.patterns - -if [[ ! -f $pattern_file ]]; then -  echo "Pattern file missing" -  exit 1  -fi - -#this awk strips entries from the vocabulary if they contain invalid characters -#invalid characters are digits and punctuation marks, and words beginning or ending with a dash -#uniq -c extracts the unique words and counts the occurrences - -if [[ $reallen -eq 0 ]]; then -	#when a zero is passed, use the whole file -  zcat -f $srcname | sed 's/ /\n/g' | egrep -v -f $pattern_file | sort | uniq -c | sed 's/^  *//'  - -else -	zcat -f $srcname | head -n $reallen | sed 's/ /\n/g' | egrep -v -f $pattern_file | sort | uniq -c | sed 's/^  *//' -fi - diff --git a/gi/pf/Makefile.am b/gi/pf/Makefile.am deleted file mode 100644 index 86f8e07b..00000000 --- a/gi/pf/Makefile.am +++ /dev/null @@ -1,44 +0,0 @@ -bin_PROGRAMS = cbgi brat dpnaive pfbrat pfdist itg pfnaive condnaive align-lexonly-pyp learn_cfg pyp_lm nuisance_test align-tl pf_test bayes_lattice_score - -noinst_LIBRARIES = libpf.a - -libpf_a_SOURCES = base_distributions.cc reachability.cc cfg_wfst_composer.cc corpus.cc unigrams.cc ngram_base.cc transliterations.cc backward.cc hpyp_tm.cc pyp_tm.cc - -bayes_lattice_score_SOURCES = bayes_lattice_score.cc -bayes_lattice_score_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz - -pf_test_SOURCES = pf_test.cc -pf_test_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz - -nuisance_test_SOURCES = nuisance_test.cc -nuisance_test_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz - -align_lexonly_pyp_SOURCES = align-lexonly-pyp.cc -align_lexonly_pyp_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz - -align_tl_SOURCES = align-tl.cc -align_tl_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz - -itg_SOURCES = itg.cc - -pyp_lm_SOURCES = pyp_lm.cc - -learn_cfg_SOURCES = learn_cfg.cc - -condnaive_SOURCES = 
condnaive.cc - -dpnaive_SOURCES = dpnaive.cc - -pfdist_SOURCES = pfdist.cc - -pfnaive_SOURCES = pfnaive.cc - -cbgi_SOURCES = cbgi.cc - -brat_SOURCES = brat.cc - -pfbrat_SOURCES = pfbrat.cc - -AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I$(top_srcdir)/utils $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder -I$(top_srcdir)/klm - -AM_LDFLAGS = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a $(top_srcdir)/utils/libutils.a -lz diff --git a/gi/pf/README b/gi/pf/README deleted file mode 100644 index 62e47541..00000000 --- a/gi/pf/README +++ /dev/null @@ -1,2 +0,0 @@ -Experimental Bayesian alignment tools. Nothing to see here. - diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc deleted file mode 100644 index e7509f57..00000000 --- a/gi/pf/align-lexonly-pyp.cc +++ /dev/null @@ -1,243 +0,0 @@ -#include <iostream> -#include <queue> - -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "tdict.h" -#include "stringlib.h" -#include "filelib.h" -#include "array2d.h" -#include "sampler.h" -#include "corpus.h" -#include "pyp_tm.h" -#include "hpyp_tm.h" -#include "quasi_model2.h" - -using namespace std; -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { -  po::options_description opts("Configuration options"); -  opts.add_options() -        ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples") -        ("infer_alignment_hyperparameters,I", "Infer alpha and p_null, otherwise fixed values will be assumed") -        ("p_null,0", po::value<double>()->default_value(0.08), "probability of aligning to null") -        ("align_alpha,a", po::value<double>()->default_value(4.0), "how 'tight' is the bias toward being along the diagonal?") -        ("input,i",po::value<string>(),"Read parallel data from") -        ("random_seed,S",po::value<uint32_t>(), "Random seed"); -  po::options_description clo("Command line options"); -  clo.add_options() -        ("config", po::value<string>(), "Configuration file") -        ("help,h", "Print this help message and exit"); -  po::options_description dconfig_options, dcmdline_options; -  dconfig_options.add(opts); -  dcmdline_options.add(opts).add(clo); -   -  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); -  if (conf->count("config")) { -    ifstream config((*conf)["config"].as<string>().c_str()); -    po::store(po::parse_config_file(config, dconfig_options), *conf); -  } -  po::notify(*conf); - -  if (conf->count("help") || (conf->count("input") == 0)) { -    cerr << dcmdline_options << endl; -    exit(1); -  } -} - -MT19937* prng; - -struct LexicalAlignment { -  unsigned char src_index; -  bool is_transliteration; -  vector<pair<short, short> > derivation; -}; - -struct AlignedSentencePair { -  vector<WordID> src; -  vector<WordID> trg; -  vector<LexicalAlignment> a; -  Array2D<short> posterior; -}; - -template <class LexicalTranslationModel> -struct Aligner { -  Aligner(const vector<vector<WordID> >& lets, -          int vocab_size, -          int num_letters, -          const po::variables_map& conf, -          vector<AlignedSentencePair>* c) : -      corpus(*c), -      paj_model(conf["align_alpha"].as<double>(), conf["p_null"].as<double>()), -      infer_paj(conf.count("infer_alignment_hyperparameters") > 0), -      model(lets, vocab_size, num_letters), -      kNULL(TD::Convert("NULL")) { -    assert(lets[kNULL].size() == 0); -  } - -  
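  // NOTE: members below. The aligner combines two factors per target word:
  // paj_model (QuasiModel2) supplies the position prior
  // p(a_j | j, |src|, |trg|), biased toward the diagonal by align_alpha,
  // and model supplies the lexical term p(trg_j | src_{a_j});
  // ResampleCorpus() samples each a_j from their product.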
vector<AlignedSentencePair>& corpus; -  QuasiModel2 paj_model; -  const bool infer_paj; -  LexicalTranslationModel model; -  const WordID kNULL; - -  void ResampleHyperparameters() { -    model.ResampleHyperparameters(prng); -    if (infer_paj) paj_model.ResampleHyperparameters(prng); -  } - -  void InitializeRandom() { -    cerr << "Initializing with random alignments ...\n"; -    for (unsigned i = 0; i < corpus.size(); ++i) { -      AlignedSentencePair& asp = corpus[i]; -      asp.a.resize(asp.trg.size()); -      for (unsigned j = 0; j < asp.trg.size(); ++j) { -        unsigned char& a_j = asp.a[j].src_index; -        a_j = prng->next() * (1 + asp.src.size()); -        const WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL); -        model.Increment(f_a_j, asp.trg[j], &*prng); -        paj_model.Increment(a_j, j, asp.src.size(), asp.trg.size()); -      } -    } -    cerr << "Corpus initialized randomly." << endl; -    cerr << "LLH = " << Likelihood() << "    \t(Amodel=" << paj_model.Likelihood() -         << " TModel=" << model.Likelihood() << ") contexts=" << model.UniqueConditioningContexts() << endl; -  } - -  void ResampleCorpus() { -    for (unsigned i = 0; i < corpus.size(); ++i) { -      AlignedSentencePair& asp = corpus[i]; -      SampleSet<prob_t> ss; ss.resize(asp.src.size() + 1); -      for (unsigned j = 0; j < asp.trg.size(); ++j) { -        unsigned char& a_j = asp.a[j].src_index; -        const WordID e_j = asp.trg[j]; -        WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL); -        model.Decrement(f_a_j, e_j, prng); -        paj_model.Decrement(a_j, j, asp.src.size(), asp.trg.size()); - -        for (unsigned prop_a_j = 0; prop_a_j <= asp.src.size(); ++prop_a_j) { -          const WordID prop_f = (prop_a_j ? asp.src[prop_a_j - 1] : kNULL); -          ss[prop_a_j] = model.Prob(prop_f, e_j); -          ss[prop_a_j] *= paj_model.Prob(prop_a_j, j, asp.src.size(), asp.trg.size()); -        } -        a_j = prng->SelectSample(ss); -        f_a_j = (a_j ? 
asp.src[a_j - 1] : kNULL); -        model.Increment(f_a_j, e_j, prng); -        paj_model.Increment(a_j, j, asp.src.size(), asp.trg.size()); -      } -    } -  } - -  prob_t Likelihood() const { -    return model.Likelihood() * paj_model.Likelihood(); -  } -}; - -void ExtractLetters(const set<WordID>& v, vector<vector<WordID> >* l, set<WordID>* letset = NULL) { -  for (set<WordID>::const_iterator it = v.begin(); it != v.end(); ++it) { -    vector<WordID>& letters = (*l)[*it]; -    if (letters.size()) continue;   // if e and f have the same word - -    const string& w = TD::Convert(*it); -     -    size_t cur = 0; -    while (cur < w.size()) { -      const size_t len = UTF8Len(w[cur]); -      letters.push_back(TD::Convert(w.substr(cur, len))); -      if (letset) letset->insert(letters.back()); -      cur += len; -    } -  } -} - -void Debug(const AlignedSentencePair& asp) { -  cerr << TD::GetString(asp.src) << endl << TD::GetString(asp.trg) << endl; -  Array2D<bool> a(asp.src.size(), asp.trg.size()); -  for (unsigned j = 0; j < asp.trg.size(); ++j) { -    assert(asp.a[j].src_index <= asp.src.size()); -    if (asp.a[j].src_index) a(asp.a[j].src_index - 1, j) = true; -  } -  cerr << a << endl; -} - -void AddSample(AlignedSentencePair* asp) { -  for (unsigned j = 0; j < asp->trg.size(); ++j) -    asp->posterior(asp->a[j].src_index, j)++; -} - -void WriteAlignments(const AlignedSentencePair& asp) { -  bool first = true; -  for (unsigned j = 0; j < asp.trg.size(); ++j) { -    int src_index = -1; -    int mc = -1; -    for (unsigned i = 0; i <= asp.src.size(); ++i) { -      if (asp.posterior(i, j) > mc) { -        mc = asp.posterior(i, j); -        src_index = i; -      } -    } - -    if (src_index) { -      if (first) first = false; else cout << ' '; -      cout << (src_index - 1) << '-' << j; -    } -  } -  cout << endl; -} - -int main(int argc, char** argv) { -  po::variables_map conf; -  InitCommandLine(argc, argv, &conf); - -  if (conf.count("random_seed")) -    prng = new MT19937(conf["random_seed"].as<uint32_t>()); -  else -    prng = new MT19937; - -  vector<vector<int> > corpuse, corpusf; -  set<int> vocabe, vocabf; -  corpus::ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe); -  cerr << "f-Corpus size: " << corpusf.size() << " sentences\n"; -  cerr << "f-Vocabulary size: " << vocabf.size() << " types\n"; -  cerr << "e-Corpus size: " << corpuse.size() << " sentences\n"; -  cerr << "e-Vocabulary size: " << vocabe.size() << " types\n"; -  assert(corpusf.size() == corpuse.size()); - -  vector<AlignedSentencePair> corpus(corpuse.size()); -  for (unsigned i = 0; i < corpuse.size(); ++i) { -    corpus[i].src.swap(corpusf[i]); -    corpus[i].trg.swap(corpuse[i]); -    corpus[i].posterior.resize(corpus[i].src.size() + 1, corpus[i].trg.size()); -  } -  corpusf.clear(); corpuse.clear(); - -  vocabf.insert(TD::Convert("NULL")); -  vector<vector<WordID> > letters(TD::NumWords()); -  set<WordID> letset; -  ExtractLetters(vocabe, &letters, &letset); -  ExtractLetters(vocabf, &letters, NULL); -  letters[TD::Convert("NULL")].clear(); - -  //Aligner<PYPLexicalTranslation> aligner(letters, vocabe.size(), letset.size(), conf, &corpus); -  Aligner<HPYPLexicalTranslation> aligner(letters, vocabe.size(), letset.size(), conf, &corpus); -  aligner.InitializeRandom(); - -  const unsigned samples = conf["samples"].as<unsigned>(); -  for (int i = 0; i < samples; ++i) { -    for (int j = 65; j < 67; ++j) Debug(corpus[j]);  // debug dump of sentences 65-66 each sweep (assumes a corpus of at least 67 pairs) -    if (i % 10 == 9) { -      
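// NOTE: the block continues below. Sampler schedule: hyperparameters are
// resampled every 10th sweep, and alignment posteriors are accumulated via
// AddSample() only after a burn-in of samples/5 sweeps, thinned to every
// 6th sweep; WriteAlignments() then emits the most frequent link per
// target word.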
aligner.ResampleHyperparameters(); -      cerr << "LLH = " << aligner.Likelihood() << "    \t(Amodel=" << aligner.paj_model.Likelihood() -           << " TModel=" << aligner.model.Likelihood() << ") contexts=" << aligner.model.UniqueConditioningContexts() << endl; -    } -    aligner.ResampleCorpus(); -    if (i > (samples / 5) && (i % 6 == 5)) for (int j = 0; j < corpus.size(); ++j) AddSample(&corpus[j]); -  } -  for (unsigned i = 0; i < corpus.size(); ++i) -    WriteAlignments(corpus[i]); -  aligner.model.Summary(); - -  return 0; -} diff --git a/gi/pf/align-tl.cc b/gi/pf/align-tl.cc deleted file mode 100644 index f6608f1d..00000000 --- a/gi/pf/align-tl.cc +++ /dev/null @@ -1,339 +0,0 @@ -#include <iostream> -#include <tr1/memory> -#include <queue> - -#include <boost/multi_array.hpp> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "backward.h" -#include "array2d.h" -#include "base_distributions.h" -#include "monotonic_pseg.h" -#include "conditional_pseg.h" -#include "trule.h" -#include "tdict.h" -#include "stringlib.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "mfcr.h" -#include "corpus.h" -#include "ngram_base.h" -#include "transliterations.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { -  po::options_description opts("Configuration options"); -  opts.add_options() -        ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples") -        ("input,i",po::value<string>(),"Read parallel data from") -        ("s2t", po::value<string>(), "character level source-to-target prior transliteration probabilities") -        ("t2s", po::value<string>(), "character level target-to-source prior transliteration probabilities") -        ("max_src_chunk", po::value<unsigned>()->default_value(4), "Maximum size of translitered chunk in source") -        ("max_trg_chunk", po::value<unsigned>()->default_value(4), "Maximum size of translitered chunk in target") -        ("expected_src_to_trg_ratio", po::value<double>()->default_value(1.0), "If a word is transliterated, what is the expected length ratio from source to target?") -        ("random_seed,S",po::value<uint32_t>(), "Random seed"); -  po::options_description clo("Command line options"); -  clo.add_options() -        ("config", po::value<string>(), "Configuration file") -        ("help,h", "Print this help message and exit"); -  po::options_description dconfig_options, dcmdline_options; -  dconfig_options.add(opts); -  dcmdline_options.add(opts).add(clo); -   -  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); -  if (conf->count("config")) { -    ifstream config((*conf)["config"].as<string>().c_str()); -    po::store(po::parse_config_file(config, dconfig_options), *conf); -  } -  po::notify(*conf); - -  if (conf->count("help") || (conf->count("input") == 0)) { -    cerr << dcmdline_options << endl; -    exit(1); -  } -} - -boost::shared_ptr<MT19937> prng; - -struct LexicalAlignment { -  unsigned char src_index; -  bool is_transliteration; -  vector<pair<short, short> > derivation; -}; - -struct AlignedSentencePair { -  vector<WordID> src; -  vector<WordID> trg; -  vector<LexicalAlignment> a; -  Array2D<short> posterior; -}; - -struct HierarchicalWordBase { -  explicit HierarchicalWordBase(const unsigned vocab_e_size) : -      base(prob_t::One()), r(1,1,1,1,0.66,50.0), u0(-log(vocab_e_size)), l(1,prob_t::One()), v(1, 
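  // NOTE: the initializer list continues below. The base measure over
  // target spellings is log p0(s) = log Poisson(|s|; 7.5) + |s| * u0 with
  // u0 = -log(vocab_e_size), i.e. a Poisson length prior times i.i.d.
  // uniform letters, smoothed through an MFCR with discount 0.66 and
  // strength 50.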
prob_t::Zero()) {} - -  void ResampleHyperparameters(MT19937* rng) { -    r.resample_hyperparameters(rng); -  } - -  inline double logp0(const vector<WordID>& s) const { -    return Md::log_poisson(s.size(), 7.5) + s.size() * u0; -  } - -  // return p0 of rule.e_ -  prob_t operator()(const TRule& rule) const { -    v[0].logeq(logp0(rule.e_)); -    return r.prob(rule.e_, v.begin(), l.begin()); -  } - -  void Increment(const TRule& rule) { -    v[0].logeq(logp0(rule.e_)); -    if (r.increment(rule.e_, v.begin(), l.begin(), &*prng).count) { -      base *= v[0] * l[0]; -    } -  } - -  void Decrement(const TRule& rule) { -    if (r.decrement(rule.e_, &*prng).count) { -      base /= prob_t(exp(logp0(rule.e_))); -    } -  } - -  prob_t Likelihood() const { -    prob_t p; p.logeq(r.log_crp_prob()); -    p *= base; -    return p; -  } - -  void Summary() const { -    cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << "  (d=" << r.discount() << ",s=" << r.strength() << ')' << endl; -    for (MFCR<1,vector<WordID> >::const_iterator it = r.begin(); it != r.end(); ++it) -      cerr << "   " << it->second.total_dish_count_ << " (on " << it->second.table_counts_.size() << " tables) " << TD::GetString(it->first) << endl; -  } - -  prob_t base; -  MFCR<1,vector<WordID> > r; -  const double u0; -  const vector<prob_t> l; -  mutable vector<prob_t> v; -}; - -struct BasicLexicalAlignment { -  explicit BasicLexicalAlignment(const vector<vector<WordID> >& lets, -                                 const unsigned words_e, -                                 const unsigned letters_e, -                                 vector<AlignedSentencePair>* corp) : -      letters(lets), -      corpus(*corp), -      //up0(words_e), -      //up0("en.chars.1gram", letters_e), -      //up0("en.words.1gram"), -      up0(letters_e), -      //up0("en.chars.2gram"), -      tmodel(up0) { -  } - -  void InstantiateRule(const WordID src, -                       const WordID trg, -                       TRule* rule) const { -    static const WordID kX = TD::Convert("X") * -1; -    rule->lhs_ = kX; -    rule->e_ = letters[trg]; -    rule->f_ = letters[src]; -  } - -  void InitializeRandom() { -    const WordID kNULL = TD::Convert("NULL"); -    cerr << "Initializing with random alignments ...\n"; -    for (unsigned i = 0; i < corpus.size(); ++i) { -      AlignedSentencePair& asp = corpus[i]; -      asp.a.resize(asp.trg.size()); -      for (unsigned j = 0; j < asp.trg.size(); ++j) { -        const unsigned char a_j = prng->next() * (1 + asp.src.size()); -        const WordID f_a_j = (a_j ? 
asp.src[a_j - 1] : kNULL); -        TRule r; -        InstantiateRule(f_a_j, asp.trg[j], &r); -        asp.a[j].is_transliteration = false; -        asp.a[j].src_index = a_j; -        if (tmodel.IncrementRule(r, &*prng)) -          up0.Increment(r); -      } -    } -    cerr << "  LLH = " << Likelihood() << endl; -  } - -  prob_t Likelihood() const { -    prob_t p = tmodel.Likelihood(); -    p *= up0.Likelihood(); -    return p; -  } - -  void ResampleHyperparemeters() { -    tmodel.ResampleHyperparameters(&*prng); -    up0.ResampleHyperparameters(&*prng); -    cerr << "  (base d=" << up0.r.discount() << ",s=" << up0.r.strength() << ")\n"; -  } - -  void ResampleCorpus(); - -  const vector<vector<WordID> >& letters; // spelling dictionary -  vector<AlignedSentencePair>& corpus; -  //PhraseConditionalUninformativeBase up0; -  //PhraseConditionalUninformativeUnigramBase up0; -  //UnigramWordBase up0; -  //HierarchicalUnigramBase up0; -  HierarchicalWordBase up0; -  //CompletelyUniformBase up0; -  //FixedNgramBase up0; -  //ConditionalTranslationModel<PhraseConditionalUninformativeBase> tmodel; -  //ConditionalTranslationModel<PhraseConditionalUninformativeUnigramBase> tmodel; -  //ConditionalTranslationModel<UnigramWordBase> tmodel; -  //ConditionalTranslationModel<HierarchicalUnigramBase> tmodel; -  MConditionalTranslationModel<HierarchicalWordBase> tmodel; -  //ConditionalTranslationModel<FixedNgramBase> tmodel; -  //ConditionalTranslationModel<CompletelyUniformBase> tmodel; -}; - -void BasicLexicalAlignment::ResampleCorpus() { -  static const WordID kNULL = TD::Convert("NULL"); -  for (unsigned i = 0; i < corpus.size(); ++i) { -    AlignedSentencePair& asp = corpus[i]; -    SampleSet<prob_t> ss; ss.resize(asp.src.size() + 1); -    for (unsigned j = 0; j < asp.trg.size(); ++j) { -      TRule r; -      unsigned char& a_j = asp.a[j].src_index; -      WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL); -      InstantiateRule(f_a_j, asp.trg[j], &r); -      if (tmodel.DecrementRule(r, &*prng)) -        up0.Decrement(r); - -      for (unsigned prop_a_j = 0; prop_a_j <= asp.src.size(); ++prop_a_j) { -        const WordID prop_f = (prop_a_j ? asp.src[prop_a_j - 1] : kNULL); -        InstantiateRule(prop_f, asp.trg[j], &r); -        ss[prop_a_j] = tmodel.RuleProbability(r); -      } -      a_j = prng->SelectSample(ss); -      f_a_j = (a_j ? 
asp.src[a_j - 1] : kNULL); -      InstantiateRule(f_a_j, asp.trg[j], &r); -      if (tmodel.IncrementRule(r, &*prng)) -        up0.Increment(r); -    } -  } -  cerr << "  LLH = " << Likelihood() << endl; -} - -void ExtractLetters(const set<WordID>& v, vector<vector<WordID> >* l, set<WordID>* letset = NULL) { -  for (set<WordID>::const_iterator it = v.begin(); it != v.end(); ++it) { -    vector<WordID>& letters = (*l)[*it]; -    if (letters.size()) continue;   // if e and f have the same word - -    const string& w = TD::Convert(*it); -     -    size_t cur = 0; -    while (cur < w.size()) { -      const size_t len = UTF8Len(w[cur]); -      letters.push_back(TD::Convert(w.substr(cur, len))); -      if (letset) letset->insert(letters.back()); -      cur += len; -    } -  } -} - -void Debug(const AlignedSentencePair& asp) { -  cerr << TD::GetString(asp.src) << endl << TD::GetString(asp.trg) << endl; -  Array2D<bool> a(asp.src.size(), asp.trg.size()); -  for (unsigned j = 0; j < asp.trg.size(); ++j) -    if (asp.a[j].src_index) a(asp.a[j].src_index - 1, j) = true; -  cerr << a << endl; -} - -void AddSample(AlignedSentencePair* asp) { -  for (unsigned j = 0; j < asp->trg.size(); ++j) -    asp->posterior(asp->a[j].src_index, j)++; -} - -void WriteAlignments(const AlignedSentencePair& asp) { -  bool first = true; -  for (unsigned j = 0; j < asp.trg.size(); ++j) { -    int src_index = -1; -    int mc = -1; -    for (unsigned i = 0; i <= asp.src.size(); ++i) { -      if (asp.posterior(i, j) > mc) { -        mc = asp.posterior(i, j); -        src_index = i; -      } -    } - -    if (src_index) { -      if (first) first = false; else cout << ' '; -      cout << (src_index - 1) << '-' << j; -    } -  } -  cout << endl; -} - -int main(int argc, char** argv) { -  po::variables_map conf; -  InitCommandLine(argc, argv, &conf); - -  if (conf.count("random_seed")) -    prng.reset(new MT19937(conf["random_seed"].as<uint32_t>())); -  else -    prng.reset(new MT19937); -//  MT19937& rng = *prng; - -  vector<vector<int> > corpuse, corpusf; -  set<int> vocabe, vocabf; -  corpus::ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe); -  cerr << "f-Corpus size: " << corpusf.size() << " sentences\n"; -  cerr << "f-Vocabulary size: " << vocabf.size() << " types\n"; -  cerr << "e-Corpus size: " << corpuse.size() << " sentences\n"; -  cerr << "e-Vocabulary size: " << vocabe.size() << " types\n"; -  assert(corpusf.size() == corpuse.size()); - -  vector<AlignedSentencePair> corpus(corpuse.size()); -  for (unsigned i = 0; i < corpuse.size(); ++i) { -    corpus[i].src.swap(corpusf[i]); -    corpus[i].trg.swap(corpuse[i]); -    corpus[i].posterior.resize(corpus[i].src.size() + 1, corpus[i].trg.size()); -  } -  corpusf.clear(); corpuse.clear(); - -  vocabf.insert(TD::Convert("NULL")); -  vector<vector<WordID> > letters(TD::NumWords() + 1); -  set<WordID> letset; -  ExtractLetters(vocabe, &letters, &letset); -  ExtractLetters(vocabf, &letters, NULL); -  letters[TD::Convert("NULL")].clear(); - -  // TODO configure this -  const int max_src_chunk = conf["max_src_chunk"].as<unsigned>(); -  const int max_trg_chunk = conf["max_trg_chunk"].as<unsigned>(); -  const double s2t_rat = conf["expected_src_to_trg_ratio"].as<double>(); -  const BackwardEstimator be(conf["s2t"].as<string>(), conf["t2s"].as<string>()); -  Transliterations tl(max_src_chunk, max_trg_chunk, s2t_rat, be);  - -  cerr << "Initializing transliteration graph structures ...\n"; -  for (int i = 0; i < corpus.size(); ++i) { -    const 
vector<int>& src = corpus[i].src; -    const vector<int>& trg = corpus[i].trg; -    for (int j = 0; j < src.size(); ++j) { -      const vector<int>& src_let = letters[src[j]]; -      for (int k = 0; k < trg.size(); ++k) { -        const vector<int>& trg_let = letters[trg[k]]; -        tl.Initialize(src[j], src_let, trg[k], trg_let); -        //if (src_let.size() < min_trans_src) -        //  tl.Forbid(src[j], src_let, trg[k], trg_let); -      } -    } -  } -  cerr << endl; -  tl.GraphSummary(); - -  return 0; -} diff --git a/gi/pf/backward.cc b/gi/pf/backward.cc deleted file mode 100644 index b92629fd..00000000 --- a/gi/pf/backward.cc +++ /dev/null @@ -1,89 +0,0 @@ -#include "backward.h" - -#include <queue> -#include <utility> - -#include "array2d.h" -#include "reachability.h" -#include "base_distributions.h" - -using namespace std; - -BackwardEstimator::BackwardEstimator(const string& s2t, -                    const string& t2s) : m1(new Model1(s2t)), m1inv(new Model1(t2s)) {} - -BackwardEstimator::~BackwardEstimator() { -  delete m1; m1 = NULL; -  delete m1inv; m1inv = NULL; -} - -float BackwardEstimator::ComputeBackwardProb(const std::vector<WordID>& src, -                                             const std::vector<WordID>& trg, -                                             unsigned src_covered, -                                             unsigned trg_covered, -                                             double s2t_ratio) const { -  if (src_covered == src.size() || trg_covered == trg.size()) { -    assert(src_covered == src.size()); -    assert(trg_covered == trg.size()); -    return 0; -  } -  static const WordID kNULL = TD::Convert("<eps>"); -  const prob_t uniform_alignment(1.0 / (src.size() - src_covered + 1)); -  // TODO factor in expected length ratio -  prob_t e; e.logeq(Md::log_poisson(trg.size() - trg_covered, (src.size() - src_covered) * s2t_ratio)); // p(trg len remaining | src len remaining) -  for (unsigned j = trg_covered; j < trg.size(); ++j) { -    prob_t p = (*m1)(kNULL, trg[j]) + prob_t(1e-12); -    for (unsigned i = src_covered; i < src.size(); ++i) -      p += (*m1)(src[i], trg[j]); -    if (p.is_0()) { -      cerr << "ERROR: p(" << TD::Convert(trg[j]) << " | " << TD::GetString(src) << ") = 0!\n"; -      assert(!"failed"); -    } -    p *= uniform_alignment; -    e *= p; -  } -  // TODO factor in expected length ratio -  const prob_t inv_uniform(1.0 / (trg.size() - trg_covered + 1.0)); -  prob_t inv; -  inv.logeq(Md::log_poisson(src.size() - src_covered, (trg.size() - trg_covered) / s2t_ratio)); -  for (unsigned i = src_covered; i < src.size(); ++i) { -    prob_t p = (*m1inv)(kNULL, src[i]) + prob_t(1e-12); -    for (unsigned j = trg_covered; j < trg.size(); ++j) -      p += (*m1inv)(trg[j], src[i]); -    if (p.is_0()) { -      cerr << "ERROR: p_inv(" << TD::Convert(src[i]) << " | " << TD::GetString(trg) << ") = 0!\n"; -      assert(!"failed"); -    } -    p *= inv_uniform; -    inv *= p; -  } -  return (log(e) + log(inv)) / 2; -} - -void BackwardEstimator::InitializeGrid(const vector<WordID>& src, -                      const vector<WordID>& trg, -                      const Reachability& r, -                      double s2t_ratio, -                      float* grid) const { -  queue<pair<int,int> > q; -  q.push(make_pair(0,0)); -  Array2D<bool> done(src.size()+1, trg.size()+1, false); -  //cerr << TD::GetString(src) << " ||| " << TD::GetString(trg) << endl; -  while(!q.empty()) { -    const pair<int,int> n = q.front(); -    q.pop(); -    if 
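    // NOTE: the visited check continues below. This loop is a BFS over the
    // reachability graph of (src_covered, trg_covered) states; each state
    // is scored by ComputeBackwardProb() above, which averages the two
    // Model 1 directions in log space, (log e + log inv) / 2.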
(done(n.first,n.second)) continue; -    done(n.first,n.second) = true; - -    float lp = ComputeBackwardProb(src, trg, n.first, n.second, s2t_ratio); -    if (n.first == 0 && n.second == 0) grid[0] = lp; -    //cerr << "  " << n.first << "," << n.second << "\t" << lp << endl; - -    if (n.first == src.size() || n.second == trg.size()) continue; -    const vector<pair<short,short> >& edges = r.valid_deltas[n.first][n.second]; -    for (int i = 0; i < edges.size(); ++i) -      q.push(make_pair(n.first + edges[i].first, n.second + edges[i].second)); -  } -  //static int cc = 0; ++cc; if (cc == 80) exit(1); -} - diff --git a/gi/pf/backward.h b/gi/pf/backward.h deleted file mode 100644 index e67eff0c..00000000 --- a/gi/pf/backward.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef _BACKWARD_H_ -#define _BACKWARD_H_ - -#include <vector> -#include <string> -#include "wordid.h" - -struct Reachability; -struct Model1; - -struct BackwardEstimator { -  BackwardEstimator(const std::string& s2t, -                    const std::string& t2s); -  ~BackwardEstimator(); - -  void InitializeGrid(const std::vector<WordID>& src, -                      const std::vector<WordID>& trg, -                      const Reachability& r, -                      double src2trg_ratio, -                      float* grid) const; - - private: -  float ComputeBackwardProb(const std::vector<WordID>& src, -                            const std::vector<WordID>& trg, -                            unsigned src_covered, -                            unsigned trg_covered, -                            double src2trg_ratio) const; - -  Model1* m1; -  Model1* m1inv; -}; - -#endif diff --git a/gi/pf/base_distributions.cc b/gi/pf/base_distributions.cc deleted file mode 100644 index 57e0bbe1..00000000 --- a/gi/pf/base_distributions.cc +++ /dev/null @@ -1,241 +0,0 @@ -#include "base_distributions.h" - -#include <iostream> - -#include "filelib.h" - -using namespace std; - -TableLookupBase::TableLookupBase(const string& fname) { -  cerr << "TableLookupBase reading from " << fname << " ..." << endl; -  ReadFile rf(fname); -  istream& in = *rf.stream(); -  string line; -  unsigned lc = 0; -  const WordID kDIV = TD::Convert("|||"); -  vector<WordID> tmp; -  vector<int> le, lf; -  TRule x; -  x.lhs_ = -TD::Convert("X"); -  bool flag = false; -  while(getline(in, line)) { -    ++lc; -    if (lc % 1000000 == 0) { cerr << " [" << lc << ']' << endl; flag = false; } -    else if (lc % 25000 == 0) { cerr << '.' 
<< flush; flag = true; }
-    tmp.clear();
-    TD::ConvertSentence(line, &tmp);
-    x.f_.clear();
-    x.e_.clear();
-    size_t pos = 0;
-    int cc = 0;
-    while(pos < tmp.size()) {
-      const WordID cur = tmp[pos++];
-      if (cur == kDIV) {
-        ++cc;
-      } else if (cc == 0) {
-        x.f_.push_back(cur);
-      } else if (cc == 1) {
-        x.e_.push_back(cur);
-      } else if (cc == 2) {
-        table[x].logeq(atof(TD::Convert(cur).c_str()));
-        ++cc;
-      } else {
-        if (flag) cerr << endl;
-        cerr << "Bad format in " << lc << ": " << line << endl; abort();
-      }
-    }
-    if (cc != 3) {
-      if (flag) cerr << endl;
-      cerr << "Bad format in " << lc << ": " << line << endl; abort();
-    }
-  }
-  if (flag) cerr << endl;
-  cerr << " read " << lc << " entries\n";
-}
-
-prob_t PhraseConditionalUninformativeUnigramBase::p0(const vector<WordID>& vsrc,
-                                                     const vector<WordID>& vtrg,
-                                                     int start_src, int start_trg) const {
-  const int flen = vsrc.size() - start_src;
-  const int elen = vtrg.size() - start_trg;
-  prob_t p;
-  p.logeq(Md::log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
-  //p.logeq(log_poisson(elen, 1));       // elen          ~Pois(1)
-  for (int i = 0; i < elen; ++i)
-    p *= u(vtrg[i + start_trg]);                        // draw e_i             ~Unigram
-  return p;
-}
-
-prob_t PhraseConditionalUninformativeBase::p0(const vector<WordID>& vsrc,
-                                              const vector<WordID>& vtrg,
-                                              int start_src, int start_trg) const {
-  const int flen = vsrc.size() - start_src;
-  const int elen = vtrg.size() - start_trg;
-  prob_t p;
-  //p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
-  p.logeq(Md::log_poisson(elen, 1));       // elen          ~Pois(1)
-  for (int i = 0; i < elen; ++i)
-    p *= kUNIFORM_TARGET;                        // draw e_i             ~Uniform
-  return p;
-}
-
-void Model1::LoadModel1(const string& fname) {
-  cerr << "Loading Model 1 parameters from " << fname << " ..."
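// LoadModel1 below scans each "src trg logprob" line with raw pointer
// arithmetic. A minimal stream-based equivalent, assuming the same
// whitespace-separated three-field format (editorial sketch; the helper name
// is hypothetical, and WordID/prob_t/TD come from the headers above):
#include <sstream>
void LoadModel1Sketch(std::istream& in,
                      std::vector<std::map<WordID, prob_t> >* ttable) {
  std::string line, src_s, trg_s;
  double logprob;
  while (std::getline(in, line)) {
    std::istringstream iss(line);
    if (!(iss >> src_s >> trg_s >> logprob)) continue;  // skip malformed lines
    const WordID src = TD::Convert(src_s);
    const WordID trg = TD::Convert(trg_s);
    if (src >= static_cast<WordID>(ttable->size())) ttable->resize(src + 1);
    (*ttable)[src][trg].logeq(logprob);
  }
}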
<< endl; -  ReadFile rf(fname); -  istream& in = *rf.stream(); -  string line; -  unsigned lc = 0; -  while(getline(in, line)) { -    ++lc; -    int cur = 0; -    int start = 0; -    while(cur < line.size() && line[cur] != ' ') { ++cur; } -    assert(cur != line.size()); -    line[cur] = 0; -    const WordID src = TD::Convert(&line[0]); -    ++cur; -    start = cur; -    while(cur < line.size() && line[cur] != ' ') { ++cur; } -    assert(cur != line.size()); -    line[cur] = 0; -    WordID trg = TD::Convert(&line[start]); -    const double logprob = strtod(&line[cur + 1], NULL); -    if (src >= ttable.size()) ttable.resize(src + 1); -    ttable[src][trg].logeq(logprob); -  } -  cerr << "  read " << lc << " parameters.\n"; -} - -prob_t PhraseConditionalBase::p0(const vector<WordID>& vsrc, -                                 const vector<WordID>& vtrg, -                                 int start_src, int start_trg) const { -  const int flen = vsrc.size() - start_src; -  const int elen = vtrg.size() - start_trg; -  prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); -  prob_t p; -  p.logeq(Md::log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01) -  for (int i = 0; i < elen; ++i) {               // for each position i in e-RHS -    const WordID trg = vtrg[i + start_trg]; -    prob_t tp = prob_t::Zero(); -    for (int j = -1; j < flen; ++j) { -      const WordID src = j < 0 ? 0 : vsrc[j + start_src]; -      tp += kM1MIXTURE * model1(src, trg); -      tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET; -    } -    tp *= uniform_src_alignment;                 //     draw a_i         ~uniform -    p *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform -  } -  if (p.is_0()) { -    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl; -    abort(); -  } -  return p; -} - -prob_t PhraseJointBase::p0(const vector<WordID>& vsrc, -                           const vector<WordID>& vtrg, -                           int start_src, int start_trg) const { -  const int flen = vsrc.size() - start_src; -  const int elen = vtrg.size() - start_trg; -  prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); -  prob_t p; -  p.logeq(Md::log_poisson(flen, 1.0));               // flen                 ~Pois(1) -                                                 // elen | flen          ~Pois(flen + 0.01) -  prob_t ptrglen; ptrglen.logeq(Md::log_poisson(elen, flen + 0.01)); -  p *= ptrglen; -  p *= kUNIFORM_SOURCE.pow(flen);                // each f in F ~Uniform -  for (int i = 0; i < elen; ++i) {               // for each position i in E -    const WordID trg = vtrg[i + start_trg]; -    prob_t tp = prob_t::Zero(); -    for (int j = -1; j < flen; ++j) { -      const WordID src = j < 0 ? 0 : vsrc[j + start_src]; -      tp += kM1MIXTURE * model1(src, trg); -      tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET; -    } -    tp *= uniform_src_alignment;                 //     draw a_i         ~uniform -    p *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform -  } -  if (p.is_0()) { -    cerr << "Zero! 
" << vsrc << "\nTRG=" << vtrg << endl; -    abort(); -  } -  return p; -} - -prob_t PhraseJointBase_BiDir::p0(const vector<WordID>& vsrc, -                                 const vector<WordID>& vtrg, -                                 int start_src, int start_trg) const { -  const int flen = vsrc.size() - start_src; -  const int elen = vtrg.size() - start_trg; -  prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); -  prob_t uniform_trg_alignment; uniform_trg_alignment.logeq(-log(elen + 1)); - -  prob_t p1; -  p1.logeq(Md::log_poisson(flen, 1.0));               // flen                 ~Pois(1) -                                                 // elen | flen          ~Pois(flen + 0.01) -  prob_t ptrglen; ptrglen.logeq(Md::log_poisson(elen, flen + 0.01)); -  p1 *= ptrglen; -  p1 *= kUNIFORM_SOURCE.pow(flen);                // each f in F ~Uniform -  for (int i = 0; i < elen; ++i) {               // for each position i in E -    const WordID trg = vtrg[i + start_trg]; -    prob_t tp = prob_t::Zero(); -    for (int j = -1; j < flen; ++j) { -      const WordID src = j < 0 ? 0 : vsrc[j + start_src]; -      tp += kM1MIXTURE * model1(src, trg); -      tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET; -    } -    tp *= uniform_src_alignment;                 //     draw a_i         ~uniform -    p1 *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform -  } -  if (p1.is_0()) { -    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl; -    abort(); -  } - -  prob_t p2; -  p2.logeq(Md::log_poisson(elen, 1.0));               // elen                 ~Pois(1) -                                                 // flen | elen          ~Pois(flen + 0.01) -  prob_t psrclen; psrclen.logeq(Md::log_poisson(flen, elen + 0.01)); -  p2 *= psrclen; -  p2 *= kUNIFORM_TARGET.pow(elen);                // each f in F ~Uniform -  for (int i = 0; i < flen; ++i) {               // for each position i in E -    const WordID src = vsrc[i + start_src]; -    prob_t tp = prob_t::Zero(); -    for (int j = -1; j < elen; ++j) { -      const WordID trg = j < 0 ? 0 : vtrg[j + start_trg]; -      tp += kM1MIXTURE * invmodel1(trg, src); -      tp += kUNIFORM_MIXTURE * kUNIFORM_SOURCE; -    } -    tp *= uniform_trg_alignment;                 //     draw a_i         ~uniform -    p2 *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform -  } -  if (p2.is_0()) { -    cerr << "Zero! 
" << vsrc << "\nTRG=" << vtrg << endl; -    abort(); -  } - -  static const prob_t kHALF(0.5); -  return (p1 + p2) * kHALF; -} - -JumpBase::JumpBase() : p(200) { -  for (unsigned src_len = 1; src_len < 200; ++src_len) { -    map<int, prob_t>& cpd = p[src_len]; -    int min_jump = 1 - src_len; -    int max_jump = src_len; -    prob_t z; -    for (int j = min_jump; j <= max_jump; ++j) { -      prob_t& cp = cpd[j]; -      if (j < 0) -        cp.logeq(Md::log_poisson(1.5-j, 1)); -      else if (j > 0) -        cp.logeq(Md::log_poisson(j, 1)); -      cp.poweq(0.2); -      z += cp; -    } -    for (int j = min_jump; j <= max_jump; ++j) { -      cpd[j] /= z; -    } -  } -} - diff --git a/gi/pf/base_distributions.h b/gi/pf/base_distributions.h deleted file mode 100644 index 41b513f8..00000000 --- a/gi/pf/base_distributions.h +++ /dev/null @@ -1,238 +0,0 @@ -#ifndef _BASE_MEASURES_H_ -#define _BASE_MEASURES_H_ - -#include <vector> -#include <map> -#include <string> -#include <cmath> -#include <iostream> -#include <cassert> - -#include "unigrams.h" -#include "trule.h" -#include "prob.h" -#include "tdict.h" -#include "sampler.h" -#include "m.h" -#include "os_phrase.h" - -struct Model1 { -  explicit Model1(const std::string& fname) : -      kNULL(TD::Convert("<eps>")), -      kZERO() { -    LoadModel1(fname); -  } - -  void LoadModel1(const std::string& fname); - -  // returns prob 0 if src or trg is not found -  const prob_t& operator()(WordID src, WordID trg) const { -    if (src == 0) src = kNULL; -    if (src < ttable.size()) { -      const std::map<WordID, prob_t>& cpd = ttable[src]; -      const std::map<WordID, prob_t>::const_iterator it = cpd.find(trg); -      if (it != cpd.end()) -        return it->second; -    } -    return kZERO; -  } - -  const WordID kNULL; -  const prob_t kZERO; -  std::vector<std::map<WordID, prob_t> > ttable; -}; - -struct PoissonUniformUninformativeBase { -  explicit PoissonUniformUninformativeBase(const unsigned ves) : kUNIFORM(1.0 / ves) {} -  prob_t operator()(const TRule& r) const { -    prob_t p; p.logeq(Md::log_poisson(r.e_.size(), 1.0)); -    prob_t q = kUNIFORM; q.poweq(r.e_.size()); -    p *= q; -    return p; -  } -  void Summary() const {} -  void ResampleHyperparameters(MT19937*) {} -  void Increment(const TRule&) {} -  void Decrement(const TRule&) {} -  prob_t Likelihood() const { return prob_t::One(); } -  const prob_t kUNIFORM; -}; - -struct CompletelyUniformBase { -  explicit CompletelyUniformBase(const unsigned ves) : kUNIFORM(1.0 / ves) {} -  prob_t operator()(const TRule&) const { -    return kUNIFORM; -  } -  void Summary() const {} -  void ResampleHyperparameters(MT19937*) {} -  void Increment(const TRule&) {} -  void Decrement(const TRule&) {} -  prob_t Likelihood() const { return prob_t::One(); } -  const prob_t kUNIFORM; -}; - -struct UnigramWordBase { -  explicit UnigramWordBase(const std::string& fname) : un(fname) {} -  prob_t operator()(const TRule& r) const { -    return un(r.e_); -  } -  const UnigramWordModel un; -}; - -struct RuleHasher { -  size_t operator()(const TRule& r) const { -    return hash_value(r); -  } -}; - -struct TableLookupBase { -  TableLookupBase(const std::string& fname); - -  prob_t operator()(const TRule& rule) const { -    const std::tr1::unordered_map<TRule,prob_t,RuleHasher>::const_iterator it = table.find(rule); -    if (it == table.end()) { -      std::cerr << rule << " not found\n"; -      abort(); -    } -    return it->second; -  } - -  void ResampleHyperparameters(MT19937*) {} -  void Increment(const 
TRule&) {} -  void Decrement(const TRule&) {} -  prob_t Likelihood() const { return prob_t::One(); } -  void Summary() const {} - -  std::tr1::unordered_map<TRule,prob_t,RuleHasher> table; -}; - -struct PhraseConditionalUninformativeBase { -  explicit PhraseConditionalUninformativeBase(const unsigned vocab_e_size) : -      kUNIFORM_TARGET(1.0 / vocab_e_size) { -    assert(vocab_e_size > 0); -  } - -  // return p0 of rule.e_ | rule.f_ -  prob_t operator()(const TRule& rule) const { -    return p0(rule.f_, rule.e_, 0, 0); -  } - -  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const; - -  void Summary() const {} -  void ResampleHyperparameters(MT19937*) {} -  void Increment(const TRule&) {} -  void Decrement(const TRule&) {} -  prob_t Likelihood() const { return prob_t::One(); } -  const prob_t kUNIFORM_TARGET; -}; - -struct PhraseConditionalUninformativeUnigramBase { -  explicit PhraseConditionalUninformativeUnigramBase(const std::string& file, const unsigned vocab_e_size) : u(file, vocab_e_size) {} - -  // return p0 of rule.e_ | rule.f_ -  prob_t operator()(const TRule& rule) const { -    return p0(rule.f_, rule.e_, 0, 0); -  } - -  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const; - -  const UnigramModel u; -}; - -struct PhraseConditionalBase { -  explicit PhraseConditionalBase(const Model1& m1, const double m1mixture, const unsigned vocab_e_size) : -      model1(m1), -      kM1MIXTURE(m1mixture), -      kUNIFORM_MIXTURE(1.0 - m1mixture), -      kUNIFORM_TARGET(1.0 / vocab_e_size) { -    assert(m1mixture >= 0.0 && m1mixture <= 1.0); -    assert(vocab_e_size > 0); -  } - -  // return p0 of rule.e_ | rule.f_ -  prob_t operator()(const TRule& rule) const { -    return p0(rule.f_, rule.e_, 0, 0); -  } - -  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const; - -  const Model1& model1; -  const prob_t kM1MIXTURE;  // Model 1 mixture component -  const prob_t kUNIFORM_MIXTURE; // uniform mixture component -  const prob_t kUNIFORM_TARGET; -}; - -struct PhraseJointBase { -  explicit PhraseJointBase(const Model1& m1, const double m1mixture, const unsigned vocab_e_size, const unsigned vocab_f_size) : -      model1(m1), -      kM1MIXTURE(m1mixture), -      kUNIFORM_MIXTURE(1.0 - m1mixture), -      kUNIFORM_SOURCE(1.0 / vocab_f_size), -      kUNIFORM_TARGET(1.0 / vocab_e_size) { -    assert(m1mixture >= 0.0 && m1mixture <= 1.0); -    assert(vocab_e_size > 0); -  } - -  // return p0 of rule.e_ , rule.f_ -  prob_t operator()(const TRule& rule) const { -    return p0(rule.f_, rule.e_, 0, 0); -  } - -  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const; - -  const Model1& model1; -  const prob_t kM1MIXTURE;  // Model 1 mixture component -  const prob_t kUNIFORM_MIXTURE; // uniform mixture component -  const prob_t kUNIFORM_SOURCE; -  const prob_t kUNIFORM_TARGET; -}; - -struct PhraseJointBase_BiDir { -  explicit PhraseJointBase_BiDir(const Model1& m1, -                                 const Model1& im1, -                                 const double m1mixture, -                                 const unsigned vocab_e_size, -                                 const unsigned vocab_f_size) : -      model1(m1), -      invmodel1(im1), -      kM1MIXTURE(m1mixture), -      kUNIFORM_MIXTURE(1.0 - m1mixture), -      kUNIFORM_SOURCE(1.0 / vocab_f_size), -      
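// PhraseConditionalBase::p0 (and the joint variants that follow) score each
// target word by marginalizing a Model 1 / uniform mixture over source
// positions, including NULL, under a uniform alignment prior:
//   tp(e_i) = 1/(flen+1) * sum_{j=-1..flen-1} [ lambda * M1(f_j, e_i)
//                                             + (1 - lambda) / |V_e| ].
// A plain-double sketch of that inner step (editorial illustration; the
// names are hypothetical, and m1 stands for any p(trg | src) lookup):
double TargetWordMixtureSketch(const std::vector<WordID>& f, WordID e,
                               double lambda, double uniform_target,
                               double (*m1)(WordID src, WordID trg)) {
  const int flen = f.size();
  double tp = 0;
  for (int j = -1; j < flen; ++j) {
    const WordID src = (j < 0 ? 0 : f[j]);  // src index 0 stands in for NULL
    tp += lambda * m1(src, e) + (1.0 - lambda) * uniform_target;
  }
  return tp / (flen + 1);                   // uniform alignment prior
}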
kUNIFORM_TARGET(1.0 / vocab_e_size) {
-    assert(m1mixture >= 0.0 && m1mixture <= 1.0);
-    assert(vocab_e_size > 0);
-  }
-
-  // return p0 of (rule.e_, rule.f_)
-  prob_t operator()(const TRule& rule) const {
-    return p0(rule.f_, rule.e_, 0, 0);
-  }
-
-  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
-
-  const Model1& model1;
-  const Model1& invmodel1;
-  const prob_t kM1MIXTURE;  // Model 1 mixture component
-  const prob_t kUNIFORM_MIXTURE; // uniform mixture component
-  const prob_t kUNIFORM_SOURCE;
-  const prob_t kUNIFORM_TARGET;
-};
-
-// Base distribution for jump-size multinomials: p(0) = 0, p(1) is the mode,
-// and the probability decays as |jump| grows toward the maximum jump distance.
-struct JumpBase {
-  JumpBase();
-
-  const prob_t& operator()(int jump, unsigned src_len) const {
-    assert(jump != 0);
-    const std::map<int, prob_t>::const_iterator it = p[src_len].find(jump);
-    assert(it != p[src_len].end());
-    return it->second;
-  }
-  std::vector<std::map<int, prob_t> > p;
-};
-
-
-#endif
diff --git a/gi/pf/bayes_lattice_score.cc b/gi/pf/bayes_lattice_score.cc
deleted file mode 100644
index 70cb8dc2..00000000
--- a/gi/pf/bayes_lattice_score.cc
+++ /dev/null
@@ -1,309 +0,0 @@
-#include <iostream>
-#include <queue>
-
-#include <boost/functional.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "inside_outside.h"
-#include "hg.h"
-#include "hg_io.h"
-#include "bottom_up_parser.h"
-#include "fdict.h"
-#include "grammar.h"
-#include "m.h"
-#include "trule.h"
-#include "tdict.h"
-#include "filelib.h"
-#include "dict.h"
-#include "sampler.h"
-#include "ccrp.h"
-#include "ccrp_onetable.h"
-
-using namespace std;
-using namespace tr1;
-namespace po = boost::program_options;
-
-boost::shared_ptr<MT19937> prng;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples")
-        ("input,i",po::value<string>(),"Read parallel data from")
-        ("random_seed,S",po::value<uint32_t>(), "Random seed");
-  po::options_description clo("Command line options");
-  clo.add_options()
-        ("config", po::value<string>(), "Configuration file")
-        ("help", "Print this help message and exit");
-  po::options_description dconfig_options, dcmdline_options;
-  dconfig_options.add(opts);
-  dcmdline_options.add(opts).add(clo);
-
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  if (conf->count("config")) {
-    ifstream config((*conf)["config"].as<string>().c_str());
-    po::store(po::parse_config_file(config, dconfig_options), *conf);
-  }
-  po::notify(*conf);
-
-  if (conf->count("help") || (conf->count("input") == 0)) {
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-unsigned ReadCorpus(const string& filename,
-                    vector<Lattice>* e,
-                    set<WordID>* vocab_e) {
-  e->clear();
-  vocab_e->clear();
-  ReadFile rf(filename);
-  istream* in = rf.stream();
-  assert(*in);
-  string line;
-  unsigned toks = 0;
-  while(*in) {
-    getline(*in, line);
-    if (line.empty() && !*in) break;
-    e->push_back(Lattice());
-    Lattice& le = e->back();
-    LatticeTools::ConvertTextOrPLF(line, & le);
-    for (unsigned i = 0; i < le.size(); ++i)
-      for (unsigned j = 0; j < le[i].size(); ++j)
vocab_e->insert(le[i][j].label); -    toks += le.size(); -  } -  return toks; -} - -struct BaseModel { -  explicit BaseModel(unsigned tc) : -      unif(1.0 / tc), p(prob_t::One()) {} -  prob_t prob(const TRule& r) const { -    return unif; -  } -  void increment(const TRule& r, MT19937* rng) { -    p *= prob(r); -  } -  void decrement(const TRule& r, MT19937* rng) { -    p /= prob(r); -  } -  prob_t Likelihood() const { -    return p; -  } -  const prob_t unif; -  prob_t p; -}; - -struct UnigramModel { -  explicit UnigramModel(unsigned tc) : base(tc), crp(1,1,1,1), glue(1,1,1,1) {} -  BaseModel base; -  CCRP<TRule> crp; -  CCRP<TRule> glue; - -  prob_t Prob(const TRule& r) const { -    if (r.Arity() != 0) { -      return glue.prob(r, prob_t(0.5)); -    } -    return crp.prob(r, base.prob(r)); -  } - -  int Increment(const TRule& r, MT19937* rng) { -    if (r.Arity() != 0) { -      glue.increment(r, 0.5, rng); -      return 0; -    } else { -      if (crp.increment(r, base.prob(r), rng)) { -        base.increment(r, rng); -        return 1; -      } -      return 0; -    } -  } - -  int Decrement(const TRule& r, MT19937* rng) { -    if (r.Arity() != 0) { -      glue.decrement(r, rng); -      return 0; -    } else { -      if (crp.decrement(r, rng)) { -        base.decrement(r, rng); -        return -1; -      } -      return 0; -    } -  } - -  prob_t Likelihood() const { -    prob_t p; -    p.logeq(crp.log_crp_prob() + glue.log_crp_prob()); -    p *= base.Likelihood(); -    return p; -  } - -  void ResampleHyperparameters(MT19937* rng) { -    crp.resample_hyperparameters(rng); -    glue.resample_hyperparameters(rng); -    cerr << " d=" << crp.discount() << ", s=" << crp.strength() << "\t STOP d=" << glue.discount() << ", s=" << glue.strength() << endl; -  } -}; - -UnigramModel* plm; - -void SampleDerivation(const Hypergraph& hg, MT19937* rng, vector<unsigned>* sampled_deriv) { -  vector<prob_t> node_probs; -  Inside<prob_t, EdgeProb>(hg, &node_probs); -  queue<unsigned> q; -  q.push(hg.nodes_.size() - 2); -  while(!q.empty()) { -    unsigned cur_node_id = q.front(); -//    cerr << "NODE=" << cur_node_id << endl; -    q.pop(); -    const Hypergraph::Node& node = hg.nodes_[cur_node_id]; -    const unsigned num_in_edges = node.in_edges_.size(); -    unsigned sampled_edge = 0; -    if (num_in_edges == 1) { -      sampled_edge = node.in_edges_[0]; -    } else { -      //prob_t z; -      assert(num_in_edges > 1); -      SampleSet<prob_t> ss; -      for (unsigned j = 0; j < num_in_edges; ++j) { -        const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; -        prob_t p = edge.edge_prob_; -        for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) -          p *= node_probs[edge.tail_nodes_[k]]; -        ss.add(p); -//        cerr << log(ss[j]) << " ||| " << edge.rule_->AsString() << endl; -        //z += p; -      } -//      for (unsigned j = 0; j < num_in_edges; ++j) { -//        const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; -//        cerr << exp(log(ss[j] / z)) << " ||| " << edge.rule_->AsString() << endl; -//      } -//      cerr << " --- \n"; -      sampled_edge = node.in_edges_[rng->SelectSample(ss)]; -    } -    sampled_deriv->push_back(sampled_edge); -    const Hypergraph::Edge& edge = hg.edges_[sampled_edge]; -    for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { -      q.push(edge.tail_nodes_[j]); -    } -  } -//  for (unsigned i = 0; i < sampled_deriv->size(); ++i) { -//    cerr << *hg.edges_[(*sampled_deriv)[i]].rule_ << endl; -//  } -} - -void 
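// SampleDerivation above draws a derivation by ancestral sampling: one
// Inside pass computes node marginals, then a top-down walk picks each
// incoming edge with probability proportional to
//   edge_prob * prod_k inside(tail_k).
// The core categorical draw, sketched with plain doubles and a uniform draw
// u01 in [0,1) (editorial illustration; SampleSet/MT19937 do this for real):
#include <vector>
size_t SampleIndexSketch(const std::vector<double>& weights, double u01) {
  double z = 0;
  for (size_t i = 0; i < weights.size(); ++i) z += weights[i];
  double r = u01 * z;
  for (size_t i = 0; i < weights.size(); ++i) {
    r -= weights[i];
    if (r <= 0) return i;
  }
  return weights.size() - 1;  // guard against floating-point round-off
}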
IncrementDerivation(const Hypergraph& hg, const vector<unsigned>& d, UnigramModel* plm, MT19937* rng) { -  for (unsigned i = 0; i < d.size(); ++i) -    plm->Increment(*hg.edges_[d[i]].rule_, rng); -} - -void DecrementDerivation(const Hypergraph& hg, const vector<unsigned>& d, UnigramModel* plm, MT19937* rng) { -  for (unsigned i = 0; i < d.size(); ++i) -    plm->Decrement(*hg.edges_[d[i]].rule_, rng); -} - -prob_t TotalProb(const Hypergraph& hg) { -  return Inside<prob_t, EdgeProb>(hg); -} - -void IncrementLatticePath(const Hypergraph& hg, const vector<unsigned>& d, Lattice* pl) { -  Lattice& lat = *pl; -  for (int i = 0; i < d.size(); ++i) { -    const Hypergraph::Edge& edge = hg.edges_[d[i]]; -    if (edge.rule_->Arity() != 0) continue; -    WordID sym = edge.rule_->e_[0]; -    vector<LatticeArc>& las = lat[edge.i_]; -    int dist = edge.j_ - edge.i_; -    assert(dist > 0); -    for (int j = 0; j < las.size(); ++j) { -      if (las[j].dist2next == dist && -          las[j].label == sym) { -        las[j].cost += 1; -      } -    } -  } -} - -int main(int argc, char** argv) { -  po::variables_map conf; - -  InitCommandLine(argc, argv, &conf); -  vector<GrammarPtr> grammars(2); -  grammars[0].reset(new GlueGrammar("S","X")); -  const unsigned samples = conf["samples"].as<unsigned>(); - -  if (conf.count("random_seed")) -    prng.reset(new MT19937(conf["random_seed"].as<uint32_t>())); -  else -    prng.reset(new MT19937); -  MT19937& rng = *prng; -  vector<Lattice> corpuse; -  set<WordID> vocabe; -  cerr << "Reading corpus...\n"; -  const unsigned toks = ReadCorpus(conf["input"].as<string>(), &corpuse, &vocabe); -  cerr << "E-corpus size: " << corpuse.size() << " lattices\t (" << vocabe.size() << " word types)\n"; -  UnigramModel lm(vocabe.size()); -  vector<Hypergraph> hgs(corpuse.size()); -  vector<vector<unsigned> > derivs(corpuse.size()); -  for (int i = 0; i < corpuse.size(); ++i) { -    grammars[1].reset(new PassThroughGrammar(corpuse[i], "X")); -    ExhaustiveBottomUpParser parser("S", grammars); -    bool res = parser.Parse(corpuse[i], &hgs[i]);  // exhaustive parse -    assert(res); -  } - -  double csamples = 0; -  for (int SS=0; SS < samples; ++SS) { -    const bool is_last = ((samples - 1) == SS); -    prob_t dlh = prob_t::One(); -    bool record_sample = (SS > (samples * 1 / 3) && (SS % 5 == 3)); -    if (record_sample) csamples++; -    for (int ci = 0; ci < corpuse.size(); ++ci) { -      Lattice& lat = corpuse[ci]; -      Hypergraph& hg = hgs[ci]; -      vector<unsigned>& d = derivs[ci]; -      if (!is_last) DecrementDerivation(hg, d, &lm, &rng); -      for (unsigned i = 0; i < hg.edges_.size(); ++i) { -        TRule& r = *hg.edges_[i].rule_; -        if (r.Arity() != 0) -          hg.edges_[i].edge_prob_ = prob_t::One(); -        else -          hg.edges_[i].edge_prob_ = lm.Prob(r); -      } -      if (!is_last) { -        d.clear(); -        SampleDerivation(hg, &rng, &d); -        IncrementDerivation(hg, derivs[ci], &lm, &rng); -      } else { -        prob_t p = TotalProb(hg); -        dlh *= p; -        cerr << " p(sentence) = " << log(p) << "\t" << log(dlh) << endl; -      } -      if (record_sample) IncrementLatticePath(hg, derivs[ci], &lat); -    } -    double llh = log(lm.Likelihood()); -    cerr << "LLH=" << llh << "\tENTROPY=" << (-llh / log(2) / toks) << "\tPPL=" << pow(2, -llh / log(2) / toks) << endl; -    if (SS % 10 == 9) lm.ResampleHyperparameters(&rng); -    if (is_last) { -      double z = log(dlh); -      cerr << "TOTAL_PROB=" << z << "\tENTROPY=" << (-z / 
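// The diagnostics above convert a natural-log likelihood into bits per token
// and perplexity: bits = -llh / log(2) / toks and PPL = 2^bits. For example,
// llh = -693.147 over toks = 100 gives bits = 10 and PPL = 2^10 = 1024
// (using log(2) ~= 0.693147).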
log(2) / toks) << "\tPPL=" << pow(2, -z / log(2) / toks) << endl; -    } -  } -  cerr << lm.crp << endl; -  cerr << lm.glue << endl; -  for (int i = 0; i < corpuse.size(); ++i) { -    for (int j = 0; j < corpuse[i].size(); ++j) -      for (int k = 0; k < corpuse[i][j].size(); ++k) { -        corpuse[i][j][k].cost /= csamples; -        corpuse[i][j][k].cost += 1e-3; -        corpuse[i][j][k].cost = log(corpuse[i][j][k].cost); -      } -    cout << HypergraphIO::AsPLF(corpuse[i]) << endl; -  } -  return 0; -} - diff --git a/gi/pf/brat.cc b/gi/pf/brat.cc deleted file mode 100644 index 832f22cf..00000000 --- a/gi/pf/brat.cc +++ /dev/null @@ -1,543 +0,0 @@ -#include <iostream> -#include <tr1/memory> -#include <queue> - -#include <boost/functional.hpp> -#include <boost/multi_array.hpp> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "viterbi.h" -#include "hg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "cfg_wfst_composer.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -static unsigned kMAX_SRC_PHRASE; -static unsigned kMAX_TRG_PHRASE; -struct FSTState; - -double log_poisson(unsigned x, const double& lambda) { -  assert(lambda > 0.0); -  return log(lambda) * x - lgamma(x + 1) - lambda; -} - -struct ConditionalBase { -  explicit ConditionalBase(const double m1mixture, const unsigned vocab_e_size, const string& model1fname) : -      kM1MIXTURE(m1mixture), -      kUNIFORM_MIXTURE(1.0 - m1mixture), -      kUNIFORM_TARGET(1.0 / vocab_e_size), -      kNULL(TD::Convert("<eps>")) { -    assert(m1mixture >= 0.0 && m1mixture <= 1.0); -    assert(vocab_e_size > 0); -    LoadModel1(model1fname); -  } - -  void LoadModel1(const string& fname) { -    cerr << "Loading Model 1 parameters from " << fname << " ..." << endl; -    ReadFile rf(fname); -    istream& in = *rf.stream(); -    string line; -    unsigned lc = 0; -    while(getline(in, line)) { -      ++lc; -      int cur = 0; -      int start = 0; -      while(cur < line.size() && line[cur] != ' ') { ++cur; } -      assert(cur != line.size()); -      line[cur] = 0; -      const WordID src = TD::Convert(&line[0]); -      ++cur; -      start = cur; -      while(cur < line.size() && line[cur] != ' ') { ++cur; } -      assert(cur != line.size()); -      line[cur] = 0; -      WordID trg = TD::Convert(&line[start]); -      const double logprob = strtod(&line[cur + 1], NULL); -      if (src >= ttable.size()) ttable.resize(src + 1); -      ttable[src][trg].logeq(logprob); -    } -    cerr << "  read " << lc << " parameters.\n"; -  } - -  // return logp0 of rule.e_ | rule.f_ -  prob_t operator()(const TRule& rule) const { -    const int flen = rule.f_.size(); -    const int elen = rule.e_.size(); -    prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); -    prob_t p; -    p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01) -    for (int i = 0; i < elen; ++i) {               // for each position i in e-RHS -      const WordID trg = rule.e_[i]; -      prob_t tp = prob_t::Zero(); -      for (int j = -1; j < flen; ++j) { -        const WordID src = j < 0 ? 
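// log_poisson above is the log pmf of a Poisson(lambda) random variable:
//   log P(x; lambda) = x * log(lambda) - log(x!) - lambda,
// with log(x!) computed as lgamma(x + 1). For example,
//   log_poisson(2, 1.0) = 0 - log(2) - 1 ~= -1.693.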
kNULL : rule.f_[j]; -        const map<WordID, prob_t>::const_iterator it = ttable[src].find(trg); -        if (it != ttable[src].end()) { -          tp += kM1MIXTURE * it->second; -        } -        tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET; -      } -      tp *= uniform_src_alignment;                 //     draw a_i         ~uniform -      p *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform -    } -    return p; -  } - -  const prob_t kM1MIXTURE;  // Model 1 mixture component -  const prob_t kUNIFORM_MIXTURE; // uniform mixture component -  const prob_t kUNIFORM_TARGET; -  const WordID kNULL; -  vector<map<WordID, prob_t> > ttable; -}; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { -  po::options_description opts("Configuration options"); -  opts.add_options() -        ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples") -        ("input,i",po::value<string>(),"Read parallel data from") -        ("max_src_phrase",po::value<unsigned>()->default_value(3),"Maximum length of source language phrases") -        ("max_trg_phrase",po::value<unsigned>()->default_value(3),"Maximum length of target language phrases") -        ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)") -        ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") -        ("random_seed,S",po::value<uint32_t>(), "Random seed"); -  po::options_description clo("Command line options"); -  clo.add_options() -        ("config", po::value<string>(), "Configuration file") -        ("help,h", "Print this help message and exit"); -  po::options_description dconfig_options, dcmdline_options; -  dconfig_options.add(opts); -  dcmdline_options.add(opts).add(clo); -   -  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); -  if (conf->count("config")) { -    ifstream config((*conf)["config"].as<string>().c_str()); -    po::store(po::parse_config_file(config, dconfig_options), *conf); -  } -  po::notify(*conf); - -  if (conf->count("help") || (conf->count("input") == 0)) { -    cerr << dcmdline_options << endl; -    exit(1); -  } -} - -void ReadParallelCorpus(const string& filename, -                vector<vector<WordID> >* f, -                vector<vector<int> >* e, -                set<int>* vocab_f, -                set<int>* vocab_e) { -  f->clear(); -  e->clear(); -  vocab_f->clear(); -  vocab_e->clear(); -  istream* in; -  if (filename == "-") -    in = &cin; -  else -    in = new ifstream(filename.c_str()); -  assert(*in); -  string line; -  const WordID kDIV = TD::Convert("|||"); -  vector<WordID> tmp; -  while(*in) { -    getline(*in, line); -    if (line.empty() && !*in) break; -    e->push_back(vector<int>()); -    f->push_back(vector<int>()); -    vector<int>& le = e->back(); -    vector<int>& lf = f->back(); -    tmp.clear(); -    TD::ConvertSentence(line, &tmp); -    bool isf = true; -    for (unsigned i = 0; i < tmp.size(); ++i) { -      const int cur = tmp[i]; -      if (isf) { -        if (kDIV == cur) { isf = false; } else { -          lf.push_back(cur); -          vocab_f->insert(cur); -        } -      } else { -        assert(cur != kDIV); -        le.push_back(cur); -        vocab_e->insert(cur); -      } -    } -    assert(isf == false); -  } -  if (in != &cin) delete in; -} - -struct UniphraseLM { -  UniphraseLM(const vector<vector<int> >& corpus, -              const set<int>& vocab, -           
   const po::variables_map& conf) :
-    phrases_(1,1),
-    gen_(1,1),
-    corpus_(corpus),
-    uniform_word_(1.0 / vocab.size()),
-    gen_p0_(0.5),
-    p_end_(0.5),
-    use_poisson_(conf.count("poisson_length") > 0) {}
-
-  void ResampleHyperparameters(MT19937* rng) {
-    phrases_.resample_hyperparameters(rng);
-    gen_.resample_hyperparameters(rng);
-    cerr << " " << phrases_.alpha();
-  }
-
-  CCRP_NoTable<vector<int> > phrases_;
-  CCRP_NoTable<bool> gen_;
-  vector<vector<bool> > z_;   // z_[s][i] -- is there a phrase boundary after the ith word of sentence s?
-  const vector<vector<int> >& corpus_;
-  const double uniform_word_;
-  const double gen_p0_;
-  const double p_end_; // in base length distribution, p of the end of a phrase
-  const bool use_poisson_;
-};
-
-struct Reachability {
-  boost::multi_array<bool, 4> edges;  // edges[src_covered][trg_covered][src_delta][trg_delta] is this edge worth exploring?
-  boost::multi_array<short, 2> max_src_delta; // msd[src_covered][trg_covered] -- the largest src delta that's valid
-
-  Reachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) :
-      edges(boost::extents[srclen][trglen][src_max_phrase_len+1][trg_max_phrase_len+1]),
-      max_src_delta(boost::extents[srclen][trglen]) {
-    ComputeReachability(srclen, trglen, src_max_phrase_len, trg_max_phrase_len);
-  }
-
- private:
-  struct SState {
-    SState() : prev_src_covered(), prev_trg_covered() {}
-    SState(int i, int j) : prev_src_covered(i), prev_trg_covered(j) {}
-    int prev_src_covered;
-    int prev_trg_covered;
-  };
-
-  struct NState {
-    NState() : next_src_covered(), next_trg_covered() {}
-    NState(int i, int j) : next_src_covered(i), next_trg_covered(j) {}
-    int next_src_covered;
-    int next_trg_covered;
-  };
-
-  void ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) {
-    typedef boost::multi_array<vector<SState>, 2> array_type;
-    array_type a(boost::extents[srclen + 1][trglen + 1]);
-    a[0][0].push_back(SState());
-    for (int i = 0; i < srclen; ++i) {
-      for (int j = 0; j < trglen; ++j) {
-        if (a[i][j].size() == 0) continue;
-        const SState prev(i,j);
-        for (int k = 1; k <= src_max_phrase_len; ++k) {
-          if ((i + k) > srclen) continue;
-          for (int l = 1; l <= trg_max_phrase_len; ++l) {
-            if ((j + l) > trglen) continue;
-            a[i + k][j + l].push_back(prev);
-          }
-        }
-      }
-    }
-    a[0][0].clear();
-    cerr << "Final cell contains " << a[srclen][trglen].size() << " back pointers\n";
-    assert(a[srclen][trglen].size() > 0);
-
-    typedef boost::multi_array<bool, 2> rarray_type;
-    rarray_type r(boost::extents[srclen + 1][trglen + 1]);
-//    typedef boost::multi_array<vector<NState>, 2> narray_type;
-//    narray_type b(boost::extents[srclen + 1][trglen + 1]);
-    r[srclen][trglen] = true;
-    for (int i = srclen; i >= 0; --i) {
-      for (int j = trglen; j >= 0; --j) {
-        vector<SState>& prevs = a[i][j];
-        if (!r[i][j]) { prevs.clear(); }
-//        const NState nstate(i,j);
-        for (int k = 0; k < prevs.size(); ++k) {
-          r[prevs[k].prev_src_covered][prevs[k].prev_trg_covered] = true;
-          int src_delta = i - prevs[k].prev_src_covered;
-          edges[prevs[k].prev_src_covered][prevs[k].prev_trg_covered][src_delta][j - prevs[k].prev_trg_covered] = true;
-          short &msd = max_src_delta[prevs[k].prev_src_covered][prevs[k].prev_trg_covered];
-          if (src_delta >
msd) msd = src_delta; -//          b[prevs[k].prev_src_covered][prevs[k].prev_trg_covered].push_back(nstate); -        } -      } -    } -    assert(!edges[0][0][1][0]); -    assert(!edges[0][0][0][1]); -    assert(!edges[0][0][0][0]); -    cerr << "  MAX SRC DELTA[0][0] = " << max_src_delta[0][0] << endl; -    assert(max_src_delta[0][0] > 0); -    //cerr << "First cell contains " << b[0][0].size() << " forward pointers\n"; -    //for (int i = 0; i < b[0][0].size(); ++i) { -    //  cerr << "  -> (" << b[0][0][i].next_src_covered << "," << b[0][0][i].next_trg_covered << ")\n"; -    //} -  } -}; - -ostream& operator<<(ostream& os, const FSTState& q); -struct FSTState { -  explicit FSTState(int src_size) : -      trg_covered_(), -      src_covered_(), -      src_coverage_(src_size) {} - -  FSTState(short trg_covered, short src_covered, const vector<bool>& src_coverage, const vector<short>& src_prefix) : -      trg_covered_(trg_covered), -      src_covered_(src_covered), -      src_coverage_(src_coverage), -      src_prefix_(src_prefix) { -    if (src_coverage_.size() == src_covered) { -      assert(src_prefix.size() == 0); -    } -  } - -  // if we extend by the word at src_position, what are -  // the next states that are reachable and lie on a valid -  // path to the final state? -  vector<FSTState> Extensions(int src_position, int src_len, int trg_len, const Reachability& r) const { -    assert(src_position < src_coverage_.size()); -    if (src_coverage_[src_position]) { -      cerr << "Trying to extend " << *this << " with position " << src_position << endl; -      abort(); -    } -    vector<bool> ncvg = src_coverage_; -    ncvg[src_position] = true; - -    vector<FSTState> res; -    const int trg_remaining = trg_len - trg_covered_; -    if (trg_remaining <= 0) { -      cerr << "Target appears to have been covered: " << *this << " (trg_len=" << trg_len << ",trg_covered=" << trg_covered_ << ")" << endl; -      abort(); -    } -    const int src_remaining = src_len - src_covered_; -    if (src_remaining <= 0) { -      cerr << "Source appears to have been covered: " << *this << endl; -      abort(); -    } - -    for (int tc = 1; tc <= kMAX_TRG_PHRASE; ++tc) { -      if (r.edges[src_covered_][trg_covered_][src_prefix_.size() + 1][tc]) { -        int nc = src_prefix_.size() + 1 + src_covered_; -        res.push_back(FSTState(trg_covered_ + tc, nc, ncvg, vector<short>())); -      } -    } - -    if ((src_prefix_.size() + 1) < r.max_src_delta[src_covered_][trg_covered_]) { -      vector<short> nsp = src_prefix_; -      nsp.push_back(src_position); -      res.push_back(FSTState(trg_covered_, src_covered_, ncvg, nsp)); -    } - -    if (res.size() == 0) { -      cerr << *this << " can't be extended!\n"; -      abort(); -    } -    return res; -  } - -  short trg_covered_, src_covered_; -  vector<bool> src_coverage_; -  vector<short> src_prefix_; -}; -bool operator<(const FSTState& q, const FSTState& r) { -  if (q.trg_covered_ != r.trg_covered_) return q.trg_covered_ < r.trg_covered_; -  if (q.src_covered_!= r.src_covered_) return q.src_covered_ < r.src_covered_; -  if (q.src_coverage_ != r.src_coverage_) return q.src_coverage_ < r.src_coverage_; -  return q.src_prefix_ < r.src_prefix_; -} - -ostream& operator<<(ostream& os, const FSTState& q) { -  os << "[" << q.trg_covered_ << " : "; -  for (int i = 0; i < q.src_coverage_.size(); ++i) -    os << q.src_coverage_[i]; -  os << " : <"; -  for (int i = 0; i < q.src_prefix_.size(); ++i) { -    if (i != 0) os << ' '; -    os << q.src_prefix_[i]; -  } 
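// Reachability (defined above) runs a forward pass that records, for each
// (src_covered, trg_covered) cell, the cells it can be reached from with
// phrase pairs of bounded length, then a backward pass that keeps only cells
// lying on some complete path to (srclen, trglen). Typical use (editorial
// sketch; mirrors the MyFST constructor below):
//   Reachability r(src.size(), trg.size(), kMAX_SRC_PHRASE, kMAX_TRG_PHRASE);
//   if (r.edges[i][j][sd][td]) {
//     // a (sd, td) phrase pair starting at coverage (i, j) lies on a
//     // valid segmentation of the full sentence pair
//   }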
-  return os << ">]"; -} - -struct MyModel { -  MyModel(ConditionalBase& rcp0) : rp0(rcp0) {} -  typedef unordered_map<vector<WordID>, CCRP_NoTable<TRule>, boost::hash<vector<WordID> > > SrcToRuleCRPMap; - -  void DecrementRule(const TRule& rule) { -    SrcToRuleCRPMap::iterator it = rules.find(rule.f_); -    assert(it != rules.end()); -    it->second.decrement(rule); -    if (it->second.num_customers() == 0) rules.erase(it); -  } - -  void IncrementRule(const TRule& rule) { -    SrcToRuleCRPMap::iterator it = rules.find(rule.f_); -    if (it == rules.end()) { -      CCRP_NoTable<TRule> crp(1,1); -      it = rules.insert(make_pair(rule.f_, crp)).first; -    } -    it->second.increment(rule); -  } - -  // conditioned on rule.f_ -  prob_t RuleConditionalProbability(const TRule& rule) const { -    const prob_t base = rp0(rule); -    SrcToRuleCRPMap::const_iterator it = rules.find(rule.f_); -    if (it == rules.end()) { -      return base; -    } else { -      const double lp = it->second.logprob(rule, log(base)); -      prob_t q; q.logeq(lp); -      return q; -    } -  } - -  const ConditionalBase& rp0; -  SrcToRuleCRPMap rules; -}; - -struct MyFST : public WFST { -  MyFST(const vector<WordID>& ssrc, const vector<WordID>& strg, MyModel* m) : -      src(ssrc), trg(strg), -      r(src.size(),trg.size(),kMAX_SRC_PHRASE, kMAX_TRG_PHRASE), -      model(m) { -    FSTState in(src.size()); -    cerr << " INIT: " << in << endl; -    init = GetNode(in); -    for (int i = 0; i < in.src_coverage_.size(); ++i) in.src_coverage_[i] = true; -    in.src_covered_ = src.size(); -    in.trg_covered_ = trg.size(); -    cerr << "FINAL: " << in << endl; -    final = GetNode(in); -  } -  virtual const WFSTNode* Final() const; -  virtual const WFSTNode* Initial() const; - -  const WFSTNode* GetNode(const FSTState& q); -  map<FSTState, boost::shared_ptr<WFSTNode> > m; -  const vector<WordID>& src; -  const vector<WordID>& trg; -  Reachability r; -  const WFSTNode* init; -  const WFSTNode* final; -  MyModel* model; -}; - -struct MyNode : public WFSTNode { -  MyNode(const FSTState& q, MyFST* fst) : state(q), container(fst) {} -  virtual vector<pair<const WFSTNode*, TRulePtr> > ExtendInput(unsigned srcindex) const; -  const FSTState state; -  mutable MyFST* container; -}; - -vector<pair<const WFSTNode*, TRulePtr> > MyNode::ExtendInput(unsigned srcindex) const { -  cerr << "EXTEND " << state << " with " << srcindex << endl; -  vector<FSTState> ext = state.Extensions(srcindex, container->src.size(), container->trg.size(), container->r); -  vector<pair<const WFSTNode*,TRulePtr> > res(ext.size()); -  for (unsigned i = 0; i < ext.size(); ++i) { -    res[i].first = container->GetNode(ext[i]); -    if (ext[i].src_prefix_.size() == 0) { -      const unsigned trg_from = state.trg_covered_; -      const unsigned trg_to = ext[i].trg_covered_; -      const unsigned prev_prfx_size = state.src_prefix_.size(); -      res[i].second.reset(new TRule); -      res[i].second->lhs_ = -TD::Convert("X"); -      vector<WordID>& src = res[i].second->f_; -      vector<WordID>& trg = res[i].second->e_; -      src.resize(prev_prfx_size + 1); -      for (unsigned j = 0; j < prev_prfx_size; ++j) -        src[j] = container->src[state.src_prefix_[j]]; -      src[prev_prfx_size] = container->src[srcindex]; -      for (unsigned j = trg_from; j < trg_to; ++j) -        trg.push_back(container->trg[j]); -      res[i].second->scores_.set_value(FD::Convert("Proposal"), log(container->model->RuleConditionalProbability(*res[i].second))); -    } -  } -  return 
res;
-}
-
-const WFSTNode* MyFST::GetNode(const FSTState& q) {
-  boost::shared_ptr<WFSTNode>& res = m[q];
-  if (!res) {
-    res.reset(new MyNode(q, this));
-  }
-  return &*res;
-}
-
-const WFSTNode* MyFST::Final() const {
-  return final;
-}
-
-const WFSTNode* MyFST::Initial() const {
-  return init;
-}
-
-int main(int argc, char** argv) {
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-  kMAX_TRG_PHRASE = conf["max_trg_phrase"].as<unsigned>();
-  kMAX_SRC_PHRASE = conf["max_src_phrase"].as<unsigned>();
-
-  if (!conf.count("model1")) {
-    cerr << argv[0] << ": Please use --model1 to specify model 1 parameters\n";
-    return 1;
-  }
-  boost::shared_ptr<MT19937> prng;
-  if (conf.count("random_seed"))
-    prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
-  else
-    prng.reset(new MT19937);
-  MT19937& rng = *prng;
-
-  vector<vector<int> > corpuse, corpusf;
-  set<int> vocabe, vocabf;
-  ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
-  cerr << "f-Corpus size: " << corpusf.size() << " sentences\n";
-  cerr << "f-Vocabulary size: " << vocabf.size() << " types\n";
-  cerr << "e-Corpus size: " << corpuse.size() << " sentences\n";
-  cerr << "e-Vocabulary size: " << vocabe.size() << " types\n";
-  assert(corpusf.size() == corpuse.size());
-
-  ConditionalBase lp0(conf["model1_interpolation_weight"].as<double>(),
-                      vocabe.size(),
-                      conf["model1"].as<string>());
-  MyModel m(lp0);
-
-  TRule x("[X] ||| kAnwntR myN ||| at the convent ||| 0");
-  m.IncrementRule(x);
-  TRule y("[X] ||| nY dyN ||| gave ||| 0");
-  m.IncrementRule(y);
-
-
-  MyFST fst(corpusf[0], corpuse[0], &m);
-  ifstream in("./kimura.g");
-  assert(in);
-  CFG_WFSTComposer comp(fst);
-  Hypergraph hg;
-  bool succeed = comp.Compose(&in, &hg);
-  hg.PrintGraphviz();
-  if (succeed) { cerr << "SUCCESS.\n"; } else { cerr << "FAILURE REPORTED.\n"; }
-
-#if 0
-  ifstream in2("./amnabooks.g");
-  assert(in2);
-  MyFST fst2(corpusf[1], corpuse[1], &m);
-  CFG_WFSTComposer comp2(fst2);
-  Hypergraph hg2;
-  bool succeed2 = comp2.Compose(&in2, &hg2);
-  if (succeed2) { cerr << "SUCCESS.\n"; } else { cerr << "FAILURE REPORTED.\n"; }
-#endif
-
-  SparseVector<double> w; w.set_value(FD::Convert("Proposal"), 1.0);
-  hg.Reweight(w);
-  cerr << ViterbiFTree(hg) << endl;
-  return 0;
-}
-
diff --git a/gi/pf/cbgi.cc b/gi/pf/cbgi.cc
deleted file mode 100644
index 97f1ba34..00000000
--- a/gi/pf/cbgi.cc
+++ /dev/null
@@ -1,330 +0,0 @@
-#include <queue>
-#include <sstream>
-#include <iostream>
-
-#include <boost/unordered_map.hpp>
-#include <boost/functional/hash.hpp>
-
-#include "sampler.h"
-#include "filelib.h"
-#include "hg_io.h"
-#include "hg.h"
-#include "ccrp_nt.h"
-#include "trule.h"
-#include "inside_outside.h"
-
-using namespace std;
-using namespace std::tr1;
-
-double log_poisson(unsigned x, const double& lambda) {
-  assert(lambda > 0.0);
-  return log(lambda) * x - lgamma(x + 1) - lambda;
-}
-
-double log_decay(unsigned x, const double& b) {
-  assert(b > 1.0);
-  assert(x > 0);
-  return log(b - 1) - x * log(b);
-}
-
-struct SimpleBase {
-  SimpleBase(unsigned esize, unsigned fsize, unsigned ntsize = 144) :
-    uniform_e(-log(esize)),
-    uniform_f(-log(fsize)),
-    uniform_nt(-log(ntsize)) {
-  }
-
-  // binomial coefficient
-  static double choose(unsigned n, unsigned k) {
-    return exp(lgamma(n + 1) - lgamma(k + 1) - lgamma(n - k + 1));
-  }
-
-  // count the number of patterns of terminals and NTs in the
rule, given elen and flen -  static double log_number_of_patterns(const unsigned flen, const unsigned elen) { -    static vector<vector<double> > counts; -    if (elen >= counts.size()) counts.resize(elen + 1); -    if (flen >= counts[elen].size()) counts[elen].resize(flen + 1); -    double& count = counts[elen][flen]; -    if (count) return log(count); -    const unsigned max_arity = min(elen, flen); -    for (unsigned a = 0; a <= max_arity; ++a) -      count += choose(elen, a) * choose(flen, a); -    return log(count); -  } - -  // return logp0 of rule | LHS -  double operator()(const TRule& rule) const { -    const unsigned flen = rule.f_.size(); -    const unsigned elen = rule.e_.size(); -#if 0 -    double p = 0; -    p += log_poisson(flen, 0.5);                   // flen                 ~Pois(0.5) -    p += log_poisson(elen, flen);                  // elen | flen          ~Pois(flen) -    p -= log_number_of_patterns(flen, elen);       // pattern | flen,elen  ~Uniform -    for (unsigned i = 0; i < flen; ++i) {          // for each position in f-RHS -      if (rule.f_[i] <= 0)                         //   according to pattern -        p += uniform_nt;                           //     draw NT          ~Uniform -      else -        p += uniform_f;                            //     draw f terminal  ~Uniform -    } -    p -= lgamma(rule.Arity() + 1);                 // draw permutation     ~Uniform  -    for (unsigned i = 0; i < elen; ++i) {          // for each position in e-RHS -      if (rule.e_[i] > 0)                          //   according to pattern -        p += uniform_e;                            //     draw e|f term    ~Uniform -        // TODO this should prob be model 1 -    } -#else -    double p = 0; -    bool is_abstract = rule.f_[0] <= 0; -    p += log(0.5); -    if (is_abstract) { -      if (flen == 2) p += log(0.99); else p += log(0.01); -    } else { -      p += log_decay(flen, 3); -    } - -    for (unsigned i = 0; i < flen; ++i) {          // for each position in f-RHS -      if (rule.f_[i] <= 0)                         //   according to pattern -        p += uniform_nt;                           //     draw NT          ~Uniform -      else -        p += uniform_f;                            //     draw f terminal  ~Uniform -    } -#endif -    return p; -  } -  const double uniform_e; -  const double uniform_f; -  const double uniform_nt; -  vector<double> arities; -}; - -MT19937* rng = NULL; - -template <typename Base> -struct MHSamplerEdgeProb { -  MHSamplerEdgeProb(const Hypergraph& hg, -                  const map<int, CCRP_NoTable<TRule> >& rdp, -                  const Base& logp0, -                  const bool exclude_multiword_terminals) : edge_probs(hg.edges_.size()) { -    for (int i = 0; i < edge_probs.size(); ++i) { -      const TRule& rule = *hg.edges_[i].rule_; -      const map<int, CCRP_NoTable<TRule> >::const_iterator it = rdp.find(rule.lhs_); -      assert(it != rdp.end()); -      const CCRP_NoTable<TRule>& crp = it->second; -      edge_probs[i].logeq(crp.logprob(rule, logp0(rule))); -      if (exclude_multiword_terminals && rule.f_[0] > 0 && rule.f_.size() > 1) -        edge_probs[i] = prob_t::Zero(); -    } -  } -  inline prob_t operator()(const Hypergraph::Edge& e) const { -    return edge_probs[e.id_]; -  } -  prob_t DerivationProb(const vector<int>& d) const { -    prob_t p = prob_t::One(); -    for (unsigned i = 0; i < d.size(); ++i) -      p *= edge_probs[d[i]]; -    return p; -  } -  vector<prob_t> edge_probs; -}; - -template <typename Base> 
-struct ModelAndData { -  ModelAndData() : -     base_lh(prob_t::One()), -     logp0(10000, 10000), -     mh_samples(), -     mh_rejects() {} - -  void SampleCorpus(const string& hgpath, int i); -  void ResampleHyperparameters() { -    for (map<int, CCRP_NoTable<TRule> >::iterator it = rules.begin(); it != rules.end(); ++it) -      it->second.resample_hyperparameters(rng); -  } - -  CCRP_NoTable<TRule>& RuleCRP(int lhs) { -    map<int, CCRP_NoTable<TRule> >::iterator it = rules.find(lhs); -    if (it == rules.end()) { -      rules.insert(make_pair(lhs, CCRP_NoTable<TRule>(1,1))); -      it = rules.find(lhs); -    } -    return it->second; -  } - -  void IncrementRule(const TRule& rule) { -    CCRP_NoTable<TRule>& crp = RuleCRP(rule.lhs_); -    if (crp.increment(rule)) { -      prob_t p; p.logeq(logp0(rule)); -      base_lh *= p; -    } -  } - -  void DecrementRule(const TRule& rule) { -    CCRP_NoTable<TRule>& crp = RuleCRP(rule.lhs_); -    if (crp.decrement(rule)) { -      prob_t p; p.logeq(logp0(rule)); -      base_lh /= p; -    } -  } - -  void DecrementDerivation(const Hypergraph& hg, const vector<int>& d) { -    for (unsigned i = 0; i < d.size(); ++i) { -      const TRule& rule = *hg.edges_[d[i]].rule_; -      DecrementRule(rule); -    } -  } - -  void IncrementDerivation(const Hypergraph& hg, const vector<int>& d) { -    for (unsigned i = 0; i < d.size(); ++i) { -      const TRule& rule = *hg.edges_[d[i]].rule_; -      IncrementRule(rule); -    } -  } - -  prob_t Likelihood() const { -    prob_t p = prob_t::One(); -    for (map<int, CCRP_NoTable<TRule> >::const_iterator it = rules.begin(); it != rules.end(); ++it) { -      prob_t q; q.logeq(it->second.log_crp_prob()); -      p *= q; -    } -    p *= base_lh; -    return p; -  } - -  void ResampleDerivation(const Hypergraph& hg, vector<int>* sampled_derivation); - -  map<int, CCRP_NoTable<TRule> > rules;  // [lhs] -> distribution over RHSs -  prob_t base_lh; -  SimpleBase logp0; -  vector<vector<int> > samples;   // sampled derivations -  unsigned int mh_samples; -  unsigned int mh_rejects; -}; - -template <typename Base> -void ModelAndData<Base>::SampleCorpus(const string& hgpath, int n) { -  vector<Hypergraph> hgs(n); hgs.clear(); -  boost::unordered_map<TRule, unsigned> acc; -  map<int, unsigned> tot; -  for (int i = 0; i < n; ++i) { -    ostringstream os; -    os << hgpath << '/' << i << ".json.gz"; -    if (!FileExists(os.str())) continue; -    hgs.push_back(Hypergraph()); -    ReadFile rf(os.str()); -    HypergraphIO::ReadFromJSON(rf.stream(), &hgs.back()); -  } -  cerr << "Read " << hgs.size() << " alignment hypergraphs.\n"; -  samples.resize(hgs.size()); -  const unsigned SAMPLES = 2000; -  const unsigned burnin = 3 * SAMPLES / 4; -  const unsigned every = 20; -  for (unsigned s = 0; s < SAMPLES; ++s) { -    if (s % 10 == 0) { -      if (s > 0) { cerr << endl; ResampleHyperparameters(); } -      cerr << "[" << s << " LLH=" << log(Likelihood()) << " REJECTS=" << ((double)mh_rejects / mh_samples) << " LHS's=" << rules.size() << " base=" << log(base_lh) << "] "; -    } -    cerr << '.'; -    for (unsigned i = 0; i < hgs.size(); ++i) { -      ResampleDerivation(hgs[i], &samples[i]); -      if (s > burnin && s % every == 0) { -        for (unsigned j = 0; j < samples[i].size(); ++j) { -          const TRule& rule = *hgs[i].edges_[samples[i][j]].rule_; -          ++acc[rule]; -          ++tot[rule.lhs_]; -        } -      } -    } -  } -  cerr << endl; -  for (boost::unordered_map<TRule,unsigned>::iterator it = acc.begin(); it != 
acc.end(); ++it) { -    cout << it->first << " MyProb=" << log(it->second)-log(tot[it->first.lhs_]) << endl; -  } -} - -template <typename Base> -void ModelAndData<Base>::ResampleDerivation(const Hypergraph& hg, vector<int>* sampled_deriv) { -  vector<int> cur; -  cur.swap(*sampled_deriv); - -  const prob_t p_cur = Likelihood(); -  DecrementDerivation(hg, cur); -  if (cur.empty()) { -    // first iteration, create restaurants -    for (int i = 0; i < hg.edges_.size(); ++i) -      RuleCRP(hg.edges_[i].rule_->lhs_); -  } -  MHSamplerEdgeProb<SimpleBase> wf(hg, rules, logp0, cur.empty()); -//  MHSamplerEdgeProb<SimpleBase> wf(hg, rules, logp0, false); -  const prob_t q_cur = wf.DerivationProb(cur); -  vector<prob_t> node_probs; -  Inside<prob_t, MHSamplerEdgeProb<SimpleBase> >(hg, &node_probs, wf); -  queue<unsigned> q; -  q.push(hg.nodes_.size() - 3); -  while(!q.empty()) { -    unsigned cur_node_id = q.front(); -//    cerr << "NODE=" << cur_node_id << endl; -    q.pop(); -    const Hypergraph::Node& node = hg.nodes_[cur_node_id]; -    const unsigned num_in_edges = node.in_edges_.size(); -    unsigned sampled_edge = 0; -    if (num_in_edges == 1) { -      sampled_edge = node.in_edges_[0]; -    } else { -      prob_t z; -      assert(num_in_edges > 1); -      SampleSet<prob_t> ss; -      for (unsigned j = 0; j < num_in_edges; ++j) { -        const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; -        prob_t p = wf.edge_probs[edge.id_];             // edge proposal prob -        for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) -          p *= node_probs[edge.tail_nodes_[k]]; -        ss.add(p); -//        cerr << log(ss[j]) << " ||| " << edge.rule_->AsString() << endl; -        z += p; -      } -//      for (unsigned j = 0; j < num_in_edges; ++j) { -//        const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; -//        cerr << exp(log(ss[j] / z)) << " ||| " << edge.rule_->AsString() << endl; -//      } -//      cerr << " --- \n"; -      sampled_edge = node.in_edges_[rng->SelectSample(ss)]; -    } -    sampled_deriv->push_back(sampled_edge); -    const Hypergraph::Edge& edge = hg.edges_[sampled_edge]; -    for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { -      q.push(edge.tail_nodes_[j]); -    } -  } -  IncrementDerivation(hg, *sampled_deriv); - -//  cerr << "sampled derivation contains " << sampled_deriv->size() << " edges\n"; -//  cerr << "DERIV:\n"; -//  for (int i = 0; i < sampled_deriv->size(); ++i) { -//    cerr << "  " << hg.edges_[(*sampled_deriv)[i]].rule_->AsString() << endl; -//  } - -  if (cur.empty()) return;  // accept first sample - -  ++mh_samples; -  // only need to do MH if proposal is different to current state -  if (cur != *sampled_deriv) { -    const prob_t q_prop = wf.DerivationProb(*sampled_deriv); -    const prob_t p_prop = Likelihood(); -    if (!rng->AcceptMetropolisHastings(p_prop, p_cur, q_prop, q_cur)) { -      ++mh_rejects; -      DecrementDerivation(hg, *sampled_deriv); -      IncrementDerivation(hg, cur); -      swap(cur, *sampled_deriv); -    } -  } -} - -int main(int argc, char** argv) { -  rng = new MT19937; -  ModelAndData<SimpleBase> m; -  m.SampleCorpus("./hgs", 50); -  // m.SampleCorpus("./btec/hgs", 5000); -  return 0; -} - diff --git a/gi/pf/cfg_wfst_composer.cc b/gi/pf/cfg_wfst_composer.cc deleted file mode 100644 index 21d5ec5b..00000000 --- a/gi/pf/cfg_wfst_composer.cc +++ /dev/null @@ -1,731 +0,0 @@ -#include "cfg_wfst_composer.h" - -#include <iostream> -#include <fstream> -#include <map> -#include <queue> 
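// ResampleDerivation above proposes a new derivation from the inside
// distribution and applies a Metropolis-Hastings correction: accept with
// probability min(1, (p_prop / p_cur) * (q_cur / q_prop)), where p is the
// model probability and q the proposal probability of each derivation. A
// plain-double sketch of that test in log space, given a uniform draw u in
// [0,1) (editorial illustration; the helper name is hypothetical):
#include <cmath>
inline bool MHAcceptSketch(double log_p_prop, double log_p_cur,
                           double log_q_prop, double log_q_cur, double u) {
  const double log_alpha =
      (log_p_prop - log_p_cur) + (log_q_cur - log_q_prop);
  return log_alpha >= 0.0 || std::log(u) < log_alpha;
}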
-#include <tr1/unordered_map> -#include <tr1/unordered_set> - -#include <boost/shared_ptr.hpp> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> -#include "fast_lexical_cast.hpp" - -#include "phrasetable_fst.h" -#include "sparse_vector.h" -#include "tdict.h" -#include "hg.h" -#include "hg_remove_eps.h" - -namespace po = boost::program_options; -using namespace std; -using namespace std::tr1; - -WFSTNode::~WFSTNode() {} -WFST::~WFST() {} - -// Define the following macro if you want to see lots of debugging output -// when you run the chart parser -#undef DEBUG_CHART_PARSER - -// A few constants used by the chart parser /////////////// -static const int kMAX_NODES = 2000000; -static const string kPHRASE_STRING = "X"; -static bool constants_need_init = true; -static WordID kUNIQUE_START; -static WordID kPHRASE; -static TRulePtr kX1X2; -static TRulePtr kX1; -static WordID kEPS; -static TRulePtr kEPSRule; - -static void InitializeConstants() { -  if (constants_need_init) { -    kPHRASE = TD::Convert(kPHRASE_STRING) * -1; -    kUNIQUE_START = TD::Convert("S") * -1; -    kX1X2.reset(new TRule("[X] ||| [X,1] [X,2] ||| [X,1] [X,2]")); -    kX1.reset(new TRule("[X] ||| [X,1] ||| [X,1]")); -    kEPSRule.reset(new TRule("[X] ||| <eps> ||| <eps>")); -    kEPS = TD::Convert("<eps>"); -    constants_need_init = false; -  } -} -//////////////////////////////////////////////////////////// - -class EGrammarNode { -  friend bool CFG_WFSTComposer::Compose(const Hypergraph& src_forest, Hypergraph* trg_forest); -  friend void AddGrammarRule(const string& r, map<WordID, EGrammarNode>* g); - public: -#ifdef DEBUG_CHART_PARSER -  string hint; -#endif -  EGrammarNode() : is_some_rule_complete(false), is_root(false) {} -  const map<WordID, EGrammarNode>& GetTerminals() const { return tptr; } -  const map<WordID, EGrammarNode>& GetNonTerminals() const { return ntptr; } -  bool HasNonTerminals() const { return (!ntptr.empty()); } -  bool HasTerminals() const { return (!tptr.empty()); } -  bool RuleCompletes() const { -    return (is_some_rule_complete || (ntptr.empty() && tptr.empty())); -  } -  bool GrammarContinues() const { -    return !(ntptr.empty() && tptr.empty()); -  } -  bool IsRoot() const { -    return is_root; -  } -  // these are the features associated with the rule from the start -  // node up to this point.  If you use these features, you must -  // not Extend() this rule.
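
// (Editorial illustration, not part of the deleted file.) EGrammarNode is a
// trie over grammar rule right-hand sides: terminal transitions live in tptr
// and non-terminal transitions in ntptr. A hypothetical rule
//   [X] ||| a [Y,1] b ||| ...
// is stored as the path root(X) --a--> n1 --[Y]--> n2 --b--> n3, where n3 has
// is_some_rule_complete = true and input_features carries the rule's feature
// vector; Extend() below walks a single step along exactly this structure.
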
-  const SparseVector<double>& GetCFGProductionFeatures() const { -    return input_features; -  } - -  const EGrammarNode* Extend(const WordID& t) const { -    if (t < 0) { -      map<WordID, EGrammarNode>::const_iterator it = ntptr.find(t); -      if (it == ntptr.end()) return NULL; -      return &it->second; -    } else { -      map<WordID, EGrammarNode>::const_iterator it = tptr.find(t); -      if (it == tptr.end()) return NULL; -      return &it->second; -    } -  } - - private: -  map<WordID, EGrammarNode> tptr; -  map<WordID, EGrammarNode> ntptr; -  SparseVector<double> input_features; -  bool is_some_rule_complete; -  bool is_root; -}; -typedef map<WordID, EGrammarNode> EGrammar;    // indexed by the rule LHS - -// edges are immutable once created -struct Edge { -#ifdef DEBUG_CHART_PARSER -  static int id_count; -  const int id; -#endif -  const WordID cat;                   // lhs side of rule proved/being proved -  const EGrammarNode* const dot;      // dot position -  const WFSTNode* const q;             // start of span -  const WFSTNode* const r;             // end of span -  const Edge* const active_parent;    // back pointer, NULL for PREDICT items -  const Edge* const passive_parent;   // back pointer, NULL for SCAN and PREDICT items -  TRulePtr tps;   // translations -  boost::shared_ptr<SparseVector<double> > features; // features from CFG rule - -  bool IsPassive() const { -    // when a rule is completed, this value will be set -    return static_cast<bool>(features); -  } -  bool IsActive() const { return !IsPassive(); } -  bool IsInitial() const { -    return !(active_parent || passive_parent); -  } -  bool IsCreatedByScan() const { -    return active_parent && !passive_parent && !dot->IsRoot(); -  } -  bool IsCreatedByPredict() const { -    return dot->IsRoot(); -  } -  bool IsCreatedByComplete() const { -    return active_parent && passive_parent; -  } - -  // constructor for PREDICT -  Edge(WordID c, const EGrammarNode* d, const WFSTNode* q_and_r) : -#ifdef DEBUG_CHART_PARSER -    id(++id_count), -#endif -    cat(c), dot(d), q(q_and_r), r(q_and_r), active_parent(NULL), passive_parent(NULL), tps() {} -  Edge(WordID c, const EGrammarNode* d, const WFSTNode* q_and_r, const Edge* act_parent) : -#ifdef DEBUG_CHART_PARSER -    id(++id_count), -#endif -    cat(c), dot(d), q(q_and_r), r(q_and_r), active_parent(act_parent), passive_parent(NULL), tps() {} - -  // constructors for SCAN -  Edge(WordID c, const EGrammarNode* d, const WFSTNode* i, const WFSTNode* j, -       const Edge* act_par, const TRulePtr& translations) : -#ifdef DEBUG_CHART_PARSER -    id(++id_count), -#endif -    cat(c), dot(d), q(i), r(j), active_parent(act_par), passive_parent(NULL), tps(translations) {} - -  Edge(WordID c, const EGrammarNode* d, const WFSTNode* i, const WFSTNode* j, -       const Edge* act_par, const TRulePtr& translations, -       const SparseVector<double>& feats) : -#ifdef DEBUG_CHART_PARSER -    id(++id_count), -#endif -    cat(c), dot(d), q(i), r(j), active_parent(act_par), passive_parent(NULL), tps(translations), -    features(new SparseVector<double>(feats)) {} - -  // constructors for COMPLETE -  Edge(WordID c, const EGrammarNode* d, const WFSTNode* i, const WFSTNode* j, -       const Edge* act_par, const Edge *pas_par) : -#ifdef DEBUG_CHART_PARSER -    id(++id_count), -#endif -    cat(c), dot(d), q(i), r(j), active_parent(act_par), passive_parent(pas_par), tps() { -      assert(pas_par->IsPassive()); -      assert(act_par->IsActive()); -    } - -  Edge(WordID c, const 
EGrammarNode* d, const WFSTNode* i, const WFSTNode* j, -       const Edge* act_par, const Edge *pas_par, const SparseVector<double>& feats) : -#ifdef DEBUG_CHART_PARSER -    id(++id_count), -#endif -    cat(c), dot(d), q(i), r(j), active_parent(act_par), passive_parent(pas_par), tps(), -    features(new SparseVector<double>(feats)) { -      assert(pas_par->IsPassive()); -      assert(act_par->IsActive()); -    } - -  // constructor for COMPLETE query -  Edge(const WFSTNode* _r) : -#ifdef DEBUG_CHART_PARSER -    id(0), -#endif -    cat(0), dot(NULL), q(NULL), -    r(_r), active_parent(NULL), passive_parent(NULL), tps() {} -  // constructor for MERGE query -  Edge(const WFSTNode* _q, int) : -#ifdef DEBUG_CHART_PARSER -    id(0), -#endif -    cat(0), dot(NULL), q(_q), -    r(NULL), active_parent(NULL), passive_parent(NULL), tps() {} -}; -#ifdef DEBUG_CHART_PARSER -int Edge::id_count = 0; -#endif - -ostream& operator<<(ostream& os, const Edge& e) { -  string type = "PREDICT"; -  if (e.IsCreatedByScan()) -    type = "SCAN"; -  else if (e.IsCreatedByComplete()) -    type = "COMPLETE"; -  os << "[" -#ifdef DEBUG_CHART_PARSER -     << '(' << e.id << ") " -#else -     << '(' << &e << ") " -#endif -     << "q=" << e.q << ", r=" << e.r -     << ", cat="<< TD::Convert(e.cat*-1) << ", dot=" -     << e.dot -#ifdef DEBUG_CHART_PARSER -     << e.dot->hint -#endif -     << (e.IsActive() ? ", Active" : ", Passive") -     << ", " << type; -#ifdef DEBUG_CHART_PARSER -  if (e.active_parent) { os << ", act.parent=(" << e.active_parent->id << ')'; } -  if (e.passive_parent) { os << ", psv.parent=(" << e.passive_parent->id << ')'; } -#endif -  if (e.tps) { os << ", tps=" << e.tps->AsString(); } -  return os << ']'; -} - -struct Traversal { -  const Edge* const edge;      // result from the active / passive combination -  const Edge* const active; -  const Edge* const passive; -  Traversal(const Edge* me, const Edge* a, const Edge* p) : edge(me), active(a), passive(p) {} -}; - -struct UniqueTraversalHash { -  size_t operator()(const Traversal* t) const { -    size_t x = 5381; -    x = ((x << 5) + x) ^ reinterpret_cast<size_t>(t->active); -    x = ((x << 5) + x) ^ reinterpret_cast<size_t>(t->passive); -    x = ((x << 5) + x) ^ t->edge->IsActive(); -    return x; -  } -}; - -struct UniqueTraversalEquals { -  bool operator()(const Traversal* a, const Traversal* b) const { -    return (a->passive == b->passive && a->active == b->active && a->edge->IsActive() == b->edge->IsActive()); -  } -}; - -struct UniqueEdgeHash { -  size_t operator()(const Edge* e) const { -    size_t x = 5381; -    if (e->IsActive()) { -      x = ((x << 5) + x) ^ reinterpret_cast<size_t>(e->dot); -      x = ((x << 5) + x) ^ reinterpret_cast<size_t>(e->q); -      x = ((x << 5) + x) ^ reinterpret_cast<size_t>(e->r); -      x = ((x << 5) + x) ^ static_cast<size_t>(e->cat); -      x += 13; -    } else {  // with passive edges, we don't care about the dot -      x = ((x << 5) + x) ^ reinterpret_cast<size_t>(e->q); -      x = ((x << 5) + x) ^ reinterpret_cast<size_t>(e->r); -      x = ((x << 5) + x) ^ static_cast<size_t>(e->cat); -    } -    return x; -  } -}; - -struct UniqueEdgeEquals { -  bool operator()(const Edge* a, const Edge* b) const { -    if (a->IsActive() != b->IsActive()) return false; -    if (a->IsActive()) { -      return (a->cat == b->cat) && (a->dot == b->dot) && (a->q == b->q) && (a->r == b->r); -    } else { -      return (a->cat == b->cat) && (a->q == b->q) && (a->r == b->r); -    } -  } -}; - -struct REdgeHash { -  size_t 
operator()(const Edge* e) const { -    size_t x = 5381; -    x = ((x << 5) + x) ^ reinterpret_cast<size_t>(e->r); -    return x; -  } -}; - -struct REdgeEquals { -  bool operator()(const Edge* a, const Edge* b) const { -    return (a->r == b->r); -  } -}; - -struct QEdgeHash { -  size_t operator()(const Edge* e) const { -    size_t x = 5381; -    x = ((x << 5) + x) ^ reinterpret_cast<size_t>(e->q); -    return x; -  } -}; - -struct QEdgeEquals { -  bool operator()(const Edge* a, const Edge* b) const { -    return (a->q == b->q); -  } -}; - -struct EdgeQueue { -  queue<const Edge*> q; -  EdgeQueue() {} -  void clear() { while(!q.empty()) q.pop(); } -  bool HasWork() const { return !q.empty(); } -  const Edge* Next() { const Edge* res = q.front(); q.pop(); return res; } -  void AddEdge(const Edge* s) { q.push(s); } -}; - -class CFG_WFSTComposerImpl { - public: -  CFG_WFSTComposerImpl(WordID start_cat, -                       const WFSTNode* q_0, -                       const WFSTNode* q_final) : start_cat_(start_cat), q_0_(q_0), q_final_(q_final) {} - -  // returns false if the intersection is empty -  bool Compose(const EGrammar& g, Hypergraph* forest) { -    goal_node = NULL; -    EGrammar::const_iterator sit = g.find(start_cat_); -    forest->ReserveNodes(kMAX_NODES); -    assert(sit != g.end()); -    Edge* init = new Edge(start_cat_, &sit->second, q_0_); -    const bool init_is_new = IncorporateNewEdge(init); -    assert(init_is_new);  // keep the side effect outside assert so it survives NDEBUG builds -    while (exp_agenda.HasWork() || agenda.HasWork()) { -      while(exp_agenda.HasWork()) { -        const Edge* edge = exp_agenda.Next(); -        FinishEdge(edge, forest); -      } -      if (agenda.HasWork()) { -        const Edge* edge = agenda.Next(); -#ifdef DEBUG_CHART_PARSER -        cerr << "processing (" << edge->id << ')' << endl; -#endif -        if (edge->IsActive()) { -          if (edge->dot->HasTerminals()) -            DoScan(edge); -          if (edge->dot->HasNonTerminals()) { -            DoMergeWithPassives(edge); -            DoPredict(edge, g); -          } -        } else { -          DoComplete(edge); -        } -      } -    } -    if (goal_node) { -      forest->PruneUnreachable(goal_node->id_); -      RemoveEpsilons(forest, kEPS); -    } -    FreeAll(); -    return goal_node; -  } - -  void FreeAll() { -    for (int i = 0; i < free_list_.size(); ++i) -      delete free_list_[i]; -    free_list_.clear(); -    for (int i = 0; i < traversal_free_list_.size(); ++i) -      delete traversal_free_list_[i]; -    traversal_free_list_.clear(); -    all_traversals.clear(); -    exp_agenda.clear(); -    agenda.clear(); -    tps2node.clear(); -    edge2node.clear(); -    all_edges.clear(); -    passive_edges.clear(); -    active_edges.clear(); -  } - -  ~CFG_WFSTComposerImpl() { -    FreeAll(); -  } - -  // returns the total number of edges created during composition -  int EdgesCreated() const { -    return free_list_.size(); -  } - - private: -  void DoScan(const Edge* edge) { -    // here, we assume that the FST will potentially have many more outgoing -    // edges than the grammar, which will be just a couple.  If you want to -    // efficiently handle the case where both are relatively large, this code -    // will need to change how the intersection is done.  The best general -    // solution would probably be the Baeza-Yates double binary search.
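
The Baeza-Yates search named in that comment is not implemented anywhere in this file; the following self-contained sketch (illustrative names, assuming both edge lists are kept as strictly increasing id arrays) shows the idea: binary-search the median of the smaller sorted range in the larger one, then recurse on the two halves, for a cost of roughly O(m log(n/m)) with sizes m <= n.

#include <algorithm>
#include <vector>

// Intersect two sorted, duplicate-free id ranges a[0..na) and b[0..nb).
void baeza_yates_intersect(const int* a, int na, const int* b, int nb,
                           std::vector<int>* out) {
  if (na == 0 || nb == 0) return;               // empty intersection
  if (na > nb) { baeza_yates_intersect(b, nb, a, na, out); return; }
  const int mid = na / 2;                       // median of the smaller range
  const int* split = std::lower_bound(b, b + nb, a[mid]);
  baeza_yates_intersect(a, mid, b, static_cast<int>(split - b), out);
  if (split != b + nb && *split == a[mid]) {    // median occurs in both ranges
    out->push_back(a[mid]);
    ++split;
  }
  baeza_yates_intersect(a + mid + 1, na - mid - 1,
                        split, static_cast<int>(b + nb - split), out);
}
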
- -    const EGrammarNode* dot = edge->dot; -    const WFSTNode* r = edge->r; -    const map<WordID, EGrammarNode>& terms = dot->GetTerminals(); -    for (map<WordID, EGrammarNode>::const_iterator git = terms.begin(); -         git != terms.end(); ++git) { - -      if (!(TD::Convert(git->first)[0] >= '0' && TD::Convert(git->first)[0] <= '9')) { -        std::cerr << "TERMINAL SYMBOL: " << TD::Convert(git->first) << endl; -        abort(); -      } -      std::vector<std::pair<const WFSTNode*, TRulePtr> > extensions = r->ExtendInput(atoi(TD::Convert(git->first).c_str())); -      for (unsigned nsi = 0; nsi < extensions.size(); ++nsi) { -        const WFSTNode* next_r = extensions[nsi].first; -        const EGrammarNode* next_dot = &git->second; -        const bool grammar_continues = next_dot->GrammarContinues(); -        const bool rule_completes    = next_dot->RuleCompletes(); -        if (extensions[nsi].second) -          cerr << "!!! " << extensions[nsi].second->AsString() << endl; -        // cerr << "  rule completes: " << rule_completes << " after consuming " << TD::Convert(git->first) << endl; -        assert(grammar_continues || rule_completes); -        const SparseVector<double>& input_features = next_dot->GetCFGProductionFeatures(); -        if (rule_completes) -          IncorporateNewEdge(new Edge(edge->cat, next_dot, edge->q, next_r, edge, extensions[nsi].second, input_features)); -        if (grammar_continues) -          IncorporateNewEdge(new Edge(edge->cat, next_dot, edge->q, next_r, edge, extensions[nsi].second)); -      } -    } -  } - -  void DoPredict(const Edge* edge, const EGrammar& g) { -    const EGrammarNode* dot = edge->dot; -    const map<WordID, EGrammarNode>& non_terms = dot->GetNonTerminals(); -    for (map<WordID, EGrammarNode>::const_iterator git = non_terms.begin(); -         git != non_terms.end(); ++git) { -      const WordID nt_to_predict = git->first; -      //cerr << edge->id << " -- " << TD::Convert(nt_to_predict*-1) << endl; -      EGrammar::const_iterator egi = g.find(nt_to_predict); -      if (egi == g.end()) { -        cerr << "[ERROR] Can't find any grammar rules with a LHS of type " -             << TD::Convert(-1*nt_to_predict) << '!' 
<< endl; -        continue; -      } -      assert(edge->IsActive()); -      const EGrammarNode* new_dot = &egi->second; -      Edge* new_edge = new Edge(nt_to_predict, new_dot, edge->r, edge); -      IncorporateNewEdge(new_edge); -    } -  } - -  void DoComplete(const Edge* passive) { -#ifdef DEBUG_CHART_PARSER -    cerr << "  complete: " << *passive << endl; -#endif -    const WordID completed_nt = passive->cat; -    const WFSTNode* q = passive->q; -    const WFSTNode* next_r = passive->r; -    const Edge query(q); -    const pair<unordered_multiset<const Edge*, REdgeHash, REdgeEquals>::iterator, -         unordered_multiset<const Edge*, REdgeHash, REdgeEquals>::iterator > p = -      active_edges.equal_range(&query); -    for (unordered_multiset<const Edge*, REdgeHash, REdgeEquals>::iterator it = p.first; -         it != p.second; ++it) { -      const Edge* active = *it; -#ifdef DEBUG_CHART_PARSER -      cerr << "    pos: " << *active << endl; -#endif -      const EGrammarNode* next_dot = active->dot->Extend(completed_nt); -      if (!next_dot) continue; -      const SparseVector<double>& input_features = next_dot->GetCFGProductionFeatures(); -      // add up to 2 rules -      if (next_dot->RuleCompletes()) -        IncorporateNewEdge(new Edge(active->cat, next_dot, active->q, next_r, active, passive, input_features)); -      if (next_dot->GrammarContinues()) -        IncorporateNewEdge(new Edge(active->cat, next_dot, active->q, next_r, active, passive)); -    } -  } - -  void DoMergeWithPassives(const Edge* active) { -    // edge is active, has non-terminals, we need to find the passives that can extend it -    assert(active->IsActive()); -    assert(active->dot->HasNonTerminals()); -#ifdef DEBUG_CHART_PARSER -    cerr << "  merge active with passives: ACT=" << *active << endl; -#endif -    const Edge query(active->r, 1); -    const pair<unordered_multiset<const Edge*, QEdgeHash, QEdgeEquals>::iterator, -         unordered_multiset<const Edge*, QEdgeHash, QEdgeEquals>::iterator > p = -      passive_edges.equal_range(&query); -    for (unordered_multiset<const Edge*, QEdgeHash, QEdgeEquals>::iterator it = p.first; -         it != p.second; ++it) { -      const Edge* passive = *it; -      const EGrammarNode* next_dot = active->dot->Extend(passive->cat); -      if (!next_dot) continue; -      const WFSTNode* next_r = passive->r; -      const SparseVector<double>& input_features = next_dot->GetCFGProductionFeatures(); -      if (next_dot->RuleCompletes()) -        IncorporateNewEdge(new Edge(active->cat, next_dot, active->q, next_r, active, passive, input_features)); -      if (next_dot->GrammarContinues()) -        IncorporateNewEdge(new Edge(active->cat, next_dot, active->q, next_r, active, passive)); -    } -  } - -  // take ownership of edge memory, add to various indexes, etc -  // returns true if this edge is new -  bool IncorporateNewEdge(Edge* edge) { -    free_list_.push_back(edge); -    if (edge->passive_parent && edge->active_parent) { -      Traversal* t = new Traversal(edge, edge->active_parent, edge->passive_parent); -      traversal_free_list_.push_back(t); -      if (all_traversals.find(t) != all_traversals.end()) { -        return false; -      } else { -        all_traversals.insert(t); -      } -    } -    exp_agenda.AddEdge(edge); -    return true; -  } - -  bool FinishEdge(const Edge* edge, Hypergraph* hg) { -    bool is_new = false; -    if (all_edges.find(edge) == all_edges.end()) { -#ifdef DEBUG_CHART_PARSER -      cerr << *edge << " is NEW\n"; -#endif -      
all_edges.insert(edge); -      is_new = true; -      if (edge->IsPassive()) passive_edges.insert(edge); -      if (edge->IsActive()) active_edges.insert(edge); -      agenda.AddEdge(edge); -    } else { -#ifdef DEBUG_CHART_PARSER -      cerr << *edge << " is NOT NEW.\n"; -#endif -    } -    AddEdgeToTranslationForest(edge, hg); -    return is_new; -  } - -  // build the translation forest -  void AddEdgeToTranslationForest(const Edge* edge, Hypergraph* hg) { -    assert(hg->nodes_.size() < kMAX_NODES); -    Hypergraph::Node* tps = NULL; -    // first add any target language rules -    if (edge->tps) { -      Hypergraph::Node*& node = tps2node[(size_t)edge->tps.get()]; -      if (!node) { -        // cerr << "Creating phrases for " << edge->tps << endl; -        const TRulePtr& rule = edge->tps; -        node = hg->AddNode(kPHRASE); -        Hypergraph::Edge* hg_edge = hg->AddEdge(rule, Hypergraph::TailNodeVector()); -        hg_edge->feature_values_ += rule->GetFeatureValues(); -        hg->ConnectEdgeToHeadNode(hg_edge, node); -      } -      tps = node; -    } -    Hypergraph::Node*& head_node = edge2node[edge]; -    if (!head_node) -      head_node = hg->AddNode(kPHRASE); -    if (edge->cat == start_cat_ && edge->q == q_0_ && edge->r == q_final_ && edge->IsPassive()) { -      assert(goal_node == NULL || goal_node == head_node); -      goal_node = head_node; -    } -    Hypergraph::TailNodeVector tail; -    SparseVector<double> extra; -    if (edge->IsCreatedByPredict()) { -      // extra.set_value(FD::Convert("predict"), 1); -    } else if (edge->IsCreatedByScan()) { -      tail.push_back(edge2node[edge->active_parent]->id_); -      if (tps) { -        tail.push_back(tps->id_); -      } -      //extra.set_value(FD::Convert("scan"), 1); -    } else if (edge->IsCreatedByComplete()) { -      tail.push_back(edge2node[edge->active_parent]->id_); -      tail.push_back(edge2node[edge->passive_parent]->id_); -      //extra.set_value(FD::Convert("complete"), 1); -    } else { -      assert(!"unexpected edge type!"); -    } -    //cerr << head_node->id_ << "<--" << *edge << endl; - -#ifdef DEBUG_CHART_PARSER -      for (int i = 0; i < tail.size(); ++i) -        if (tail[i] == head_node->id_) { -          cerr << "ERROR: " << *edge << "\n   i=" << i << endl; -          if (i == 1) { cerr << "\tP: " << *edge->passive_parent << endl; } -          if (i == 0) { cerr << "\tA: " << *edge->active_parent << endl; } -          assert(!"self-loop found!"); -        } -#endif -    Hypergraph::Edge* hg_edge = NULL; -    if (tail.size() == 0) { -      hg_edge = hg->AddEdge(kEPSRule, tail); -    } else if (tail.size() == 1) { -      hg_edge = hg->AddEdge(kX1, tail); -    } else if (tail.size() == 2) { -      hg_edge = hg->AddEdge(kX1X2, tail); -    } -    if (edge->features) -      hg_edge->feature_values_ += *edge->features; -    hg_edge->feature_values_ += extra; -    hg->ConnectEdgeToHeadNode(hg_edge, head_node); -  } - -  Hypergraph::Node* goal_node; -  EdgeQueue exp_agenda; -  EdgeQueue agenda; -  unordered_map<size_t, Hypergraph::Node*> tps2node; -  unordered_map<const Edge*, Hypergraph::Node*, UniqueEdgeHash, UniqueEdgeEquals> edge2node; -  unordered_set<const Traversal*, UniqueTraversalHash, UniqueTraversalEquals> all_traversals; -  unordered_set<const Edge*, UniqueEdgeHash, UniqueEdgeEquals> all_edges; -  unordered_multiset<const Edge*, QEdgeHash, QEdgeEquals> passive_edges; -  unordered_multiset<const Edge*, REdgeHash, REdgeEquals> active_edges; -  vector<Edge*> free_list_; -  vector<Traversal*> 
traversal_free_list_; -  const WordID start_cat_; -  const WFSTNode* const q_0_; -  const WFSTNode* const q_final_; -}; - -#ifdef DEBUG_CHART_PARSER -static string TrimRule(const string& r) { -  size_t start = r.find(" |||") + 5; -  size_t end = r.rfind(" |||"); -  return r.substr(start, end - start); -} -#endif - -void AddGrammarRule(const string& r, EGrammar* g) { -  const size_t pos = r.find(" ||| "); -  if (pos == string::npos || r[0] != '[') { -    cerr << "Bad rule: " << r << endl; -    return; -  } -  const size_t rpos = r.rfind(" ||| "); -  string feats; -  string rs = r; -  if (rpos != pos) { -    feats = r.substr(rpos + 5); -    rs = r.substr(0, rpos); -  } -  string rhs = rs.substr(pos + 5); -  string trule = rs + " ||| " + rhs + " ||| " + feats; -  TRule tr(trule); -  cerr << "X: " << tr.e_[0] << endl; -#ifdef DEBUG_CHART_PARSER -  string hint_last_rule; -#endif -  EGrammarNode* cur = &(*g)[tr.GetLHS()]; -  cur->is_root = true; -  for (int i = 0; i < tr.FLength(); ++i) { -    WordID sym = tr.f()[i]; -#ifdef DEBUG_CHART_PARSER -    hint_last_rule = TD::Convert(sym < 0 ? -sym : sym); -    cur->hint += " <@@> (*" + hint_last_rule + ") " + TrimRule(tr.AsString()); -#endif -    if (sym < 0) -      cur = &cur->ntptr[sym]; -    else -      cur = &cur->tptr[sym]; -  } -#ifdef DEBUG_CHART_PARSER -  cur->hint += " <@@> (" + hint_last_rule + "*) " + TrimRule(tr.AsString()); -#endif -  cur->is_some_rule_complete = true; -  cur->input_features = tr.GetFeatureValues(); -} - -CFG_WFSTComposer::~CFG_WFSTComposer() { -  delete pimpl_; -} - -CFG_WFSTComposer::CFG_WFSTComposer(const WFST& wfst) { -  InitializeConstants(); -  pimpl_ = new CFG_WFSTComposerImpl(kUNIQUE_START, wfst.Initial(), wfst.Final()); -} - -bool CFG_WFSTComposer::Compose(const Hypergraph& src_forest, Hypergraph* trg_forest) { -  // first, convert the src forest into an EGrammar -  EGrammar g; -  const int nedges = src_forest.edges_.size(); -  const int nnodes = src_forest.nodes_.size(); -  vector<int> cats(nnodes); -  bool assign_cats = false; -  for (int i = 0; i < nnodes; ++i) -    if (assign_cats) { -      cats[i] = TD::Convert("CAT_" + boost::lexical_cast<string>(i)) * -1; -    } else { -      cats[i] = src_forest.nodes_[i].cat_; -    } -  // construct the grammar -  for (int i = 0; i < nedges; ++i) { -    const Hypergraph::Edge& edge = src_forest.edges_[i]; -    const vector<WordID>& src = edge.rule_->f(); -    EGrammarNode* cur = &g[cats[edge.head_node_]]; -    cur->is_root = true; -    int ntc = 0; -    for (int j = 0; j < src.size(); ++j) { -      WordID sym = src[j]; -      if (sym <= 0) { -        sym = cats[edge.tail_nodes_[ntc]]; -        ++ntc; -        cur = &cur->ntptr[sym]; -      } else { -        cur = &cur->tptr[sym]; -      } -    } -    cur->is_some_rule_complete = true; -    cur->input_features = edge.feature_values_; -  } -  EGrammarNode& goal_rule = g[kUNIQUE_START]; -  assert((goal_rule.ntptr.size() == 1 && goal_rule.tptr.size() == 0) || -         (goal_rule.ntptr.size() == 0 && goal_rule.tptr.size() == 1)); - -  return pimpl_->Compose(g, trg_forest); -} - -bool CFG_WFSTComposer::Compose(istream* in, Hypergraph* trg_forest) { -  EGrammar g; -  while(*in) { -    string line; -    getline(*in, line); -    if (line.empty()) continue; -    AddGrammarRule(line, &g); -  } - -  return pimpl_->Compose(g, trg_forest); -} diff --git a/gi/pf/cfg_wfst_composer.h b/gi/pf/cfg_wfst_composer.h deleted file mode 100644 index cf47f459..00000000 --- a/gi/pf/cfg_wfst_composer.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef 
_CFG_WFST_COMPOSER_H_ -#define _CFG_WFST_COMPOSER_H_ - -#include <iostream> -#include <vector> -#include <utility> - -#include "trule.h" -#include "wordid.h" - -class CFG_WFSTComposerImpl; -class Hypergraph; - -struct WFSTNode { -  virtual ~WFSTNode(); -  // returns the next states reachable by consuming srcindex (which identifies a word) -  // paired with the output string generated by taking that transition. -  virtual std::vector<std::pair<const WFSTNode*,TRulePtr> > ExtendInput(unsigned srcindex) const = 0; -}; - -struct WFST { -  virtual ~WFST(); -  virtual const WFSTNode* Final() const = 0; -  virtual const WFSTNode* Initial() const = 0; -}; - -class CFG_WFSTComposer { - public: -  ~CFG_WFSTComposer(); -  explicit CFG_WFSTComposer(const WFST& wfst); -  bool Compose(const Hypergraph& in_forest, Hypergraph* trg_forest); - -  // reads the grammar from a file. There must be a single top-level -  // S -> X rule.  Anything else is possible. Format is: -  // [S] ||| [SS,1] -  // [SS] ||| [NP,1] [VP,2] ||| Feature1=0.2 Feature2=-2.3 -  // [SS] ||| [VP,1] [NP,2] ||| Feature1=0.8 -  // [NP] ||| [DET,1] [N,2] ||| Feature3=2 -  // ... -  bool Compose(std::istream* grammar_file, Hypergraph* trg_forest); - - private: -  CFG_WFSTComposerImpl* pimpl_; -}; - -#endif diff --git a/gi/pf/conditional_pseg.h b/gi/pf/conditional_pseg.h deleted file mode 100644 index 81ddb206..00000000 --- a/gi/pf/conditional_pseg.h +++ /dev/null @@ -1,275 +0,0 @@ -#ifndef _CONDITIONAL_PSEG_H_ -#define _CONDITIONAL_PSEG_H_ - -#include <vector> -#include <tr1/unordered_map> -#include <boost/functional/hash.hpp> -#include <iostream> - -#include "m.h" -#include "prob.h" -#include "ccrp_nt.h" -#include "mfcr.h" -#include "trule.h" -#include "base_distributions.h" -#include "tdict.h" - -template <typename ConditionalBaseMeasure> -struct MConditionalTranslationModel { -  explicit MConditionalTranslationModel(ConditionalBaseMeasure& rcp0) : -    rp0(rcp0), d(0.5), strength(1.0), lambdas(1, prob_t::One()), p0s(1) {} - -  void Summary() const { -    std::cerr << "Number of conditioning contexts: " << r.size() << std::endl; -    for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { -      std::cerr << TD::GetString(it->first) << "   \t(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << std::endl; -      for (MFCR<1,TRule>::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) -        std::cerr << "   " << i2->second.total_dish_count_ << '\t' << i2->first << std::endl; -    } -  } - -  double log_likelihood(const double& dd, const double& aa) const { -    if (aa <= -dd) return -std::numeric_limits<double>::infinity(); -    //double llh = Md::log_beta_density(dd, 10, 3) + Md::log_gamma_density(aa, 1, 1); -    double llh = Md::log_beta_density(dd, 1, 1) + -                 Md::log_gamma_density(dd + aa, 1, 1); -    typename std::tr1::unordered_map<std::vector<WordID>, MFCR<1,TRule>, boost::hash<std::vector<WordID> > >::const_iterator it; -    for (it = r.begin(); it != r.end(); ++it) -      llh += it->second.log_crp_prob(dd, aa); -    return llh; -  } - -  struct DiscountResampler { -    DiscountResampler(const MConditionalTranslationModel& m) : m_(m) {} -    const MConditionalTranslationModel& m_; -    double operator()(const double& proposed_discount) const { -      return m_.log_likelihood(proposed_discount, m_.strength); -    } -  }; - -  struct AlphaResampler { -    AlphaResampler(const MConditionalTranslationModel& m) : m_(m) {} -    const 
MConditionalTranslationModel& m_; -    double operator()(const double& proposed_strength) const { -      return m_.log_likelihood(m_.d, proposed_strength); -    } -  }; - -  void ResampleHyperparameters(MT19937* rng) { -    typename std::tr1::unordered_map<std::vector<WordID>, MFCR<1,TRule>, boost::hash<std::vector<WordID> > >::iterator it; -#if 1 -    for (it = r.begin(); it != r.end(); ++it) { -      it->second.resample_hyperparameters(rng); -    } -#else -    const unsigned nloop = 5; -    const unsigned niterations = 10; -    DiscountResampler dr(*this); -    AlphaResampler ar(*this); -    for (int iter = 0; iter < nloop; ++iter) { -      strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits<double>::min(), -                              std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); -      double min_discount = std::numeric_limits<double>::min(); -      if (strength < 0.0) min_discount -= strength; -      d = slice_sampler1d(dr, d, *rng, min_discount, -                          1.0, 0.0, niterations, 100*niterations); -    } -    strength = slice_sampler1d(ar, strength, *rng, -d, -                            std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); -    std::cerr << "MConditionalTranslationModel(d=" << d << ",s=" << strength << ") = " << log_likelihood(d, strength) << std::endl; -    for (it = r.begin(); it != r.end(); ++it) { -      it->second.set_discount(d); -      it->second.set_strength(strength); -    } -#endif -  } - -  int DecrementRule(const TRule& rule, MT19937* rng) { -    RuleModelHash::iterator it = r.find(rule.f_); -    assert(it != r.end()); -    const TableCount delta = it->second.decrement(rule, rng); -    if (delta.count) { -      if (it->second.num_customers() == 0) r.erase(it); -    } -    return delta.count; -  } - -  int IncrementRule(const TRule& rule, MT19937* rng) { -    RuleModelHash::iterator it = r.find(rule.f_); -    if (it == r.end()) { -      //it = r.insert(make_pair(rule.f_, MFCR<1,TRule>(d, strength))).first; -      it = r.insert(make_pair(rule.f_, MFCR<1,TRule>(1,1,1,1,0.6, -0.12))).first; -    } -    p0s[0] = rp0(rule);  -    TableCount delta = it->second.increment(rule, p0s.begin(), lambdas.begin(), rng); -    return delta.count; -  } - -  prob_t RuleProbability(const TRule& rule) const { -    prob_t p; -    RuleModelHash::const_iterator it = r.find(rule.f_); -    if (it == r.end()) { -      p = rp0(rule); -    } else { -      p0s[0] = rp0(rule); -      p = it->second.prob(rule, p0s.begin(), lambdas.begin()); -    } -    return p; -  } - -  prob_t Likelihood() const { -    prob_t p; p.logeq(log_likelihood(d, strength)); -    return p; -  } - -  const ConditionalBaseMeasure& rp0; -  typedef std::tr1::unordered_map<std::vector<WordID>, -                                  MFCR<1, TRule>, -                                  boost::hash<std::vector<WordID> > > RuleModelHash; -  RuleModelHash r; -  double d, strength; -  std::vector<prob_t> lambdas; -  mutable std::vector<prob_t> p0s; -}; - -template <typename ConditionalBaseMeasure> -struct ConditionalTranslationModel { -  explicit ConditionalTranslationModel(ConditionalBaseMeasure& rcp0) : -    rp0(rcp0) {} - -  void Summary() const { -    std::cerr << "Number of conditioning contexts: " << r.size() << std::endl; -    for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { -      std::cerr << TD::GetString(it->first) << "   \t(\\alpha = " << it->second.alpha() << ") --------------------------" << 
std::endl; -      for (CCRP_NoTable<TRule>::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) -        std::cerr << "   " << i2->second << '\t' << i2->first << std::endl; -    } -  } - -  void ResampleHyperparameters(MT19937* rng) { -    for (RuleModelHash::iterator it = r.begin(); it != r.end(); ++it) -      it->second.resample_hyperparameters(rng); -  }  - -  int DecrementRule(const TRule& rule) { -    RuleModelHash::iterator it = r.find(rule.f_); -    assert(it != r.end());     -    int count = it->second.decrement(rule); -    if (count) { -      if (it->second.num_customers() == 0) r.erase(it); -    } -    return count; -  } - -  int IncrementRule(const TRule& rule) { -    RuleModelHash::iterator it = r.find(rule.f_); -    if (it == r.end()) { -      it = r.insert(make_pair(rule.f_, CCRP_NoTable<TRule>(1.0, 1.0, 8.0))).first; -    }  -    int count = it->second.increment(rule); -    return count; -  } - -  void IncrementRules(const std::vector<TRulePtr>& rules) { -    for (int i = 0; i < rules.size(); ++i) -      IncrementRule(*rules[i]); -  } - -  void DecrementRules(const std::vector<TRulePtr>& rules) { -    for (int i = 0; i < rules.size(); ++i) -      DecrementRule(*rules[i]); -  } - -  prob_t RuleProbability(const TRule& rule) const { -    prob_t p; -    RuleModelHash::const_iterator it = r.find(rule.f_); -    if (it == r.end()) { -      p.logeq(log(rp0(rule))); -    } else { -      p.logeq(it->second.logprob(rule, log(rp0(rule)))); -    } -    return p; -  } - -  prob_t Likelihood() const { -    prob_t p = prob_t::One(); -    for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { -      prob_t q; q.logeq(it->second.log_crp_prob()); -      p *= q; -      for (CCRP_NoTable<TRule>::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) -        p *= rp0(i2->first); -    } -    return p; -  } - -  const ConditionalBaseMeasure& rp0; -  typedef std::tr1::unordered_map<std::vector<WordID>, -                                  CCRP_NoTable<TRule>, -                                  boost::hash<std::vector<WordID> > > RuleModelHash; -  RuleModelHash r; -}; - -template <typename ConditionalBaseMeasure> -struct ConditionalParallelSegementationModel { -  explicit ConditionalParallelSegementationModel(ConditionalBaseMeasure& rcp0) : -    tmodel(rcp0), base(prob_t::One()), aligns(1,1) {} - -  ConditionalTranslationModel<ConditionalBaseMeasure> tmodel; - -  void DecrementRule(const TRule& rule) { -    tmodel.DecrementRule(rule); -  } - -  void IncrementRule(const TRule& rule) { -    tmodel.IncrementRule(rule); -  } - -  void IncrementRulesAndAlignments(const std::vector<TRulePtr>& rules) { -    tmodel.IncrementRules(rules); -    for (int i = 0; i < rules.size(); ++i) { -      IncrementAlign(rules[i]->f_.size()); -    } -  } - -  void DecrementRulesAndAlignments(const std::vector<TRulePtr>& rules) { -    tmodel.DecrementRules(rules); -    for (int i = 0; i < rules.size(); ++i) { -      DecrementAlign(rules[i]->f_.size()); -    } -  } - -  prob_t RuleProbability(const TRule& rule) const { -    return tmodel.RuleProbability(rule); -  } - -  void IncrementAlign(unsigned span) { -    if (aligns.increment(span)) { -      // TODO -    } -  } - -  void DecrementAlign(unsigned span) { -    if (aligns.decrement(span)) { -      // TODO -    } -  } - -  prob_t AlignProbability(unsigned span) const { -    prob_t p; -    p.logeq(aligns.logprob(span, Md::log_poisson(span, 1.0))); -    return p; -  } - -  prob_t Likelihood() const { -    prob_t p; 
p.logeq(aligns.log_crp_prob()); -    p *= base; -    p *= tmodel.Likelihood(); -    return p; -  } - -  prob_t base; -  CCRP_NoTable<unsigned> aligns; -}; - -#endif - diff --git a/gi/pf/condnaive.cc b/gi/pf/condnaive.cc deleted file mode 100644 index 419731ac..00000000 --- a/gi/pf/condnaive.cc +++ /dev/null @@ -1,298 +0,0 @@ -#include <iostream> -#include <tr1/memory> -#include <queue> - -#include <boost/multi_array.hpp> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "base_distributions.h" -#include "monotonic_pseg.h" -#include "conditional_pseg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "corpus.h" - -using namespace std; -using namespace std::tr1; -namespace po = boost::program_options; - -static unsigned kMAX_SRC_PHRASE; -static unsigned kMAX_TRG_PHRASE; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { -  po::options_description opts("Configuration options"); -  opts.add_options() -        ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples") -        ("input,i",po::value<string>(),"Read parallel data from") -        ("max_src_phrase",po::value<unsigned>()->default_value(4),"Maximum length of source language phrases") -        ("max_trg_phrase",po::value<unsigned>()->default_value(4),"Maximum length of target language phrases") -        ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)") -        ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") -        ("random_seed,S",po::value<uint32_t>(), "Random seed"); -  po::options_description clo("Command line options"); -  clo.add_options() -        ("config", po::value<string>(), "Configuration file") -        ("help,h", "Print this help message and exit"); -  po::options_description dconfig_options, dcmdline_options; -  dconfig_options.add(opts); -  dcmdline_options.add(opts).add(clo); -   -  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); -  if (conf->count("config")) { -    ifstream config((*conf)["config"].as<string>().c_str()); -    po::store(po::parse_config_file(config, dconfig_options), *conf); -  } -  po::notify(*conf); - -  if (conf->count("help") || (conf->count("input") == 0)) { -    cerr << dcmdline_options << endl; -    exit(1); -  } -} - -boost::shared_ptr<MT19937> prng; - -struct ModelAndData { -  explicit ModelAndData(ConditionalParallelSegementationModel<PhraseConditionalBase>& m, const vector<vector<int> >& ce, const vector<vector<int> >& cf, const set<int>& ve, const set<int>& vf) : -     model(m), -     rng(&*prng), -     corpuse(ce), -     corpusf(cf), -     vocabe(ve), -     vocabf(vf), -     mh_samples(), -     mh_rejects(), -     kX(-TD::Convert("X")), -     derivations(corpuse.size()) {} - -  void ResampleHyperparameters() { -  } - -  void InstantiateRule(const pair<short,short>& from, -                       const pair<short,short>& to, -                       const vector<int>& sentf, -                       const vector<int>& sente, -                       TRule* rule) const { -    rule->f_.clear(); -    rule->e_.clear(); -    rule->lhs_ = kX; -    for (short i = from.first; i < to.first; ++i) -      rule->f_.push_back(sentf[i]); -    for (short i = from.second; i < to.second; ++i) -      rule->e_.push_back(sente[i]); -  } - -  void DecrementDerivation(const vector<pair<short,short> >& 
d, const vector<int>& sentf, const vector<int>& sente) { -    if (d.size() < 2) return; -    TRule x; -    for (int i = 1; i < d.size(); ++i) { -      InstantiateRule(d[i], d[i-1], sentf, sente, &x); -      model.DecrementRule(x); -      model.DecrementAlign(x.f_.size()); -    } -  } - -  void PrintDerivation(const vector<pair<short,short> >& d, const vector<int>& sentf, const vector<int>& sente) { -    if (d.size() < 2) return; -    TRule x; -    for (int i = 1; i < d.size(); ++i) { -      InstantiateRule(d[i], d[i-1], sentf, sente, &x); -      cerr << i << '/' << (d.size() - 1) << ": " << x << endl; -    } -  } - -  void IncrementDerivation(const vector<pair<short,short> >& d, const vector<int>& sentf, const vector<int>& sente) { -    if (d.size() < 2) return; -    TRule x; -    for (int i = 1; i < d.size(); ++i) { -      InstantiateRule(d[i], d[i-1], sentf, sente, &x); -      model.IncrementRule(x); -      model.IncrementAlign(x.f_.size()); -    } -  } - -  prob_t Likelihood() const { -    return model.Likelihood(); -  } - -  prob_t DerivationProposalProbability(const vector<pair<short,short> >& d, const vector<int>& sentf, const vector<int>& sente) const { -    prob_t p = prob_t::One(); -    TRule x; -    for (int i = 1; i < d.size(); ++i) { -      InstantiateRule(d[i], d[i-1], sentf, sente, &x); -      p *= model.RuleProbability(x); -      p *= model.AlignProbability(x.f_.size()); -    } -    return p; -  } - -  void Sample(); - -  ConditionalParallelSegementationModel<PhraseConditionalBase>& model; -  MT19937* rng; -  const vector<vector<int> >& corpuse, corpusf; -  const set<int>& vocabe, vocabf; -  unsigned mh_samples, mh_rejects; -  const int kX; -  vector<vector<pair<short, short> > > derivations; -}; - -void ModelAndData::Sample() { -  unsigned MAXK = kMAX_SRC_PHRASE; -  unsigned MAXL = kMAX_TRG_PHRASE; -  TRule x; -  x.lhs_ = -TD::Convert("X"); - -  for (int samples = 0; samples < 1000; ++samples) { -    if (samples % 1 == 0 && samples > 0) { -      //ResampleHyperparameters(); -      cerr << " [" << samples << " LLH=" << log(Likelihood()) << " MH=" << ((double)mh_rejects / mh_samples) << "]\n"; -      for (int i = 0; i < 10; ++i) { -        cerr << "SENTENCE: " << TD::GetString(corpusf[i]) << " ||| " << TD::GetString(corpuse[i]) << endl; -        PrintDerivation(derivations[i], corpusf[i], corpuse[i]); -      } -      static TRule xx("[X] ||| w n ||| s h ||| X=0"); -      const CCRP_NoTable<TRule>& dcrp = model.tmodel.r.find(xx.f_)->second; -      for (CCRP_NoTable<TRule>::const_iterator it = dcrp.begin(); it != dcrp.end(); ++it) { -        cerr << "\t" << it->second << "\t" << it->first << endl; -      } -    } -    cerr << '.' 
<< flush; -    for (int s = 0; s < corpuse.size(); ++s) { -      const vector<int>& sentf = corpusf[s]; -      const vector<int>& sente = corpuse[s]; -//      cerr << "  CUSTOMERS: " << rules.num_customers() << endl; -//      cerr << "SENTENCE: " << TD::GetString(sentf) << " ||| " << TD::GetString(sente) << endl; - -      vector<pair<short, short> >& deriv = derivations[s]; -      const prob_t p_cur = Likelihood(); -      DecrementDerivation(deriv, sentf, sente); - -      boost::multi_array<prob_t, 2> a(boost::extents[sentf.size() + 1][sente.size() + 1]); -      boost::multi_array<prob_t, 4> trans(boost::extents[sentf.size() + 1][sente.size() + 1][MAXK][MAXL]); -      a[0][0] = prob_t::One(); -      for (int i = 0; i < sentf.size(); ++i) { -        for (int j = 0; j < sente.size(); ++j) { -          const prob_t src_a = a[i][j]; -          x.f_.clear(); -          for (int k = 1; k <= MAXK; ++k) { -            if (i + k > sentf.size()) break; -            x.f_.push_back(sentf[i + k - 1]); -            x.e_.clear(); -            const prob_t p_span = model.AlignProbability(k);  // prob of consuming this much source -            for (int l = 1; l <= MAXL; ++l) { -              if (j + l > sente.size()) break; -              x.e_.push_back(sente[j + l - 1]); -              trans[i][j][k - 1][l - 1] = model.RuleProbability(x) * p_span; -              a[i + k][j + l] += src_a * trans[i][j][k - 1][l - 1]; -            } -          } -        } -      } -//      cerr << "Inside: " << log(a[sentf.size()][sente.size()]) << endl; -      const prob_t q_cur = DerivationProposalProbability(deriv, sentf, sente); - -      vector<pair<short,short> > newderiv; -      int cur_i = sentf.size(); -      int cur_j = sente.size(); -      while(cur_i > 0 && cur_j > 0) { -        newderiv.push_back(pair<short,short>(cur_i, cur_j)); -//        cerr << "NODE: (" << cur_i << "," << cur_j << ")\n"; -        SampleSet<prob_t> ss; -        vector<pair<short,short> > nexts; -        for (int k = 1; k <= MAXK; ++k) { -          const int hyp_i = cur_i - k; -          if (hyp_i < 0) break; -          for (int l = 1; l <= MAXL; ++l) { -            const int hyp_j = cur_j - l; -            if (hyp_j < 0) break; -            const prob_t& inside = a[hyp_i][hyp_j]; -            if (inside == prob_t::Zero()) continue; -            const prob_t& transp = trans[hyp_i][hyp_j][k - 1][l - 1]; -            if (transp == prob_t::Zero()) continue; -            const prob_t p = inside * transp; -            ss.add(p); -            nexts.push_back(pair<short,short>(hyp_i, hyp_j)); -//            cerr << "    (" << hyp_i << "," << hyp_j << ")  <--- " << log(p) << endl; -          } -        } -//        cerr << "  sample set has " << nexts.size() << " elements.\n"; -        const int selected = rng->SelectSample(ss); -        cur_i = nexts[selected].first; -        cur_j = nexts[selected].second; -      } -      newderiv.push_back(pair<short,short>(0,0)); -      const prob_t q_new = DerivationProposalProbability(newderiv, sentf, sente); -      IncrementDerivation(newderiv, sentf, sente); -//      cerr << "SANITY: " << q_new << "  " <<log(DerivationProposalProbability(newderiv, sentf, sente)) << endl; -      if (deriv.empty()) { deriv = newderiv; continue; } -      ++mh_samples; - -      if (deriv != newderiv) { -        const prob_t p_new = Likelihood(); -//        cerr << "p_cur=" << log(p_cur) << "\t p_new=" << log(p_new) << endl; -//        cerr << "q_cur=" << log(q_cur) << "\t q_new=" << log(q_new) << endl; -        if 
(!rng->AcceptMetropolisHastings(p_new, p_cur, q_new, q_cur)) { -          ++mh_rejects; -          DecrementDerivation(newderiv, sentf, sente); -          IncrementDerivation(deriv, sentf, sente); -        } else { -//          cerr << "  ACCEPT\n"; -          deriv = newderiv; -        } -      } -    } -  } -} - -int main(int argc, char** argv) { -  po::variables_map conf; -  InitCommandLine(argc, argv, &conf); -  kMAX_TRG_PHRASE = conf["max_trg_phrase"].as<unsigned>(); -  kMAX_SRC_PHRASE = conf["max_src_phrase"].as<unsigned>(); - -  if (!conf.count("model1")) { -    cerr << argv[0] << ": Please use --model1 to specify model 1 parameters\n"; -    return 1; -  } -  if (conf.count("random_seed")) -    prng.reset(new MT19937(conf["random_seed"].as<uint32_t>())); -  else -    prng.reset(new MT19937); -//  MT19937& rng = *prng; - -  vector<vector<int> > corpuse, corpusf; -  set<int> vocabe, vocabf; -  corpus::ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe); -  cerr << "f-Corpus size: " << corpusf.size() << " sentences\n"; -  cerr << "f-Vocabulary size: " << vocabf.size() << " types\n"; -  cerr << "e-Corpus size: " << corpuse.size() << " sentences\n"; -  cerr << "e-Vocabulary size: " << vocabe.size() << " types\n"; -  assert(corpusf.size() == corpuse.size()); - -  Model1 m1(conf["model1"].as<string>()); - -  PhraseConditionalBase pcb0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size()); -  ConditionalParallelSegementationModel<PhraseConditionalBase> x(pcb0);   - -  ModelAndData posterior(x, corpuse, corpusf, vocabe, vocabf); -  posterior.Sample(); - -  TRule r1("[X] ||| x ||| l e ||| X=0"); -  TRule r2("[X] ||| A ||| a d ||| X=0"); -  TRule r3("[X] ||| n ||| e r ||| X=0"); -  TRule r4("[X] ||| x A n ||| b l a g ||| X=0"); - -  PhraseConditionalUninformativeBase u0(vocabe.size()); - -  cerr << (pcb0(r1)*pcb0(r2)*pcb0(r3)) << endl; -  cerr << (u0(r4)) << endl; - -  return 0; -} - diff --git a/gi/pf/corpus.cc b/gi/pf/corpus.cc deleted file mode 100644 index cb6e4ed7..00000000 --- a/gi/pf/corpus.cc +++ /dev/null @@ -1,62 +0,0 @@ -#include "corpus.h" - -#include <set> -#include <vector> -#include <string> - -#include "tdict.h" -#include "filelib.h" - -using namespace std; - -namespace corpus { - -void ReadParallelCorpus(const string& filename, -                vector<vector<WordID> >* f, -                vector<vector<WordID> >* e, -                set<WordID>* vocab_f, -                set<WordID>* vocab_e) { -  f->clear(); -  e->clear(); -  vocab_f->clear(); -  vocab_e->clear(); -  ReadFile rf(filename); -  istream* in = rf.stream(); -  assert(*in); -  string line; -  unsigned lc = 0; -  const WordID kDIV = TD::Convert("|||"); -  vector<WordID> tmp; -  while(getline(*in, line)) { -    ++lc; -    e->push_back(vector<int>()); -    f->push_back(vector<int>()); -    vector<int>& le = e->back(); -    vector<int>& lf = f->back(); -    tmp.clear(); -    TD::ConvertSentence(line, &tmp); -    bool isf = true; -    for (unsigned i = 0; i < tmp.size(); ++i) { -      const int cur = tmp[i]; -      if (isf) { -        if (kDIV == cur) { -          isf = false; -        } else { -          lf.push_back(cur); -          vocab_f->insert(cur); -        } -      } else { -        if (cur == kDIV) { -          cerr << "ERROR in " << lc << ": " << line << endl << endl; -          abort(); -        } -        le.push_back(cur); -        vocab_e->insert(cur); -      } -    } -    assert(isf == false); -  } -} - -} - diff --git a/gi/pf/corpus.h b/gi/pf/corpus.h 
deleted file mode 100644 index e7febdb7..00000000 --- a/gi/pf/corpus.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef _CORPUS_H_ -#define _CORPUS_H_ - -#include <string> -#include <vector> -#include <set> -#include "wordid.h" - -namespace corpus { - -void ReadParallelCorpus(const std::string& filename, -                std::vector<std::vector<WordID> >* f, -                std::vector<std::vector<WordID> >* e, -                std::set<WordID>* vocab_f, -                std::set<WordID>* vocab_e); - -} - -#endif diff --git a/gi/pf/dpnaive.cc b/gi/pf/dpnaive.cc deleted file mode 100644 index 75ccad72..00000000 --- a/gi/pf/dpnaive.cc +++ /dev/null @@ -1,301 +0,0 @@ -#include <iostream> -#include <tr1/memory> -#include <queue> - -#include <boost/multi_array.hpp> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "base_distributions.h" -#include "monotonic_pseg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "corpus.h" - -using namespace std; -using namespace std::tr1; -namespace po = boost::program_options; - -static unsigned kMAX_SRC_PHRASE; -static unsigned kMAX_TRG_PHRASE; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { -  po::options_description opts("Configuration options"); -  opts.add_options() -        ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples") -        ("input,i",po::value<string>(),"Read parallel data from") -        ("max_src_phrase",po::value<unsigned>()->default_value(4),"Maximum length of source language phrases") -        ("max_trg_phrase",po::value<unsigned>()->default_value(4),"Maximum length of target language phrases") -        ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)") -        ("inverse_model1,M",po::value<string>(),"Inverse Model 1 parameters (used in base distribution)") -        ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") -        ("random_seed,S",po::value<uint32_t>(), "Random seed"); -  po::options_description clo("Command line options"); -  clo.add_options() -        ("config", po::value<string>(), "Configuration file") -        ("help,h", "Print this help message and exit"); -  po::options_description dconfig_options, dcmdline_options; -  dconfig_options.add(opts); -  dcmdline_options.add(opts).add(clo); -   -  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); -  if (conf->count("config")) { -    ifstream config((*conf)["config"].as<string>().c_str()); -    po::store(po::parse_config_file(config, dconfig_options), *conf); -  } -  po::notify(*conf); - -  if (conf->count("help") || (conf->count("input") == 0)) { -    cerr << dcmdline_options << endl; -    exit(1); -  } -} - -boost::shared_ptr<MT19937> prng; - -template <typename Base> -struct ModelAndData { -  explicit ModelAndData(MonotonicParallelSegementationModel<PhraseJointBase_BiDir>& m, const Base& b, const vector<vector<int> >& ce, const vector<vector<int> >& cf, const set<int>& ve, const set<int>& vf) : -     model(m), -     rng(&*prng), -     p0(b), -     baseprob(prob_t::One()), -     corpuse(ce), -     corpusf(cf), -     vocabe(ve), -     vocabf(vf), -     mh_samples(), -     mh_rejects(), -     kX(-TD::Convert("X")), -     derivations(corpuse.size()) {} - -  void ResampleHyperparameters() { -  } - -  void InstantiateRule(const pair<short,short>& from, -               
        const pair<short,short>& to, -                       const vector<int>& sentf, -                       const vector<int>& sente, -                       TRule* rule) const { -    rule->f_.clear(); -    rule->e_.clear(); -    rule->lhs_ = kX; -    for (short i = from.first; i < to.first; ++i) -      rule->f_.push_back(sentf[i]); -    for (short i = from.second; i < to.second; ++i) -      rule->e_.push_back(sente[i]); -  } - -  void DecrementDerivation(const vector<pair<short,short> >& d, const vector<int>& sentf, const vector<int>& sente) { -    if (d.size() < 2) return; -    TRule x; -    for (int i = 1; i < d.size(); ++i) { -      InstantiateRule(d[i], d[i-1], sentf, sente, &x); -      model.DecrementRule(x); -      model.DecrementContinue(); -    } -    model.DecrementStop(); -  } - -  void PrintDerivation(const vector<pair<short,short> >& d, const vector<int>& sentf, const vector<int>& sente) { -    if (d.size() < 2) return; -    TRule x; -    for (int i = 1; i < d.size(); ++i) { -      InstantiateRule(d[i], d[i-1], sentf, sente, &x); -      cerr << i << '/' << (d.size() - 1) << ": " << x << endl; -    } -  } - -  void IncrementDerivation(const vector<pair<short,short> >& d, const vector<int>& sentf, const vector<int>& sente) { -    if (d.size() < 2) return; -    TRule x; -    for (int i = 1; i < d.size(); ++i) { -      InstantiateRule(d[i], d[i-1], sentf, sente, &x); -      model.IncrementRule(x); -      model.IncrementContinue(); -    } -    model.IncrementStop(); -  } - -  prob_t Likelihood() const { -    return model.Likelihood(); -  } - -  prob_t DerivationProposalProbability(const vector<pair<short,short> >& d, const vector<int>& sentf, const vector<int>& sente) const { -    prob_t p = model.StopProbability(); -    if (d.size() < 2) return p; -    TRule x; -    const prob_t p_cont = model.ContinueProbability(); -    for (int i = 1; i < d.size(); ++i) { -      InstantiateRule(d[i], d[i-1], sentf, sente, &x); -      p *= p_cont; -      p *= model.RuleProbability(x); -    } -    return p; -  } - -  void Sample(); - -  MonotonicParallelSegementationModel<PhraseJointBase_BiDir>& model; -  MT19937* rng; -  const Base& p0; -  prob_t baseprob; // cached value of generating the table table labels from p0 -                   // this can't be used if we go to a hierarchical prior! -  const vector<vector<int> >& corpuse, corpusf; -  const set<int>& vocabe, vocabf; -  unsigned mh_samples, mh_rejects; -  const int kX; -  vector<vector<pair<short, short> > > derivations; -}; - -template <typename Base> -void ModelAndData<Base>::Sample() { -  unsigned MAXK = kMAX_SRC_PHRASE; -  unsigned MAXL = kMAX_TRG_PHRASE; -  TRule x; -  x.lhs_ = -TD::Convert("X"); -  for (int samples = 0; samples < 1000; ++samples) { -    if (samples % 1 == 0 && samples > 0) { -      //ResampleHyperparameters(); -      cerr << " [" << samples << " LLH=" << log(Likelihood()) << " MH=" << ((double)mh_rejects / mh_samples) << "]\n"; -      for (int i = 0; i < 10; ++i) { -        cerr << "SENTENCE: " << TD::GetString(corpusf[i]) << " ||| " << TD::GetString(corpuse[i]) << endl; -        PrintDerivation(derivations[i], corpusf[i], corpuse[i]); -      } -    } -    cerr << '.' 
<< flush; -    for (int s = 0; s < corpuse.size(); ++s) { -      const vector<int>& sentf = corpusf[s]; -      const vector<int>& sente = corpuse[s]; -//      cerr << "  CUSTOMERS: " << rules.num_customers() << endl; -//      cerr << "SENTENCE: " << TD::GetString(sentf) << " ||| " << TD::GetString(sente) << endl; - -      vector<pair<short, short> >& deriv = derivations[s]; -      const prob_t p_cur = Likelihood(); -      DecrementDerivation(deriv, sentf, sente); - -      boost::multi_array<prob_t, 2> a(boost::extents[sentf.size() + 1][sente.size() + 1]); -      boost::multi_array<prob_t, 4> trans(boost::extents[sentf.size() + 1][sente.size() + 1][MAXK][MAXL]); -      a[0][0] = prob_t::One(); -      const prob_t q_stop = model.StopProbability(); -      const prob_t q_cont = model.ContinueProbability(); -      for (int i = 0; i < sentf.size(); ++i) { -        for (int j = 0; j < sente.size(); ++j) { -          const prob_t src_a = a[i][j]; -          x.f_.clear(); -          for (int k = 1; k <= MAXK; ++k) { -            if (i + k > sentf.size()) break; -            x.f_.push_back(sentf[i + k - 1]); -            x.e_.clear(); -            for (int l = 1; l <= MAXL; ++l) { -              if (j + l > sente.size()) break; -              x.e_.push_back(sente[j + l - 1]); -              const bool stop_now = ((j + l) == sente.size()) && ((i + k) == sentf.size()); -              const prob_t& cp = stop_now ? q_stop : q_cont; -              trans[i][j][k - 1][l - 1] = model.RuleProbability(x) * cp; -              a[i + k][j + l] += src_a * trans[i][j][k - 1][l - 1]; -            } -          } -        } -      } -//      cerr << "Inside: " << log(a[sentf.size()][sente.size()]) << endl; -      const prob_t q_cur = DerivationProposalProbability(deriv, sentf, sente); - -      vector<pair<short,short> > newderiv; -      int cur_i = sentf.size(); -      int cur_j = sente.size(); -      while(cur_i > 0 && cur_j > 0) { -        newderiv.push_back(pair<short,short>(cur_i, cur_j)); -//        cerr << "NODE: (" << cur_i << "," << cur_j << ")\n"; -        SampleSet<prob_t> ss; -        vector<pair<short,short> > nexts; -        for (int k = 1; k <= MAXK; ++k) { -          const int hyp_i = cur_i - k; -          if (hyp_i < 0) break; -          for (int l = 1; l <= MAXL; ++l) { -            const int hyp_j = cur_j - l; -            if (hyp_j < 0) break; -            const prob_t& inside = a[hyp_i][hyp_j]; -            if (inside == prob_t::Zero()) continue; -            const prob_t& transp = trans[hyp_i][hyp_j][k - 1][l - 1]; -            if (transp == prob_t::Zero()) continue; -            const prob_t p = inside * transp; -            ss.add(p); -            nexts.push_back(pair<short,short>(hyp_i, hyp_j)); -//            cerr << "    (" << hyp_i << "," << hyp_j << ")  <--- " << log(p) << endl; -          } -        } -//        cerr << "  sample set has " << nexts.size() << " elements.\n"; -        const int selected = rng->SelectSample(ss); -        cur_i = nexts[selected].first; -        cur_j = nexts[selected].second; -      } -      newderiv.push_back(pair<short,short>(0,0)); -      const prob_t q_new = DerivationProposalProbability(newderiv, sentf, sente); -      IncrementDerivation(newderiv, sentf, sente); -//      cerr << "SANITY: " << q_new << "  " <<log(DerivationProposalProbability(newderiv, sentf, sente)) << endl; -      if (deriv.empty()) { deriv = newderiv; continue; } -      ++mh_samples; - -      if (deriv != newderiv) { -        const prob_t p_new = Likelihood(); -//        cerr << 
"p_cur=" << log(p_cur) << "\t p_new=" << log(p_new) << endl; -//        cerr << "q_cur=" << log(q_cur) << "\t q_new=" << log(q_new) << endl; -        if (!rng->AcceptMetropolisHastings(p_new, p_cur, q_new, q_cur)) { -          ++mh_rejects; -          DecrementDerivation(newderiv, sentf, sente); -          IncrementDerivation(deriv, sentf, sente); -        } else { -//          cerr << "  ACCEPT\n"; -          deriv = newderiv; -        } -      } -    } -  } -} - -int main(int argc, char** argv) { -  po::variables_map conf; -  InitCommandLine(argc, argv, &conf); -  kMAX_TRG_PHRASE = conf["max_trg_phrase"].as<unsigned>(); -  kMAX_SRC_PHRASE = conf["max_src_phrase"].as<unsigned>(); - -  if (!conf.count("model1")) { -    cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; -    return 1; -  } -  if (!conf.count("inverse_model1")) { -    cerr << argv[0] << "Please use --inverse_model1 to specify inverse model 1 parameters\n"; -    return 1; -  } -  if (conf.count("random_seed")) -    prng.reset(new MT19937(conf["random_seed"].as<uint32_t>())); -  else -    prng.reset(new MT19937); -//  MT19937& rng = *prng; - -  vector<vector<int> > corpuse, corpusf; -  set<int> vocabe, vocabf; -  corpus::ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe); -  cerr << "f-Corpus size: " << corpusf.size() << " sentences\n"; -  cerr << "f-Vocabulary size: " << vocabf.size() << " types\n"; -  cerr << "f-Corpus size: " << corpuse.size() << " sentences\n"; -  cerr << "f-Vocabulary size: " << vocabe.size() << " types\n"; -  assert(corpusf.size() == corpuse.size()); - -  Model1 m1(conf["model1"].as<string>()); -  Model1 invm1(conf["inverse_model1"].as<string>()); -//  PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size(), vocabf.size()); -  PhraseJointBase_BiDir alp0(m1, invm1, conf["model1_interpolation_weight"].as<double>(), vocabe.size(), vocabf.size()); -  MonotonicParallelSegementationModel<PhraseJointBase_BiDir> m(alp0); - -  ModelAndData<PhraseJointBase_BiDir> posterior(m, alp0, corpuse, corpusf, vocabe, vocabf); -  posterior.Sample(); - -  return 0; -} - diff --git a/gi/pf/guess-translits.pl b/gi/pf/guess-translits.pl deleted file mode 100755 index d00c2168..00000000 --- a/gi/pf/guess-translits.pl +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use utf8; - -my $MIN_PMI = -3; - -my %fs; -my %es; -my %ef; - -die "Usage: $0 < input.utf8.txt\n" if scalar @ARGV > 0; - -binmode(STDIN,":utf8"); -binmode(STDOUT,":utf8"); -binmode(STDERR,":utf8"); - -my $tot = 0; -print STDERR "Reading alignments from STDIN ...\n"; -while(<STDIN>) { -  chomp; -  my ($fsent, $esent, $alsent) = split / \|\|\| /; -  die "Format should be 'foreign sentence ||| english sentence ||| 0-0 1-1 ...'\n" unless defined $fsent && defined $esent && defined $alsent; - -  my @fws = split /\s+/, $fsent;   -  my @ews = split /\s+/, $esent; -  my @as = split /\s+/, $alsent; -  my %a2b; -  my %b2a; -  for my $ap (@as) { -    my ($a,$b) = split /-/, $ap; -    die "BAD INPUT: $_\n" unless defined $a && defined $b; -    $a2b{$a}->{$b} = 1; -    $b2a{$b}->{$a} = 1; -  } -  for my $a (keys %a2b) { -    my $bref = $a2b{$a}; -    next unless scalar keys %$bref < 2; -    my $b = (keys %$bref)[0]; -    next unless scalar keys %{$b2a{$b}} < 2; -    my $f = $fws[$a]; -    next unless defined $f; -    next unless length($f) > 3; -    my $e = $ews[$b]; -    next unless defined $e; -    next unless length($e) > 3; - -    $ef{$f}->{$e}++; -    $es{$e}++; -    
$fs{$f}++; -    $tot++; -  }   -} -my $ltot = log($tot); -my $num = 0; -print STDERR "Extracting pairs for PMI > $MIN_PMI ...\n"; -for my $f (keys %fs) { -  my $logf = log($fs{$f}); -  my $esref = $ef{$f}; -  for my $e (keys %$esref) { -    my $loge = log($es{$e}); -    my $ef = $esref->{$e}; -    my $logef = log($ef); -    my $pmi = $logef - ($loge + $logf); -    next if $pmi < $MIN_PMI; -    my @flets = split //, $f; -    my @elets = split //, $e; -    print "@flets ||| @elets\n"; -    $num++; -  } -} -print STDERR "Extracted $num pairs.\n"; -print STDERR "Recommend running:\n   ../../training/model1 -v -d -t -99999 output.txt\n"; diff --git a/gi/pf/hpyp_tm.cc b/gi/pf/hpyp_tm.cc deleted file mode 100644 index f362d3f8..00000000 --- a/gi/pf/hpyp_tm.cc +++ /dev/null @@ -1,133 +0,0 @@ -#include "hpyp_tm.h" - -#include <tr1/unordered_map> -#include <iostream> -#include <queue> - -#include "tdict.h" -#include "ccrp.h" -#include "pyp_word_model.h" -#include "tied_resampler.h" - -using namespace std; -using namespace std::tr1; - -struct FreqBinner { -  FreqBinner(const std::string& fname) { fd_.Load(fname); } -  unsigned NumberOfBins() const { return fd_.Max() + 1; } -  unsigned Bin(const WordID& w) const { return fd_.LookUp(w); } -  FreqDict<unsigned> fd_; -}; - -template <typename Base, class Binner = FreqBinner> -struct ConditionalPYPWordModel { -  ConditionalPYPWordModel(Base* b, const Binner* bnr = NULL) : -      base(*b), -      binner(bnr), -      btr(binner ? binner->NumberOfBins() + 1u : 2u) {} - -  void Summary() const { -    cerr << "Number of conditioning contexts: " << r.size() << endl; -    for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { -      cerr << TD::Convert(it->first) << "   \tPYP(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << endl; -      for (CCRP<vector<WordID> >::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) -        cerr << "   " << i2->second << endl; -    } -  } - -  void ResampleHyperparameters(MT19937* rng) { -    btr.ResampleHyperparameters(rng); -  }  - -  prob_t Prob(const WordID src, const vector<WordID>& trglets) const { -    RuleModelHash::const_iterator it = r.find(src); -    if (it == r.end()) { -      return base(trglets); -    } else { -      return it->second.prob(trglets, base(trglets)); -    } -  } - -  void Increment(const WordID src, const vector<WordID>& trglets, MT19937* rng) { -    RuleModelHash::iterator it = r.find(src); -    if (it == r.end()) { -      it = r.insert(make_pair(src, CCRP<vector<WordID> >(0.5,1.0))).first; -      static const WordID kNULL = TD::Convert("NULL"); -      unsigned bin = (src == kNULL ? 
0 : 1); -      if (binner && bin) { bin = binner->Bin(src) + 1; } -      btr.Add(bin, &it->second); -    } -    if (it->second.increment(trglets, base(trglets), rng)) -      base.Increment(trglets, rng); -  } - -  void Decrement(const WordID src, const vector<WordID>& trglets, MT19937* rng) { -    RuleModelHash::iterator it = r.find(src); -    assert(it != r.end()); -    if (it->second.decrement(trglets, rng)) { -      base.Decrement(trglets, rng); -    } -  } - -  prob_t Likelihood() const { -    prob_t p = prob_t::One(); -    for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { -      prob_t q; q.logeq(it->second.log_crp_prob()); -      p *= q; -    } -    return p; -  } - -  unsigned UniqueConditioningContexts() const { -    return r.size(); -  } - -  // TODO tie PYP hyperparameters based on source word frequency bins -  Base& base; -  const Binner* binner; -  BinTiedResampler<CCRP<vector<WordID> > > btr; -  typedef unordered_map<WordID, CCRP<vector<WordID> > > RuleModelHash; -  RuleModelHash r; -}; - -HPYPLexicalTranslation::HPYPLexicalTranslation(const vector<vector<WordID> >& lets, -                                               const unsigned vocab_size, -                                               const unsigned num_letters) : -    letters(lets), -    base(vocab_size, num_letters, 5), -    up0(new PYPWordModel<PoissonUniformWordModel>(&base)), -    tmodel(new ConditionalPYPWordModel<PYPWordModel<PoissonUniformWordModel> >(up0, new FreqBinner("10k.freq"))), -    kX(-TD::Convert("X")) {} - -void HPYPLexicalTranslation::Summary() const { -  tmodel->Summary(); -  up0->Summary(); -} - -prob_t HPYPLexicalTranslation::Likelihood() const { -  prob_t p = up0->Likelihood(); -  p *= tmodel->Likelihood(); -  return p; -} - -void HPYPLexicalTranslation::ResampleHyperparameters(MT19937* rng) { -  tmodel->ResampleHyperparameters(rng); -  up0->ResampleHyperparameters(rng); -} - -unsigned HPYPLexicalTranslation::UniqueConditioningContexts() const { -  return tmodel->UniqueConditioningContexts(); -} - -prob_t HPYPLexicalTranslation::Prob(WordID src, WordID trg) const { -  return tmodel->Prob(src, letters[trg]); -} - -void HPYPLexicalTranslation::Increment(WordID src, WordID trg, MT19937* rng) { -  tmodel->Increment(src, letters[trg], rng); -} - -void HPYPLexicalTranslation::Decrement(WordID src, WordID trg, MT19937* rng) { -  tmodel->Decrement(src, letters[trg], rng); -} - diff --git a/gi/pf/hpyp_tm.h b/gi/pf/hpyp_tm.h deleted file mode 100644 index af3215ba..00000000 --- a/gi/pf/hpyp_tm.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef HPYP_LEX_TRANS -#define HPYP_LEX_TRANS - -#include <vector> -#include "wordid.h" -#include "prob.h" -#include "sampler.h" -#include "freqdict.h" -#include "poisson_uniform_word_model.h" - -struct FreqBinner; -template <class B> struct PYPWordModel; -template <typename T, class B> struct ConditionalPYPWordModel; - -struct HPYPLexicalTranslation { -  explicit HPYPLexicalTranslation(const std::vector<std::vector<WordID> >& lets, -                                 const unsigned vocab_size, -                                 const unsigned num_letters); - -  prob_t Likelihood() const; - -  void ResampleHyperparameters(MT19937* rng); -  prob_t Prob(WordID src, WordID trg) const;  // return p(trg | src) -  void Summary() const; -  void Increment(WordID src, WordID trg, MT19937* rng); -  void Decrement(WordID src, WordID trg, MT19937* rng); -  unsigned UniqueConditioningContexts() const; - - private: -  const std::vector<std::vector<WordID> >& letters;   // 
spelling dictionary -  PoissonUniformWordModel base;  // "generator" of English types -  PYPWordModel<PoissonUniformWordModel>* up0;  // model English lexicon -  ConditionalPYPWordModel<PYPWordModel<PoissonUniformWordModel>, FreqBinner>* tmodel;  // translation distributions -                      // (model English word | French word) -  const WordID kX; -}; - -#endif diff --git a/gi/pf/itg.cc b/gi/pf/itg.cc deleted file mode 100644 index 29ec3860..00000000 --- a/gi/pf/itg.cc +++ /dev/null @@ -1,275 +0,0 @@ -#include <iostream> -#include <tr1/memory> -#include <queue> - -#include <boost/functional.hpp> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "viterbi.h" -#include "hg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "ccrp_onetable.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -ostream& operator<<(ostream& os, const vector<WordID>& p) { -  os << '['; -  for (int i = 0; i < p.size(); ++i) -    os << (i==0 ? "" : " ") << TD::Convert(p[i]); -  return os << ']'; -} - -struct UnigramModel { -  explicit UnigramModel(const string& fname, unsigned vocab_size, double p0null = 0.05) : -      use_uniform_(fname.size() == 0), -      p0null_(p0null), -      uniform_((1.0 - p0null) / vocab_size), -      probs_(TD::NumWords() + 1) { -    if (fname.size() > 0) LoadUnigrams(fname); -    probs_[0] = p0null_; -  } - -//  -// \data\ -// ngram 1=9295 -//  -// \1-grams: -// -3.191193	" - -  void LoadUnigrams(const string& fname) { -    cerr << "Loading unigram probabilities from " << fname << " ..." << endl; -    ReadFile rf(fname); -    string line; -    istream& in = *rf.stream(); -    assert(in); -    getline(in, line); -    assert(line.empty()); -    getline(in, line); -    assert(line == "\\data\\"); -    getline(in, line); -    size_t pos = line.find("ngram 1="); -    assert(pos == 0); -    assert(line.size() > 8); -    const size_t num_unigrams = atoi(&line[8]); -    getline(in, line); -    assert(line.empty()); -    getline(in, line); -    assert(line == "\\1-grams:"); -    for (size_t i = 0; i < num_unigrams; ++i) { -      getline(in, line); -      assert(line.size() > 0); -      pos = line.find('\t'); -      assert(pos > 0); -      assert(pos + 1 < line.size()); -      const WordID w = TD::Convert(line.substr(pos + 1)); -      line[pos] = 0; -      float p = atof(&line[0]); -      const prob_t pnon_null(1.0 - p0null_.as_float()); -      if (w < probs_.size()) probs_[w].logeq(p * log(10) + log(pnon_null)); else abort(); -    } -  } - -  const prob_t& operator()(const WordID& w) const { -    if (!w) return p0null_; -    if (use_uniform_) return uniform_; -    return probs_[w]; -  } - -  const bool use_uniform_; -  const prob_t p0null_; -  const prob_t uniform_; -  vector<prob_t> probs_; -}; - -struct Model1 { -  explicit Model1(const string& fname) : -      kNULL(TD::Convert("<eps>")), -      kZERO() { -    LoadModel1(fname); -  } - -  void LoadModel1(const string& fname) { -    cerr << "Loading Model 1 parameters from " << fname << " ..." 
<< endl; -    ReadFile rf(fname); -    istream& in = *rf.stream(); -    string line; -    unsigned lc = 0; -    while(getline(in, line)) { -      ++lc; -      int cur = 0; -      int start = 0; -      while(cur < line.size() && line[cur] != ' ') { ++cur; } -      assert(cur != line.size()); -      line[cur] = 0; -      const WordID src = TD::Convert(&line[0]); -      ++cur; -      start = cur; -      while(cur < line.size() && line[cur] != ' ') { ++cur; } -      assert(cur != line.size()); -      line[cur] = 0; -      WordID trg = TD::Convert(&line[start]); -      const double logprob = strtod(&line[cur + 1], NULL); -      if (src >= ttable.size()) ttable.resize(src + 1); -      ttable[src][trg].logeq(logprob); -    } -    cerr << "  read " << lc << " parameters.\n"; -  } - -  // returns prob 0 if src or trg is not found! -  const prob_t& operator()(WordID src, WordID trg) const { -    if (src == 0) src = kNULL; -    if (src < ttable.size()) { -      const map<WordID, prob_t>& cpd = ttable[src]; -      const map<WordID, prob_t>::const_iterator it = cpd.find(trg); -      if (it != cpd.end()) -        return it->second; -    } -    return kZERO; -  } - -  const WordID kNULL; -  const prob_t kZERO; -  vector<map<WordID, prob_t> > ttable; -}; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { -  po::options_description opts("Configuration options"); -  opts.add_options() -        ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples") -        ("particles,p",po::value<unsigned>()->default_value(25),"Number of particles") -        ("input,i",po::value<string>(),"Read parallel data from") -        ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)") -        ("inverse_model1,M",po::value<string>(),"Inverse Model 1 parameters (used in backward estimate)") -        ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") -        ("src_unigram,u",po::value<string>()->default_value(""),"Source unigram distribution; empty for uniform") -        ("trg_unigram,U",po::value<string>()->default_value(""),"Target unigram distribution; empty for uniform") -        ("random_seed,S",po::value<uint32_t>(), "Random seed"); -  po::options_description clo("Command line options"); -  clo.add_options() -        ("config", po::value<string>(), "Configuration file") -        ("help,h", "Print this help message and exit"); -  po::options_description dconfig_options, dcmdline_options; -  dconfig_options.add(opts); -  dcmdline_options.add(opts).add(clo); -   -  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); -  if (conf->count("config")) { -    ifstream config((*conf)["config"].as<string>().c_str()); -    po::store(po::parse_config_file(config, dconfig_options), *conf); -  } -  po::notify(*conf); - -  if (conf->count("help") || (conf->count("input") == 0)) { -    cerr << dcmdline_options << endl; -    exit(1); -  } -} - -void ReadParallelCorpus(const string& filename, -                vector<vector<WordID> >* f, -                vector<vector<WordID> >* e, -                set<WordID>* vocab_f, -                set<WordID>* vocab_e) { -  f->clear(); -  e->clear(); -  vocab_f->clear(); -  vocab_e->clear(); -  istream* in; -  if (filename == "-") -    in = &cin; -  else -    in = new ifstream(filename.c_str()); -  assert(*in); -  string line; -  const WordID kDIV = TD::Convert("|||"); -  vector<WordID> tmp; -  while(*in) { -    getline(*in, line); -   
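    // Editorial note: each input line is "f1 f2 ... ||| e1 e2 ...". Tokens
    // before the ||| divider go to the f side, the rest to the e side, and
    // both vocabularies are collected as a side effect.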
 if (line.empty() && !*in) break; -    e->push_back(vector<int>()); -    f->push_back(vector<int>()); -    vector<int>& le = e->back(); -    vector<int>& lf = f->back(); -    tmp.clear(); -    TD::ConvertSentence(line, &tmp); -    bool isf = true; -    for (unsigned i = 0; i < tmp.size(); ++i) { -      const int cur = tmp[i]; -      if (isf) { -        if (kDIV == cur) { isf = false; } else { -          lf.push_back(cur); -          vocab_f->insert(cur); -        } -      } else { -        assert(cur != kDIV); -        le.push_back(cur); -        vocab_e->insert(cur); -      } -    } -    assert(isf == false); -  } -  if (in != &cin) delete in; -} - -int main(int argc, char** argv) { -  po::variables_map conf; -  InitCommandLine(argc, argv, &conf); -  const unsigned particles = conf["particles"].as<unsigned>(); -  const unsigned samples = conf["samples"].as<unsigned>(); -  TD::Convert("<s>"); -  TD::Convert("</s>"); -  TD::Convert("<unk>"); -  if (!conf.count("model1")) { -    cerr << argv[0] << ": Please use --model1 to specify model 1 parameters\n"; -    return 1; -  } -  boost::shared_ptr<MT19937> prng; -  if (conf.count("random_seed")) -    prng.reset(new MT19937(conf["random_seed"].as<uint32_t>())); -  else -    prng.reset(new MT19937); -  MT19937& rng = *prng; - -  vector<vector<WordID> > corpuse, corpusf; -  set<WordID> vocabe, vocabf; -  cerr << "Reading corpus...\n"; -  ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe); -  cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n"; -  cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; -  assert(corpusf.size() == corpuse.size()); -  UnigramModel src_unigram(conf["src_unigram"].as<string>(), vocabf.size()); -  UnigramModel trg_unigram(conf["trg_unigram"].as<string>(), vocabe.size()); -  const prob_t kHALF(0.5); - -  const string kEMPTY = "NULL"; -  const int kLHS = -TD::Convert("X"); -  Model1 m1(conf["model1"].as<string>()); -  Model1 invm1(conf["inverse_model1"].as<string>()); -  for (unsigned si = 0; si < samples; ++si) { -    cerr << '.' << flush; -    for (int ci = 0; ci < corpusf.size(); ++ci) { -      const vector<WordID>& trg = corpuse[ci]; -      const vector<WordID>& src = corpusf[ci]; -      for (int i = 0; i <= trg.size(); ++i) { -        const WordID e_i = i > 0 ? trg[i-1] : 0; -        for (int j = 0; j <= src.size(); ++j) { -          const WordID f_j = j > 0 ? src[j-1] : 0; -          if (e_i == 0 && f_j == 0) continue; -          prob_t je = kHALF * src_unigram(f_j) * m1(f_j,e_i) + kHALF * trg_unigram(e_i) * invm1(e_i,f_j); -          cerr << "p( " << (e_i ? TD::Convert(e_i) : kEMPTY) << " , " << (f_j ? 
TD::Convert(f_j) : kEMPTY) << " ) = " << log(je) << endl; -          if (e_i && f_j) -            cout << "[X] ||| " << TD::Convert(f_j) << " ||| " << TD::Convert(e_i) << " ||| LogProb=" << log(je) << endl; -        } -      } -    } -  } -} - diff --git a/gi/pf/learn_cfg.cc b/gi/pf/learn_cfg.cc deleted file mode 100644 index 1d5126e4..00000000 --- a/gi/pf/learn_cfg.cc +++ /dev/null @@ -1,428 +0,0 @@ -#include <iostream> -#include <tr1/memory> -#include <queue> - -#include <boost/functional.hpp> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "inside_outside.h" -#include "hg.h" -#include "bottom_up_parser.h" -#include "fdict.h" -#include "grammar.h" -#include "m.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp.h" -#include "ccrp_onetable.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -boost::shared_ptr<MT19937> prng; -vector<int> nt_vocab; -vector<int> nt_id_to_index; -static unsigned kMAX_RULE_SIZE = 0; -static unsigned kMAX_ARITY = 0; -static bool kALLOW_MIXED = true;  // allow rules with mixed terminals and NTs -static bool kHIERARCHICAL_PRIOR = false; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { -  po::options_description opts("Configuration options"); -  opts.add_options() -        ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples") -        ("input,i",po::value<string>(),"Read parallel data from") -        ("max_rule_size,m", po::value<unsigned>()->default_value(0), "Maximum rule size (0 for unlimited)") -        ("max_arity,a", po::value<unsigned>()->default_value(0), "Maximum number of nonterminals in a rule (0 for unlimited)") -        ("no_mixed_rules,M", "Do not mix terminals and nonterminals in a rule RHS") -        ("nonterminals,n", po::value<unsigned>()->default_value(1), "Size of nonterminal vocabulary") -        ("hierarchical_prior,h", "Use hierarchical prior") -        ("random_seed,S",po::value<uint32_t>(), "Random seed"); -  po::options_description clo("Command line options"); -  clo.add_options() -        ("config", po::value<string>(), "Configuration file") -        ("help", "Print this help message and exit"); -  po::options_description dconfig_options, dcmdline_options; -  dconfig_options.add(opts); -  dcmdline_options.add(opts).add(clo); -   -  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); -  if (conf->count("config")) { -    ifstream config((*conf)["config"].as<string>().c_str()); -    po::store(po::parse_config_file(config, dconfig_options), *conf); -  } -  po::notify(*conf); - -  if (conf->count("help") || (conf->count("input") == 0)) { -    cerr << dcmdline_options << endl; -    exit(1); -  } -} - -unsigned ReadCorpus(const string& filename, -                    vector<vector<WordID> >* e, -                    set<WordID>* vocab_e) { -  e->clear(); -  vocab_e->clear(); -  istream* in; -  if (filename == "-") -    in = &cin; -  else -    in = new ifstream(filename.c_str()); -  assert(*in); -  string line; -  unsigned toks = 0; -  while(*in) { -    getline(*in, line); -    if (line.empty() && !*in) break; -    e->push_back(vector<int>()); -    vector<int>& le = e->back(); -    TD::ConvertSentence(line, &le); -    for (unsigned i = 0; i < le.size(); ++i) -      vocab_e->insert(le[i]); -    toks += le.size(); -  } -  if (in != &cin) delete in; -  return toks; -} - -struct Grid { -  // a b c d e -  // 0 - 0 - - -  
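  // (Editorial note: the two rows above look like a leftover worked example,
  // five RHS symbols with a coverage row; Grid itself appears never to be
  // referenced in the remainder of this file.)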
vector<int> grid; -}; - -struct BaseRuleModel { -  explicit BaseRuleModel(unsigned term_size, -                         unsigned nonterm_size = 1) : -      unif_term(1.0 / term_size), -      unif_nonterm(1.0 / nonterm_size) {} -  prob_t operator()(const TRule& r) const { -    prob_t p; p.logeq(Md::log_poisson(1.0, r.f_.size())); -    const prob_t term_prob((2.0 + 0.01*r.f_.size()) / (r.f_.size() + 2)); -    const prob_t nonterm_prob(1.0 - term_prob.as_float()); -    for (unsigned i = 0; i < r.f_.size(); ++i) { -      if (r.f_[i] <= 0) {     // nonterminal -        if (kALLOW_MIXED) p *= nonterm_prob; -        p *= unif_nonterm; -      } else {                // terminal -        if (kALLOW_MIXED) p *= term_prob; -        p *= unif_term; -      } -    } -    return p; -  } -  const prob_t unif_term, unif_nonterm; -}; - -struct HieroLMModel { -  explicit HieroLMModel(unsigned vocab_size, unsigned num_nts = 1) : -      base(vocab_size, num_nts), -      q0(1,1,1,1), -      nts(num_nts, CCRP<TRule>(1,1,1,1)) {} - -  prob_t Prob(const TRule& r) const { -    return nts[nt_id_to_index[-r.lhs_]].prob(r, p0(r)); -  } - -  inline prob_t p0(const TRule& r) const { -    if (kHIERARCHICAL_PRIOR) -      return q0.prob(r, base(r)); -    else -      return base(r); -  } - -  int Increment(const TRule& r, MT19937* rng) { -    const int delta = nts[nt_id_to_index[-r.lhs_]].increment(r, p0(r), rng); -    if (kHIERARCHICAL_PRIOR && delta) -      q0.increment(r, base(r), rng); -    return delta; -    // return x.increment(r); -  } - -  int Decrement(const TRule& r, MT19937* rng) { -    const int delta = nts[nt_id_to_index[-r.lhs_]].decrement(r, rng); -    if (kHIERARCHICAL_PRIOR && delta) -      q0.decrement(r, rng); -    return delta; -    //return x.decrement(r); -  } - -  prob_t Likelihood() const { -    prob_t p = prob_t::One(); -    for (unsigned i = 0; i < nts.size(); ++i) { -      prob_t q; q.logeq(nts[i].log_crp_prob()); -      p *= q; -      for (CCRP<TRule>::const_iterator it = nts[i].begin(); it != nts[i].end(); ++it) { -        prob_t tp = p0(it->first); -        tp.poweq(it->second.num_tables()); -        p *= tp; -      } -    } -    if (kHIERARCHICAL_PRIOR) { -      prob_t q; q.logeq(q0.log_crp_prob()); -      p *= q; -      for (CCRP<TRule>::const_iterator it = q0.begin(); it != q0.end(); ++it) { -        prob_t tp = base(it->first); -        tp.poweq(it->second.num_tables()); -        p *= tp; -      } -    } -    //for (CCRP_OneTable<TRule>::const_iterator it = x.begin(); it != x.end(); ++it) -    //    p *= base(it->first); -    return p; -  } - -  void ResampleHyperparameters(MT19937* rng) { -    for (unsigned i = 0; i < nts.size(); ++i) -      nts[i].resample_hyperparameters(rng); -    if (kHIERARCHICAL_PRIOR) { -      q0.resample_hyperparameters(rng); -      cerr << "[base d=" << q0.discount() << ", s=" << q0.strength() << "]"; -    } -    cerr << " d=" << nts[0].discount() << ", s=" << nts[0].strength() << endl; -  } - -  const BaseRuleModel base; -  CCRP<TRule> q0; -  vector<CCRP<TRule> > nts; -  //CCRP_OneTable<TRule> x; -}; - -vector<GrammarIter* > tofreelist; - -HieroLMModel* plm; - -struct NPGrammarIter : public GrammarIter, public RuleBin { -  NPGrammarIter() : arity() { tofreelist.push_back(this); } -  NPGrammarIter(const TRulePtr& inr, const int a, int symbol) : arity(a) { -    if (inr) { -      r.reset(new TRule(*inr)); -    } else { -      r.reset(new TRule); -    } -    TRule& rr = *r; -    rr.lhs_ = nt_vocab[0]; -    rr.f_.push_back(symbol); -    rr.e_.push_back(symbol < 0 ? 
(1-int(arity)) : symbol); -    tofreelist.push_back(this); -  } -  inline static unsigned NextArity(int cur_a, int symbol) { -    return cur_a + (symbol <= 0 ? 1 : 0); -  } -  virtual int GetNumRules() const { -    if (r) return nt_vocab.size(); else return 0; -  } -  virtual TRulePtr GetIthRule(int i) const { -    if (i == 0) return r; -    TRulePtr nr(new TRule(*r)); -    nr->lhs_ = nt_vocab[i]; -    return nr; -  } -  virtual int Arity() const { -    return arity; -  } -  virtual const RuleBin* GetRules() const { -    if (!r) return NULL; else return this; -  } -  virtual const GrammarIter* Extend(int symbol) const { -    const int next_arity = NextArity(arity, symbol); -    if (kMAX_ARITY && next_arity > kMAX_ARITY) -      return NULL; -    if (!kALLOW_MIXED && r) { -      bool t1 = r->f_.front() <= 0; -      bool t2 = symbol <= 0; -      if (t1 != t2) return NULL; -    } -    if (!kMAX_RULE_SIZE || !r || (r->f_.size() < kMAX_RULE_SIZE)) -      return new NPGrammarIter(r, next_arity, symbol); -    else -      return NULL; -  } -  const unsigned char arity; -  TRulePtr r; -}; - -struct NPGrammar : public Grammar { -  virtual const GrammarIter* GetRoot() const { -    return new NPGrammarIter; -  } -}; - -prob_t TotalProb(const Hypergraph& hg) { -  return Inside<prob_t, EdgeProb>(hg); -} - -void SampleDerivation(const Hypergraph& hg, MT19937* rng, vector<unsigned>* sampled_deriv) { -  vector<prob_t> node_probs; -  Inside<prob_t, EdgeProb>(hg, &node_probs); -  queue<unsigned> q; -  q.push(hg.nodes_.size() - 2); -  while(!q.empty()) { -    unsigned cur_node_id = q.front(); -//    cerr << "NODE=" << cur_node_id << endl; -    q.pop(); -    const Hypergraph::Node& node = hg.nodes_[cur_node_id]; -    const unsigned num_in_edges = node.in_edges_.size(); -    unsigned sampled_edge = 0; -    if (num_in_edges == 1) { -      sampled_edge = node.in_edges_[0]; -    } else { -      //prob_t z; -      assert(num_in_edges > 1); -      SampleSet<prob_t> ss; -      for (unsigned j = 0; j < num_in_edges; ++j) { -        const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; -        prob_t p = edge.edge_prob_; -        for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) -          p *= node_probs[edge.tail_nodes_[k]]; -        ss.add(p); -//        cerr << log(ss[j]) << " ||| " << edge.rule_->AsString() << endl; -        //z += p; -      } -//      for (unsigned j = 0; j < num_in_edges; ++j) { -//        const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; -//        cerr << exp(log(ss[j] / z)) << " ||| " << edge.rule_->AsString() << endl; -//      } -//      cerr << " --- \n"; -      sampled_edge = node.in_edges_[rng->SelectSample(ss)]; -    } -    sampled_deriv->push_back(sampled_edge); -    const Hypergraph::Edge& edge = hg.edges_[sampled_edge]; -    for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { -      q.push(edge.tail_nodes_[j]); -    } -  } -  for (unsigned i = 0; i < sampled_deriv->size(); ++i) { -    cerr << *hg.edges_[(*sampled_deriv)[i]].rule_ << endl; -  } -} - -void IncrementDerivation(const Hypergraph& hg, const vector<unsigned>& d, HieroLMModel* plm, MT19937* rng) { -  for (unsigned i = 0; i < d.size(); ++i) -    plm->Increment(*hg.edges_[d[i]].rule_, rng); -} - -void DecrementDerivation(const Hypergraph& hg, const vector<unsigned>& d, HieroLMModel* plm, MT19937* rng) { -  for (unsigned i = 0; i < d.size(); ++i) -    plm->Decrement(*hg.edges_[d[i]].rule_, rng); -} - -int main(int argc, char** argv) { -  po::variables_map conf; - -  InitCommandLine(argc, argv, &conf); 
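  // Editorial note on the nonterminal setup below: with a single NT the
  // grammar uses "X"; with n > 1 the NTs are named "A", "B", ... (hence the
  // n < 26 assert). Following the decoder's convention that nonterminal
  // symbols are stored as negated dictionary ids, nt_vocab holds
  // -TD::Convert(name) and nt_id_to_index maps the positive id back to a
  // dense index.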
-  nt_vocab.resize(conf["nonterminals"].as<unsigned>()); -  assert(nt_vocab.size() > 0); -  assert(nt_vocab.size() < 26); -  { -    string nt = "X"; -    for (unsigned i = 0; i < nt_vocab.size(); ++i) { -      if (nt_vocab.size() > 1) nt[0] = ('A' + i); -      int pid = TD::Convert(nt); -      nt_vocab[i] = -pid; -      if (pid >= nt_id_to_index.size()) { -        nt_id_to_index.resize(pid + 1, -1); -      } -      nt_id_to_index[pid] = i; -    } -  } -  vector<GrammarPtr> grammars; -  grammars.push_back(GrammarPtr(new NPGrammar)); - -  const unsigned samples = conf["samples"].as<unsigned>(); -  kMAX_RULE_SIZE = conf["max_rule_size"].as<unsigned>(); -  if (kMAX_RULE_SIZE == 1) { -    cerr << "Invalid maximum rule size: must be 0 or >1\n"; -    return 1; -  } -  kMAX_ARITY = conf["max_arity"].as<unsigned>(); -  if (kMAX_ARITY == 1) { -    cerr << "Invalid maximum arity: must be 0 or >1\n"; -    return 1; -  } -  kALLOW_MIXED = !conf.count("no_mixed_rules"); - -  kHIERARCHICAL_PRIOR = conf.count("hierarchical_prior"); - -  if (conf.count("random_seed")) -    prng.reset(new MT19937(conf["random_seed"].as<uint32_t>())); -  else -    prng.reset(new MT19937); -  MT19937& rng = *prng; -  vector<vector<WordID> > corpuse; -  set<WordID> vocabe; -  cerr << "Reading corpus...\n"; -  const unsigned toks = ReadCorpus(conf["input"].as<string>(), &corpuse, &vocabe); -  cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; -  HieroLMModel lm(vocabe.size(), nt_vocab.size()); - -  plm = &lm; -  ExhaustiveBottomUpParser parser(TD::Convert(-nt_vocab[0]), grammars); - -  Hypergraph hg; -  const int kGoal = -TD::Convert("Goal"); -  const int kLP = FD::Convert("LogProb"); -  SparseVector<double> v; v.set_value(kLP, 1.0); -  vector<vector<unsigned> > derivs(corpuse.size()); -  vector<Lattice> cl(corpuse.size()); -  for (int ci = 0; ci < corpuse.size(); ++ci) { -    vector<int>& src = corpuse[ci]; -    Lattice& lat = cl[ci]; -    lat.resize(src.size()); -    for (unsigned i = 0; i < src.size(); ++i) -      lat[i].push_back(LatticeArc(src[i], 0.0, 1)); -  } -  for (int SS=0; SS < samples; ++SS) { -    const bool is_last = ((samples - 1) == SS); -    prob_t dlh = prob_t::One(); -    for (int ci = 0; ci < corpuse.size(); ++ci) { -      const vector<int>& src = corpuse[ci]; -      const Lattice& lat = cl[ci]; -      cerr << TD::GetString(src) << endl; -      hg.clear(); -      parser.Parse(lat, &hg);  // exhaustive parse -      vector<unsigned>& d = derivs[ci]; -      if (!is_last) DecrementDerivation(hg, d, &lm, &rng); -      for (unsigned i = 0; i < hg.edges_.size(); ++i) { -        TRule& r = *hg.edges_[i].rule_; -        if (r.lhs_ == kGoal) -          hg.edges_[i].edge_prob_ = prob_t::One(); -        else -          hg.edges_[i].edge_prob_ = lm.Prob(r); -      } -      if (!is_last) { -        d.clear(); -        SampleDerivation(hg, &rng, &d); -        IncrementDerivation(hg, derivs[ci], &lm, &rng); -      } else { -        prob_t p = TotalProb(hg); -        dlh *= p; -        cerr << " p(sentence) = " << log(p) << "\t" << log(dlh) << endl; -      } -      if (tofreelist.size() > 200000) { -        cerr << "Freeing ... 
"; -        for (unsigned i = 0; i < tofreelist.size(); ++i) -          delete tofreelist[i]; -        tofreelist.clear(); -        cerr << "Freed.\n"; -      } -    } -    double llh = log(lm.Likelihood()); -    cerr << "LLH=" << llh << "\tENTROPY=" << (-llh / log(2) / toks) << "\tPPL=" << pow(2, -llh / log(2) / toks) << endl; -    if (SS % 10 == 9) lm.ResampleHyperparameters(&rng); -    if (is_last) { -      double z = log(dlh); -      cerr << "TOTAL_PROB=" << z << "\tENTROPY=" << (-z / log(2) / toks) << "\tPPL=" << pow(2, -z / log(2) / toks) << endl; -    } -  } -  for (unsigned i = 0; i < nt_vocab.size(); ++i) -    cerr << lm.nts[i] << endl; -  return 0; -} - diff --git a/gi/pf/make-freq-bins.pl b/gi/pf/make-freq-bins.pl deleted file mode 100755 index fdcd3555..00000000 --- a/gi/pf/make-freq-bins.pl +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -my $BASE = 6; -my $CUTOFF = 3; - -my %d; -my $num = 0; -while(<>){ - chomp; - my @words = split /\s+/; - for my $w (@words) {$d{$w}++; $num++;} -} - -my @vocab = sort {$d{$b} <=> $d{$a}} keys %d; - -for (my $i=0; $i<scalar @vocab; $i++) { -  my $most = $d{$vocab[$i]}; -  my $least = 1; - -  my $nl = -int(log($most / $num) / log($BASE) + $CUTOFF); -  if ($nl < 0) { $nl = 0; } -  print "$vocab[$i] $nl\n" -} - - diff --git a/gi/pf/mh_test.cc b/gi/pf/mh_test.cc deleted file mode 100644 index 296e7285..00000000 --- a/gi/pf/mh_test.cc +++ /dev/null @@ -1,148 +0,0 @@ -#include "ccrp.h" - -#include <vector> -#include <iostream> - -#include "tdict.h" -#include "transliterations.h" - -using namespace std; - -MT19937 rng; - -static bool verbose = false; - -struct Model { - -  Model() : bp(), base(0.2, 0.6) , ccrps(5, CCRP<int>(0.8, 0.5)) {} - -  double p0(int x) const { -    assert(x > 0); -    assert(x < 5); -    return 1.0/4.0; -  } - -  double llh() const { -    double lh = bp + base.log_crp_prob(); -    for (int ctx = 1; ctx < 5; ++ctx) -      lh += ccrps[ctx].log_crp_prob(); -    return lh; -  } - -  double prob(int ctx, int x) const { -    assert(ctx > 0 && ctx < 5); -    return ccrps[ctx].prob(x, base.prob(x, p0(x))); -  } - -  void increment(int ctx, int x) { -    assert(ctx > 0 && ctx < 5); -    if (ccrps[ctx].increment(x, base.prob(x, p0(x)), &rng)) { -      if (base.increment(x, p0(x), &rng)) { -        bp += log(1.0 / 4.0); -      } -    } -  } - -  // this is just a biased estimate -  double est_base_prob(int x) { -    return (x + 1) * x / 40.0; -  } - -  void increment_is(int ctx, int x) { -    assert(ctx > 0 && ctx < 5); -    SampleSet<double> ss; -    const int PARTICLES = 25; -    vector<CCRP<int> > s1s(PARTICLES, CCRP<int>(0.5,0.5)); -    vector<CCRP<int> > sbs(PARTICLES, CCRP<int>(0.5,0.5)); -    vector<double> sp0s(PARTICLES); - -    CCRP<int> s1 = ccrps[ctx]; -    CCRP<int> sb = base; -    double sp0 = bp; -    for (int pp = 0; pp < PARTICLES; ++pp) { -      if (pp > 0) { -        ccrps[ctx] = s1; -        base = sb; -        bp = sp0; -      } - -      double q = 1; -      double gamma = 1; -      double est_p = est_base_prob(x); -      //base.prob(x, p0(x)) + rng.next() * 0.1; -      if (ccrps[ctx].increment(x, est_p, &rng, &q)) { -        gamma = q * base.prob(x, p0(x)); -        q *= est_p; -        if (verbose) cerr << "(DP-base draw) "; -        double qq = -1; -        if (base.increment(x, p0(x), &rng, &qq)) { -          if (verbose) cerr << "(G0 draw) "; -          bp += log(p0(x)); -          qq *= p0(x); -        } -      } else { gamma = q; } -      double w = gamma / q; -      if (verbose) -        
cerr << "gamma=" << gamma << " q=" << q << "\tw=" << w << endl; -      ss.add(w); -      s1s[pp] = ccrps[ctx]; -      sbs[pp] = base; -      sp0s[pp] = bp; -    } -    int ps = rng.SelectSample(ss); -    ccrps[ctx] = s1s[ps]; -    base = sbs[ps]; -    bp = sp0s[ps]; -    if (verbose) { -      cerr << "SELECTED: " << ps << endl; -      static int cc = 0; cc++; if (cc ==10) exit(1); -    } -  } - -  void decrement(int ctx, int x) { -    assert(ctx > 0 && ctx < 5); -    if (ccrps[ctx].decrement(x, &rng)) { -      if (base.decrement(x, &rng)) { -        bp -= log(p0(x)); -      } -    } -  } - -  double bp; -  CCRP<int> base; -  vector<CCRP<int> > ccrps; - -}; - -int main(int argc, char** argv) { -  if (argc > 1) { verbose = true; } -  vector<int> counts(15, 0); -  vector<int> tcounts(15, 0); -  int points[] = {1,2, 2,2, 3,2, 4,1, 3, 4, 3, 3, 2, 3, 4, 1, 4, 1, 3, 2, 1, 3, 1, 4, 0, 0}; -  double tlh = 0; -  double tt = 0; -  for (int n = 0; n < 1000; ++n) { -    if (n % 10 == 0) cerr << '.'; -    if ((n+1) % 400 == 0) cerr << " [" << (n+1) << "]\n"; -    Model m; -    for (int *x = points; *x; x += 2) -      m.increment(x[0], x[1]); - -    for (int j = 0; j < 24; ++j) { -      for (int *x = points; *x; x += 2) { -        if (rng.next() < 0.8) { -          m.decrement(x[0], x[1]); -          m.increment_is(x[0], x[1]); -        } -      } -    } -    counts[m.base.num_customers()]++; -    tcounts[m.base.num_tables()]++; -    tlh += m.llh(); -    tt += 1.0; -  } -  cerr << "mean LLH = " << (tlh / tt) << endl; -  for (int i = 0; i < 15; ++i) -    cerr << i << ": " << (counts[i] / tt) << "\t" << (tcounts[i] / tt) << endl; -} - diff --git a/gi/pf/monotonic_pseg.h b/gi/pf/monotonic_pseg.h deleted file mode 100644 index 10d171fe..00000000 --- a/gi/pf/monotonic_pseg.h +++ /dev/null @@ -1,89 +0,0 @@ -#ifndef _MONOTONIC_PSEG_H_ -#define _MONOTONIC_PSEG_H_ - -#include <vector> - -#include "prob.h" -#include "ccrp_nt.h" -#include "trule.h" -#include "base_distributions.h" - -template <typename BaseMeasure> -struct MonotonicParallelSegementationModel { -  explicit MonotonicParallelSegementationModel(BaseMeasure& rcp0) : -    rp0(rcp0), base(prob_t::One()), rules(1,1), stop(1.0) {} - -  void DecrementRule(const TRule& rule) { -    if (rules.decrement(rule)) -      base /= rp0(rule); -  } - -  void IncrementRule(const TRule& rule) { -    if (rules.increment(rule)) -      base *= rp0(rule); -  } - -  void IncrementRulesAndStops(const std::vector<TRulePtr>& rules) { -    for (int i = 0; i < rules.size(); ++i) -      IncrementRule(*rules[i]); -    if (rules.size()) IncrementContinue(rules.size() - 1); -    IncrementStop(); -  } - -  void DecrementRulesAndStops(const std::vector<TRulePtr>& rules) { -    for (int i = 0; i < rules.size(); ++i) -      DecrementRule(*rules[i]); -    if (rules.size()) { -      DecrementContinue(rules.size() - 1); -      DecrementStop(); -    } -  } - -  prob_t RuleProbability(const TRule& rule) const { -    prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule)))); -    return p; -  } - -  prob_t Likelihood() const { -    prob_t p = base; -    prob_t q; q.logeq(rules.log_crp_prob()); -    p *= q; -    q.logeq(stop.log_crp_prob()); -    p *= q; -    return p; -  } - -  void IncrementStop() { -    stop.increment(true); -  } - -  void IncrementContinue(int n = 1) { -    for (int i = 0; i < n; ++i) -      stop.increment(false); -  } - -  void DecrementStop() { -    stop.decrement(true); -  } - -  void DecrementContinue(int n = 1) { -    for (int i = 0; i < n; ++i) -      
stop.decrement(false); -  } - -  prob_t StopProbability() const { -    return prob_t(stop.prob(true, 0.5)); -  } - -  prob_t ContinueProbability() const { -    return prob_t(stop.prob(false, 0.5)); -  } - -  const BaseMeasure& rp0; -  prob_t base; -  CCRP_NoTable<TRule> rules; -  CCRP_NoTable<bool> stop; -}; - -#endif - diff --git a/gi/pf/ngram_base.cc b/gi/pf/ngram_base.cc deleted file mode 100644 index 1299f06f..00000000 --- a/gi/pf/ngram_base.cc +++ /dev/null @@ -1,69 +0,0 @@ -#include "ngram_base.h" - -#include "lm/model.hh" -#include "tdict.h" - -using namespace std; - -namespace { -struct GICSVMapper : public lm::EnumerateVocab { -  GICSVMapper(vector<lm::WordIndex>* out) : out_(out), kLM_UNKNOWN_TOKEN(0) { out_->clear(); } -  void Add(lm::WordIndex index, const StringPiece &str) { -    const WordID cdec_id = TD::Convert(str.as_string()); -    if (cdec_id >= out_->size()) -      out_->resize(cdec_id + 1, kLM_UNKNOWN_TOKEN); -    (*out_)[cdec_id] = index; -  } -  vector<lm::WordIndex>* out_; -  const lm::WordIndex kLM_UNKNOWN_TOKEN; -}; -} - -struct FixedNgramBaseImpl { -  FixedNgramBaseImpl(const string& param) { -    GICSVMapper vm(&cdec2klm_map_); -    lm::ngram::Config conf; -    conf.enumerate_vocab = &vm; -    cerr << "Reading character LM from " << param << endl; -    model = new lm::ngram::ProbingModel(param.c_str(), conf); -    order = model->Order(); -    kEOS = MapWord(TD::Convert("</s>")); -    assert(kEOS > 0); -  } - -  lm::WordIndex MapWord(const WordID w) const { -    if (w < cdec2klm_map_.size()) return cdec2klm_map_[w]; -    return 0; -  } - -  ~FixedNgramBaseImpl() { delete model; } - -  prob_t StringProbability(const vector<WordID>& s) const { -    lm::ngram::State state = model->BeginSentenceState(); -    double prob = 0; -    for (unsigned i = 0; i < s.size(); ++i) { -      const lm::ngram::State scopy(state); -      prob += model->Score(scopy, MapWord(s[i]), state); -    } -    const lm::ngram::State scopy(state); -    prob += model->Score(scopy, kEOS, state); -    prob_t p; p.logeq(prob * log(10)); -    return p; -  } - -  lm::ngram::ProbingModel* model; -  unsigned order; -  vector<lm::WordIndex> cdec2klm_map_; -  lm::WordIndex kEOS; -}; - -FixedNgramBase::~FixedNgramBase() { delete impl; } - -FixedNgramBase::FixedNgramBase(const string& lmfname) { -  impl = new FixedNgramBaseImpl(lmfname); -} - -prob_t FixedNgramBase::StringProbability(const vector<WordID>& s) const { -  return impl->StringProbability(s); -} - diff --git a/gi/pf/ngram_base.h b/gi/pf/ngram_base.h deleted file mode 100644 index 4ea999f3..00000000 --- a/gi/pf/ngram_base.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _NGRAM_BASE_H_ -#define _NGRAM_BASE_H_ - -#include <string> -#include <vector> -#include "trule.h" -#include "wordid.h" -#include "prob.h" - -struct FixedNgramBaseImpl; -struct FixedNgramBase { -  FixedNgramBase(const std::string& lmfname); -  ~FixedNgramBase(); -  prob_t StringProbability(const std::vector<WordID>& s) const; - -  prob_t operator()(const TRule& rule) const { -    return StringProbability(rule.e_); -  } - - private: -  FixedNgramBaseImpl* impl; - -}; - -#endif diff --git a/gi/pf/nuisance_test.cc b/gi/pf/nuisance_test.cc deleted file mode 100644 index fc0af9cb..00000000 --- a/gi/pf/nuisance_test.cc +++ /dev/null @@ -1,161 +0,0 @@ -#include "ccrp.h" - -#include <vector> -#include <iostream> - -#include "tdict.h" -#include "transliterations.h" - -using namespace std; - -MT19937 rng; - -ostream& operator<<(ostream&os, const vector<int>& v) { -  os << '[' << v[0]; -  if 
(v.size() == 2) os << ' ' << v[1]; -  return os << ']'; -} - -struct Base { -  Base() : llh(), v(2), v1(1), v2(1), crp(0.25, 0.5) {} -  inline double p0(const vector<int>& x) const { -    double p = 0.75; -    if (x.size() == 2) p = 0.25; -    p *= 1.0 / 3.0; -    if (x.size() == 2) p *= 1.0 / 3.0; -    return p; -  } -  double est_deriv_prob(int a, int b, int seg) const { -    assert(a > 0 && a < 4);  // a \in {1,2,3} -    assert(b > 0 && b < 4);  // b \in {1,2,3} -    assert(seg == 0 || seg == 1);   // seg \in {0,1} -    if (seg == 0) { -      v[0] = a; -      v[1] = b; -      return crp.prob(v, p0(v)); -    } else { -      v1[0] = a; -      v2[0] = b; -      return crp.prob(v1, p0(v1)) * crp.prob(v2, p0(v2)); -    } -  } -  double est_marginal_prob(int a, int b) const { -    return est_deriv_prob(a,b,0) + est_deriv_prob(a,b,1); -  } -  int increment(int a, int b, double* pw = NULL) { -    double p1 = est_deriv_prob(a, b, 0); -    double p2 = est_deriv_prob(a, b, 1); -    //p1 = 0.5; p2 = 0.5; -    int seg = rng.SelectSample(p1,p2); -    double tmp = 0; -    if (!pw) pw = &tmp; -    double& w = *pw; -    if (seg == 0) { -      v[0] = a; -      v[1] = b; -      w = crp.prob(v, p0(v)) / p1; -      if (crp.increment(v, p0(v), &rng)) { -        llh += log(p0(v)); -      } -    } else { -      v1[0] = a; -      w = crp.prob(v1, p0(v1)) / p2; -      if (crp.increment(v1, p0(v1), &rng)) { -        llh += log(p0(v1)); -      } -      v2[0] = b; -      w *= crp.prob(v2, p0(v2)); -      if (crp.increment(v2, p0(v2), &rng)) { -        llh += log(p0(v2)); -      } -    } -    return seg; -  } -  void increment(int a, int b, int seg) { -    if (seg == 0) { -      v[0] = a; -      v[1] = b; -      if (crp.increment(v, p0(v), &rng)) { -        llh += log(p0(v)); -      } -    } else { -      v1[0] = a; -      if (crp.increment(v1, p0(v1), &rng)) { -        llh += log(p0(v1)); -      } -      v2[0] = b; -      if (crp.increment(v2, p0(v2), &rng)) { -        llh += log(p0(v2)); -      } -    } -  } -  void decrement(int a, int b, int seg) { -    if (seg == 0) { -      v[0] = a; -      v[1] = b; -      if (crp.decrement(v, &rng)) { -        llh -= log(p0(v)); -      } -    } else { -      v1[0] = a; -      if (crp.decrement(v1, &rng)) { -        llh -= log(p0(v1)); -      } -      v2[0] = b; -      if (crp.decrement(v2, &rng)) { -        llh -= log(p0(v2)); -      } -    } -  } -  double log_likelihood() const { -    return llh + crp.log_crp_prob(); -  } -  double llh; -  mutable vector<int> v, v1, v2; -  CCRP<vector<int> > crp; -}; - -int main(int argc, char** argv) { -  double tl = 0; -  const int ITERS = 1000; -  const int PARTICLES = 20; -  const int DATAPOINTS = 50; -  WordID x = TD::Convert("souvenons"); -  WordID y = TD::Convert("remember"); -  vector<WordID> src; TD::ConvertSentence("s o u v e n o n s", &src); -  vector<WordID> trg; TD::ConvertSentence("r e m e m b e r", &trg); -//  Transliterations xx; -//  xx.Initialize(x, src, y, trg); -//  return 1; - - for (int j = 0; j < ITERS; ++j) { -  Base b; -  vector<int> segs(DATAPOINTS); -  SampleSet<double> ss; -  vector<int> sss; -  for (int i = 0; i < DATAPOINTS; i++) { -    ss.clear(); -    sss.clear(); -    int x = ((i / 10) % 3) + 1; -    int y = (i % 3) + 1; -    //double ep = b.est_marginal_prob(x,y); -    //cerr << "est p(" << x << "," << y << ") = " << ep << endl; -    for (int n = 0; n < PARTICLES; ++n) { -      double w; -      int seg = b.increment(x,y,&w); -      //cerr << seg << " w=" << w << endl; -      ss.add(w); -      
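      // Editorial note: this is a sampling-importance-resampling step.
      // increment() above draws a candidate segmentation seg from a proposal
      // and reports its importance weight w (roughly target/proposal); the
      // candidate is recorded and then undone with decrement() so every
      // particle starts from the same CRP state, and one candidate is
      // finally redrawn below in proportion to w and applied for real.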
sss.push_back(seg); -      b.decrement(x,y,seg); -    } -    int seg = sss[rng.SelectSample(ss)]; -    b.increment(x, y, seg); -    //cerr << "Selected: " << seg << endl; -    //return 1; -    segs[i] = seg; -  } -  tl += b.log_likelihood(); - } -  cerr << "LLH=" << tl / ITERS << endl; -} - diff --git a/gi/pf/os_phrase.h b/gi/pf/os_phrase.h deleted file mode 100644 index dfe40cb1..00000000 --- a/gi/pf/os_phrase.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _OS_PHRASE_H_ -#define _OS_PHRASE_H_ - -#include <iostream> -#include <vector> -#include "tdict.h" - -inline std::ostream& operator<<(std::ostream& os, const std::vector<WordID>& p) { -  os << '['; -  for (int i = 0; i < p.size(); ++i) -    os << (i==0 ? "" : " ") << TD::Convert(p[i]); -  return os << ']'; -} - -#endif diff --git a/gi/pf/pf.h b/gi/pf/pf.h deleted file mode 100644 index ede7cda8..00000000 --- a/gi/pf/pf.h +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef _PF_H_ -#define _PF_H_ - -#include <cassert> -#include <vector> -#include "sampler.h" -#include "prob.h" - -template <typename ParticleType> -struct ParticleRenormalizer { -  void operator()(std::vector<ParticleType>* pv) const { -    if (pv->empty()) return; -    prob_t z = prob_t::Zero(); -    for (unsigned i = 0; i < pv->size(); ++i) -      z += (*pv)[i].weight; -    assert(z > prob_t::Zero()); -    for (unsigned i = 0; i < pv->size(); ++i) -      (*pv)[i].weight /= z; -  } -}; - -template <typename ParticleType> -struct MultinomialResampleFilter { -  explicit MultinomialResampleFilter(MT19937* rng) : rng_(rng) {} - -  void operator()(std::vector<ParticleType>* pv) { -    if (pv->empty()) return; -    std::vector<ParticleType>& ps = *pv; -    SampleSet<prob_t> ss; -    for (int i = 0; i < ps.size(); ++i) -      ss.add(ps[i].weight); -    std::vector<ParticleType> nps; nps.reserve(ps.size()); -    const prob_t uniform_weight(1.0 / ps.size()); -    for (int i = 0; i < ps.size(); ++i) { -      nps.push_back(ps[rng_->SelectSample(ss)]); -      nps[i].weight = uniform_weight; -    } -    nps.swap(ps); -  } - - private: -  MT19937* rng_; -}; - -template <typename ParticleType> -struct SystematicResampleFilter { -  explicit SystematicResampleFilter(MT19937* rng) : rng_(rng), renorm_() {} - -  void operator()(std::vector<ParticleType>* pv) { -    if (pv->empty()) return; -    renorm_(pv); -    std::vector<ParticleType>& ps = *pv; -    std::vector<ParticleType> nps; nps.reserve(ps.size()); -    double lower = 0, upper = 0; -    const double skip = 1.0 / ps.size(); -    double u_j = rng_->next() * skip; -    //std::cerr << "u_0: " << u_j << std::endl; -    int j = 0; -    for (unsigned i = 0; i < ps.size(); ++i) { -      upper += ps[i].weight.as_float(); -      //std::cerr << "lower: " << lower << " upper: " << upper << std::endl; -      // how many children does ps[i] have? 
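// Editorial note: systematic resampling gives particle i one offspring for
// every grid point u_0 + j * skip that falls inside its weight interval
// [lower, upper], which is what the two while-loops below implement. A
// minimal self-contained sketch of the same rule over plain doubles
// (editorial illustration only, not part of the original header; it assumes
// a non-empty weight vector already normalized to sum to 1):
#include <vector>
std::vector<int> SystematicOffspring(const std::vector<double>& w, double u0) {
  // u0 is a single uniform draw in [0, 1/N); all N grid points reuse it.
  std::vector<int> kids(w.size(), 0);
  const double skip = 1.0 / w.size();
  double upper = 0.0, u = u0;
  for (unsigned i = 0; i < w.size(); ++i) {
    upper += w[i];                               // right edge of interval i
    while (u < upper) { ++kids[i]; u += skip; }  // grid points inside interval i
  }
  return kids;  // entries sum to N (up to floating-point slop)
}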
-      while (u_j < lower) { u_j += skip; ++j; } -      while (u_j >= lower && u_j <= upper) { -        assert(j < ps.size()); -        nps.push_back(ps[i]); -        u_j += skip; -        //std::cerr << " add u_j=" << u_j << std::endl; -        ++j; -      } -      lower = upper; -    } -    //std::cerr << ps.size() << " " << nps.size() << "\n"; -    assert(ps.size() == nps.size()); -    //exit(1); -    ps.swap(nps); -  } - - private: -  MT19937* rng_; -  ParticleRenormalizer<ParticleType> renorm_; -}; - -#endif diff --git a/gi/pf/pf_test.cc b/gi/pf/pf_test.cc deleted file mode 100644 index 296e7285..00000000 --- a/gi/pf/pf_test.cc +++ /dev/null @@ -1,148 +0,0 @@ -#include "ccrp.h" - -#include <vector> -#include <iostream> - -#include "tdict.h" -#include "transliterations.h" - -using namespace std; - -MT19937 rng; - -static bool verbose = false; - -struct Model { - -  Model() : bp(), base(0.2, 0.6) , ccrps(5, CCRP<int>(0.8, 0.5)) {} - -  double p0(int x) const { -    assert(x > 0); -    assert(x < 5); -    return 1.0/4.0; -  } - -  double llh() const { -    double lh = bp + base.log_crp_prob(); -    for (int ctx = 1; ctx < 5; ++ctx) -      lh += ccrps[ctx].log_crp_prob(); -    return lh; -  } - -  double prob(int ctx, int x) const { -    assert(ctx > 0 && ctx < 5); -    return ccrps[ctx].prob(x, base.prob(x, p0(x))); -  } - -  void increment(int ctx, int x) { -    assert(ctx > 0 && ctx < 5); -    if (ccrps[ctx].increment(x, base.prob(x, p0(x)), &rng)) { -      if (base.increment(x, p0(x), &rng)) { -        bp += log(1.0 / 4.0); -      } -    } -  } - -  // this is just a biased estimate -  double est_base_prob(int x) { -    return (x + 1) * x / 40.0; -  } - -  void increment_is(int ctx, int x) { -    assert(ctx > 0 && ctx < 5); -    SampleSet<double> ss; -    const int PARTICLES = 25; -    vector<CCRP<int> > s1s(PARTICLES, CCRP<int>(0.5,0.5)); -    vector<CCRP<int> > sbs(PARTICLES, CCRP<int>(0.5,0.5)); -    vector<double> sp0s(PARTICLES); - -    CCRP<int> s1 = ccrps[ctx]; -    CCRP<int> sb = base; -    double sp0 = bp; -    for (int pp = 0; pp < PARTICLES; ++pp) { -      if (pp > 0) { -        ccrps[ctx] = s1; -        base = sb; -        bp = sp0; -      } - -      double q = 1; -      double gamma = 1; -      double est_p = est_base_prob(x); -      //base.prob(x, p0(x)) + rng.next() * 0.1; -      if (ccrps[ctx].increment(x, est_p, &rng, &q)) { -        gamma = q * base.prob(x, p0(x)); -        q *= est_p; -        if (verbose) cerr << "(DP-base draw) "; -        double qq = -1; -        if (base.increment(x, p0(x), &rng, &qq)) { -          if (verbose) cerr << "(G0 draw) "; -          bp += log(p0(x)); -          qq *= p0(x); -        } -      } else { gamma = q; } -      double w = gamma / q; -      if (verbose) -        cerr << "gamma=" << gamma << " q=" << q << "\tw=" << w << endl; -      ss.add(w); -      s1s[pp] = ccrps[ctx]; -      sbs[pp] = base; -      sp0s[pp] = bp; -    } -    int ps = rng.SelectSample(ss); -    ccrps[ctx] = s1s[ps]; -    base = sbs[ps]; -    bp = sp0s[ps]; -    if (verbose) { -      cerr << "SELECTED: " << ps << endl; -      static int cc = 0; cc++; if (cc ==10) exit(1); -    } -  } - -  void decrement(int ctx, int x) { -    assert(ctx > 0 && ctx < 5); -    if (ccrps[ctx].decrement(x, &rng)) { -      if (base.decrement(x, &rng)) { -        bp -= log(p0(x)); -      } -    } -  } - -  double bp; -  CCRP<int> base; -  vector<CCRP<int> > ccrps; - -}; - -int main(int argc, char** argv) { -  if (argc > 1) { verbose = true; } -  vector<int> counts(15, 0); -  
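  // (Editorial note: pf_test.cc is byte-for-byte identical to mh_test.cc
  // above; both diffs carry blob index 296e7285.) counts/tcounts tally,
  // across the 1000 replications below, how many customers and tables the
  // shared base CRP ends up with, apparently as a sanity check that the
  // importance-sampled increment_is() moves leave the model in the same
  // regime as the exact moves.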
vector<int> tcounts(15, 0); -  int points[] = {1,2, 2,2, 3,2, 4,1, 3, 4, 3, 3, 2, 3, 4, 1, 4, 1, 3, 2, 1, 3, 1, 4, 0, 0}; -  double tlh = 0; -  double tt = 0; -  for (int n = 0; n < 1000; ++n) { -    if (n % 10 == 0) cerr << '.'; -    if ((n+1) % 400 == 0) cerr << " [" << (n+1) << "]\n"; -    Model m; -    for (int *x = points; *x; x += 2) -      m.increment(x[0], x[1]); - -    for (int j = 0; j < 24; ++j) { -      for (int *x = points; *x; x += 2) { -        if (rng.next() < 0.8) { -          m.decrement(x[0], x[1]); -          m.increment_is(x[0], x[1]); -        } -      } -    } -    counts[m.base.num_customers()]++; -    tcounts[m.base.num_tables()]++; -    tlh += m.llh(); -    tt += 1.0; -  } -  cerr << "mean LLH = " << (tlh / tt) << endl; -  for (int i = 0; i < 15; ++i) -    cerr << i << ": " << (counts[i] / tt) << "\t" << (tcounts[i] / tt) << endl; -} - diff --git a/gi/pf/pfbrat.cc b/gi/pf/pfbrat.cc deleted file mode 100644 index 832f22cf..00000000 --- a/gi/pf/pfbrat.cc +++ /dev/null @@ -1,543 +0,0 @@ -#include <iostream> -#include <tr1/memory> -#include <queue> - -#include <boost/functional.hpp> -#include <boost/multi_array.hpp> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "viterbi.h" -#include "hg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "cfg_wfst_composer.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -static unsigned kMAX_SRC_PHRASE; -static unsigned kMAX_TRG_PHRASE; -struct FSTState; - -double log_poisson(unsigned x, const double& lambda) { -  assert(lambda > 0.0); -  return log(lambda) * x - lgamma(x + 1) - lambda; -} - -struct ConditionalBase { -  explicit ConditionalBase(const double m1mixture, const unsigned vocab_e_size, const string& model1fname) : -      kM1MIXTURE(m1mixture), -      kUNIFORM_MIXTURE(1.0 - m1mixture), -      kUNIFORM_TARGET(1.0 / vocab_e_size), -      kNULL(TD::Convert("<eps>")) { -    assert(m1mixture >= 0.0 && m1mixture <= 1.0); -    assert(vocab_e_size > 0); -    LoadModel1(model1fname); -  } - -  void LoadModel1(const string& fname) { -    cerr << "Loading Model 1 parameters from " << fname << " ..." 
<< endl; -    ReadFile rf(fname); -    istream& in = *rf.stream(); -    string line; -    unsigned lc = 0; -    while(getline(in, line)) { -      ++lc; -      int cur = 0; -      int start = 0; -      while(cur < line.size() && line[cur] != ' ') { ++cur; } -      assert(cur != line.size()); -      line[cur] = 0; -      const WordID src = TD::Convert(&line[0]); -      ++cur; -      start = cur; -      while(cur < line.size() && line[cur] != ' ') { ++cur; } -      assert(cur != line.size()); -      line[cur] = 0; -      WordID trg = TD::Convert(&line[start]); -      const double logprob = strtod(&line[cur + 1], NULL); -      if (src >= ttable.size()) ttable.resize(src + 1); -      ttable[src][trg].logeq(logprob); -    } -    cerr << "  read " << lc << " parameters.\n"; -  } - -  // return logp0 of rule.e_ | rule.f_ -  prob_t operator()(const TRule& rule) const { -    const int flen = rule.f_.size(); -    const int elen = rule.e_.size(); -    prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); -    prob_t p; -    p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01) -    for (int i = 0; i < elen; ++i) {               // for each position i in e-RHS -      const WordID trg = rule.e_[i]; -      prob_t tp = prob_t::Zero(); -      for (int j = -1; j < flen; ++j) { -        const WordID src = j < 0 ? kNULL : rule.f_[j]; -        const map<WordID, prob_t>::const_iterator it = ttable[src].find(trg); -        if (it != ttable[src].end()) { -          tp += kM1MIXTURE * it->second; -        } -        tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET; -      } -      tp *= uniform_src_alignment;                 //     draw a_i         ~uniform -      p *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform -    } -    return p; -  } - -  const prob_t kM1MIXTURE;  // Model 1 mixture component -  const prob_t kUNIFORM_MIXTURE; // uniform mixture component -  const prob_t kUNIFORM_TARGET; -  const WordID kNULL; -  vector<map<WordID, prob_t> > ttable; -}; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { -  po::options_description opts("Configuration options"); -  opts.add_options() -        ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples") -        ("input,i",po::value<string>(),"Read parallel data from") -        ("max_src_phrase",po::value<unsigned>()->default_value(3),"Maximum length of source language phrases") -        ("max_trg_phrase",po::value<unsigned>()->default_value(3),"Maximum length of target language phrases") -        ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)") -        ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") -        ("random_seed,S",po::value<uint32_t>(), "Random seed"); -  po::options_description clo("Command line options"); -  clo.add_options() -        ("config", po::value<string>(), "Configuration file") -        ("help,h", "Print this help message and exit"); -  po::options_description dconfig_options, dcmdline_options; -  dconfig_options.add(opts); -  dcmdline_options.add(opts).add(clo); -   -  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); -  if (conf->count("config")) { -    ifstream config((*conf)["config"].as<string>().c_str()); -    po::store(po::parse_config_file(config, dconfig_options), *conf); -  } -  po::notify(*conf); - -  if (conf->count("help") || (conf->count("input") == 0)) { - 
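    // --help requested, or no --input given: print the option summary and exit.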
   cerr << dcmdline_options << endl; -    exit(1); -  } -} - -void ReadParallelCorpus(const string& filename, -                vector<vector<WordID> >* f, -                vector<vector<int> >* e, -                set<int>* vocab_f, -                set<int>* vocab_e) { -  f->clear(); -  e->clear(); -  vocab_f->clear(); -  vocab_e->clear(); -  istream* in; -  if (filename == "-") -    in = &cin; -  else -    in = new ifstream(filename.c_str()); -  assert(*in); -  string line; -  const WordID kDIV = TD::Convert("|||"); -  vector<WordID> tmp; -  while(*in) { -    getline(*in, line); -    if (line.empty() && !*in) break; -    e->push_back(vector<int>()); -    f->push_back(vector<int>()); -    vector<int>& le = e->back(); -    vector<int>& lf = f->back(); -    tmp.clear(); -    TD::ConvertSentence(line, &tmp); -    bool isf = true; -    for (unsigned i = 0; i < tmp.size(); ++i) { -      const int cur = tmp[i]; -      if (isf) { -        if (kDIV == cur) { isf = false; } else { -          lf.push_back(cur); -          vocab_f->insert(cur); -        } -      } else { -        assert(cur != kDIV); -        le.push_back(cur); -        vocab_e->insert(cur); -      } -    } -    assert(isf == false); -  } -  if (in != &cin) delete in; -} - -struct UniphraseLM { -  UniphraseLM(const vector<vector<int> >& corpus, -              const set<int>& vocab, -              const po::variables_map& conf) : -    phrases_(1,1), -    gen_(1,1), -    corpus_(corpus), -    uniform_word_(1.0 / vocab.size()), -    gen_p0_(0.5), -    p_end_(0.5), -    use_poisson_(conf.count("poisson_length") > 0) {} - -  void ResampleHyperparameters(MT19937* rng) { -    phrases_.resample_hyperparameters(rng); -    gen_.resample_hyperparameters(rng); -    cerr << " " << phrases_.alpha(); -  } - -  CCRP_NoTable<vector<int> > phrases_; -  CCRP_NoTable<bool> gen_; -  vector<vector<bool> > z_;   // z_[i] is there a phrase boundary after the ith word -  const vector<vector<int> >& corpus_; -  const double uniform_word_; -  const double gen_p0_; -  const double p_end_; // in base length distribution, p of the end of a phrase -  const bool use_poisson_; -}; - -struct Reachability { -  boost::multi_array<bool, 4> edges;  // edges[src_covered][trg_covered][x][trg_delta] is this edge worth exploring? 
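// ComputeReachability below fills this table with a forward pass (which
// coverage pairs (i,j) are reachable from (0,0) using phrase pairs of bounded
// length) intersected with a backward pass (from which pairs the final cell
// (srclen,trglen) can still be reached); only edges on some complete path
// survive. A standalone sketch of that intersection over plain vectors,
// assuming the hypothetical name ReachableCells rather than the
// boost::multi_array layout used here:
#include <vector>

std::vector<std::vector<bool> > ReachableCells(int srclen, int trglen,
                                               int max_src, int max_trg) {
  std::vector<std::vector<bool> > fwd(srclen + 1, std::vector<bool>(trglen + 1, false));
  fwd[0][0] = true;
  for (int i = 0; i <= srclen; ++i)        // k,l >= 1, so ascending order is safe
    for (int j = 0; j <= trglen; ++j)
      if (fwd[i][j])
        for (int k = 1; k <= max_src && i + k <= srclen; ++k)
          for (int l = 1; l <= max_trg && j + l <= trglen; ++l)
            fwd[i + k][j + l] = true;
  std::vector<std::vector<bool> > bwd(srclen + 1, std::vector<bool>(trglen + 1, false));
  bwd[srclen][trglen] = true;
  for (int i = srclen; i >= 0; --i)
    for (int j = trglen; j >= 0; --j)
      if (bwd[i][j])
        for (int k = 1; k <= max_src && i - k >= 0; ++k)
          for (int l = 1; l <= max_trg && j - l >= 0; ++l)
            bwd[i - k][j - l] = true;
  for (int i = 0; i <= srclen; ++i)        // keep only cells on a complete path
    for (int j = 0; j <= trglen; ++j)
      fwd[i][j] = fwd[i][j] && bwd[i][j];
  return fwd;
}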
-  boost::multi_array<short, 2> max_src_delta; // msd[src_covered][trg_covered] -- the largest src delta that's valid - -  Reachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) : -      edges(boost::extents[srclen][trglen][src_max_phrase_len+1][trg_max_phrase_len+1]), -      max_src_delta(boost::extents[srclen][trglen]) { -    ComputeReachability(srclen, trglen, src_max_phrase_len, trg_max_phrase_len); -  } - - private: -  struct SState { -    SState() : prev_src_covered(), prev_trg_covered() {} -    SState(int i, int j) : prev_src_covered(i), prev_trg_covered(j) {} -    int prev_src_covered; -    int prev_trg_covered; -  }; - -  struct NState { -    NState() : next_src_covered(), next_trg_covered() {} -    NState(int i, int j) : next_src_covered(i), next_trg_covered(j) {} -    int next_src_covered; -    int next_trg_covered; -  }; - -  void ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) { -    typedef boost::multi_array<vector<SState>, 2> array_type; -    array_type a(boost::extents[srclen + 1][trglen + 1]); -    a[0][0].push_back(SState()); -    for (int i = 0; i < srclen; ++i) { -      for (int j = 0; j < trglen; ++j) { -        if (a[i][j].size() == 0) continue; -        const SState prev(i,j); -        for (int k = 1; k <= src_max_phrase_len; ++k) { -          if ((i + k) > srclen) continue; -          for (int l = 1; l <= trg_max_phrase_len; ++l) { -            if ((j + l) > trglen) continue; -            a[i + k][j + l].push_back(prev); -          } -        } -      } -    } -    a[0][0].clear(); -    cerr << "Final cell contains " << a[srclen][trglen].size() << " back pointers\n"; -    assert(a[srclen][trglen].size() > 0); - -    typedef boost::multi_array<bool, 2> rarray_type; -    rarray_type r(boost::extents[srclen + 1][trglen + 1]); -//    typedef boost::multi_array<vector<NState>, 2> narray_type; -//    narray_type b(boost::extents[srclen + 1][trglen + 1]); -    r[srclen][trglen] = true; -    for (int i = srclen; i >= 0; --i) { -      for (int j = trglen; j >= 0; --j) { -        vector<SState>& prevs = a[i][j]; -        if (!r[i][j]) { prevs.clear(); } -//        const NState nstate(i,j); -        for (int k = 0; k < prevs.size(); ++k) { -          r[prevs[k].prev_src_covered][prevs[k].prev_trg_covered] = true; -          int src_delta = i - prevs[k].prev_src_covered; -          edges[prevs[k].prev_src_covered][prevs[k].prev_trg_covered][src_delta][j - prevs[k].prev_trg_covered] = true; -          short &msd = max_src_delta[prevs[k].prev_src_covered][prevs[k].prev_trg_covered]; -          if (src_delta > msd) msd = src_delta; -//          b[prevs[k].prev_src_covered][prevs[k].prev_trg_covered].push_back(nstate); -        } -      } -    } -    assert(!edges[0][0][1][0]); -    assert(!edges[0][0][0][1]); -    assert(!edges[0][0][0][0]); -    cerr << "  MAX SRC DELTA[0][0] = " << max_src_delta[0][0] << endl; -    assert(max_src_delta[0][0] > 0); -    //cerr << "First cell contains " << b[0][0].size() << " forward pointers\n"; -    //for (int i = 0; i < b[0][0].size(); ++i) { -    //  cerr << "  -> (" << b[0][0][i].next_src_covered << "," << b[0][0][i].next_trg_covered << ")\n"; -    //} -  } -}; - -ostream& operator<<(ostream& os, const FSTState& q); -struct FSTState { -  explicit FSTState(int src_size) : -      trg_covered_(), -      src_covered_(), -      src_coverage_(src_size) {} - -  FSTState(short trg_covered, short src_covered, const vector<bool>& src_coverage, const vector<short>& src_prefix) : 
-      trg_covered_(trg_covered), -      src_covered_(src_covered), -      src_coverage_(src_coverage), -      src_prefix_(src_prefix) { -    if (src_coverage_.size() == src_covered) { -      assert(src_prefix.size() == 0); -    } -  } - -  // if we extend by the word at src_position, what are -  // the next states that are reachable and lie on a valid -  // path to the final state? -  vector<FSTState> Extensions(int src_position, int src_len, int trg_len, const Reachability& r) const { -    assert(src_position < src_coverage_.size()); -    if (src_coverage_[src_position]) { -      cerr << "Trying to extend " << *this << " with position " << src_position << endl; -      abort(); -    } -    vector<bool> ncvg = src_coverage_; -    ncvg[src_position] = true; - -    vector<FSTState> res; -    const int trg_remaining = trg_len - trg_covered_; -    if (trg_remaining <= 0) { -      cerr << "Target appears to have been covered: " << *this << " (trg_len=" << trg_len << ",trg_covered=" << trg_covered_ << ")" << endl; -      abort(); -    } -    const int src_remaining = src_len - src_covered_; -    if (src_remaining <= 0) { -      cerr << "Source appears to have been covered: " << *this << endl; -      abort(); -    } - -    for (int tc = 1; tc <= kMAX_TRG_PHRASE; ++tc) { -      if (r.edges[src_covered_][trg_covered_][src_prefix_.size() + 1][tc]) { -        int nc = src_prefix_.size() + 1 + src_covered_; -        res.push_back(FSTState(trg_covered_ + tc, nc, ncvg, vector<short>())); -      } -    } - -    if ((src_prefix_.size() + 1) < r.max_src_delta[src_covered_][trg_covered_]) { -      vector<short> nsp = src_prefix_; -      nsp.push_back(src_position); -      res.push_back(FSTState(trg_covered_, src_covered_, ncvg, nsp)); -    } - -    if (res.size() == 0) { -      cerr << *this << " can't be extended!\n"; -      abort(); -    } -    return res; -  } - -  short trg_covered_, src_covered_; -  vector<bool> src_coverage_; -  vector<short> src_prefix_; -}; -bool operator<(const FSTState& q, const FSTState& r) { -  if (q.trg_covered_ != r.trg_covered_) return q.trg_covered_ < r.trg_covered_; -  if (q.src_covered_!= r.src_covered_) return q.src_covered_ < r.src_covered_; -  if (q.src_coverage_ != r.src_coverage_) return q.src_coverage_ < r.src_coverage_; -  return q.src_prefix_ < r.src_prefix_; -} - -ostream& operator<<(ostream& os, const FSTState& q) { -  os << "[" << q.trg_covered_ << " : "; -  for (int i = 0; i < q.src_coverage_.size(); ++i) -    os << q.src_coverage_[i]; -  os << " : <"; -  for (int i = 0; i < q.src_prefix_.size(); ++i) { -    if (i != 0) os << ' '; -    os << q.src_prefix_[i]; -  } -  return os << ">]"; -} - -struct MyModel { -  MyModel(ConditionalBase& rcp0) : rp0(rcp0) {} -  typedef unordered_map<vector<WordID>, CCRP_NoTable<TRule>, boost::hash<vector<WordID> > > SrcToRuleCRPMap; - -  void DecrementRule(const TRule& rule) { -    SrcToRuleCRPMap::iterator it = rules.find(rule.f_); -    assert(it != rules.end()); -    it->second.decrement(rule); -    if (it->second.num_customers() == 0) rules.erase(it); -  } - -  void IncrementRule(const TRule& rule) { -    SrcToRuleCRPMap::iterator it = rules.find(rule.f_); -    if (it == rules.end()) { -      CCRP_NoTable<TRule> crp(1,1); -      it = rules.insert(make_pair(rule.f_, crp)).first; -    } -    it->second.increment(rule); -  } - -  // conditioned on rule.f_ -  prob_t RuleConditionalProbability(const TRule& rule) const { -    const prob_t base = rp0(rule); -    SrcToRuleCRPMap::const_iterator it = rules.find(rule.f_); -    if (it == 
rules.end()) { -      return base; -    } else { -      const double lp = it->second.logprob(rule, log(base)); -      prob_t q; q.logeq(lp); -      return q; -    } -  } - -  const ConditionalBase& rp0; -  SrcToRuleCRPMap rules; -}; - -struct MyFST : public WFST { -  MyFST(const vector<WordID>& ssrc, const vector<WordID>& strg, MyModel* m) : -      src(ssrc), trg(strg), -      r(src.size(),trg.size(),kMAX_SRC_PHRASE, kMAX_TRG_PHRASE), -      model(m) { -    FSTState in(src.size()); -    cerr << " INIT: " << in << endl; -    init = GetNode(in); -    for (int i = 0; i < in.src_coverage_.size(); ++i) in.src_coverage_[i] = true; -    in.src_covered_ = src.size(); -    in.trg_covered_ = trg.size(); -    cerr << "FINAL: " << in << endl; -    final = GetNode(in); -  } -  virtual const WFSTNode* Final() const; -  virtual const WFSTNode* Initial() const; - -  const WFSTNode* GetNode(const FSTState& q); -  map<FSTState, boost::shared_ptr<WFSTNode> > m; -  const vector<WordID>& src; -  const vector<WordID>& trg; -  Reachability r; -  const WFSTNode* init; -  const WFSTNode* final; -  MyModel* model; -}; - -struct MyNode : public WFSTNode { -  MyNode(const FSTState& q, MyFST* fst) : state(q), container(fst) {} -  virtual vector<pair<const WFSTNode*, TRulePtr> > ExtendInput(unsigned srcindex) const; -  const FSTState state; -  mutable MyFST* container; -}; - -vector<pair<const WFSTNode*, TRulePtr> > MyNode::ExtendInput(unsigned srcindex) const { -  cerr << "EXTEND " << state << " with " << srcindex << endl; -  vector<FSTState> ext = state.Extensions(srcindex, container->src.size(), container->trg.size(), container->r); -  vector<pair<const WFSTNode*,TRulePtr> > res(ext.size()); -  for (unsigned i = 0; i < ext.size(); ++i) { -    res[i].first = container->GetNode(ext[i]); -    if (ext[i].src_prefix_.size() == 0) { -      const unsigned trg_from = state.trg_covered_; -      const unsigned trg_to = ext[i].trg_covered_; -      const unsigned prev_prfx_size = state.src_prefix_.size(); -      res[i].second.reset(new TRule); -      res[i].second->lhs_ = -TD::Convert("X"); -      vector<WordID>& src = res[i].second->f_; -      vector<WordID>& trg = res[i].second->e_; -      src.resize(prev_prfx_size + 1); -      for (unsigned j = 0; j < prev_prfx_size; ++j) -        src[j] = container->src[state.src_prefix_[j]]; -      src[prev_prfx_size] = container->src[srcindex]; -      for (unsigned j = trg_from; j < trg_to; ++j) -        trg.push_back(container->trg[j]); -      res[i].second->scores_.set_value(FD::Convert("Proposal"), log(container->model->RuleConditionalProbability(*res[i].second))); -    } -  } -  return res; -} - -const WFSTNode* MyFST::GetNode(const FSTState& q) { -  boost::shared_ptr<WFSTNode>& res = m[q]; -  if (!res) { -    res.reset(new MyNode(q, this)); -  } -  return &*res; -} - -const WFSTNode* MyFST::Final() const { -  return final; -} - -const WFSTNode* MyFST::Initial() const { -  return init; -} - -int main(int argc, char** argv) { -  po::variables_map conf; -  InitCommandLine(argc, argv, &conf); -  kMAX_TRG_PHRASE = conf["max_trg_phrase"].as<unsigned>(); -  kMAX_SRC_PHRASE = conf["max_src_phrase"].as<unsigned>(); - -  if (!conf.count("model1")) { -    cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; -    return 1; -  } -  boost::shared_ptr<MT19937> prng; -  if (conf.count("random_seed")) -    prng.reset(new MT19937(conf["random_seed"].as<uint32_t>())); -  else -    prng.reset(new MT19937); -  MT19937& rng = *prng; - -  vector<vector<int> > corpuse, corpusf; -  
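// MyModel above keeps one Chinese restaurant process per source phrase f_, and
// RuleConditionalProbability interpolates a rule's customer count with the
// Model 1 base probability. A sketch of that predictive rule for a CRP without
// table tracking, assuming a fixed concentration alpha (TinyCRP and its
// members are illustrative stand-ins, not the CCRP_NoTable API):
#include <map>

struct TinyCRP {
  explicit TinyCRP(double alpha) : alpha_(alpha), n_(0) {}
  void increment(int dish) { ++counts_[dish]; ++n_; }
  void decrement(int dish) {                    // assumes the dish is seated
    std::map<int, int>::iterator it = counts_.find(dish);
    if (--it->second == 0) counts_.erase(it);
    --n_;
  }
  // p(dish | seating) = (count(dish) + alpha * p0(dish)) / (n + alpha)
  double prob(int dish, double p0) const {
    std::map<int, int>::const_iterator it = counts_.find(dish);
    const int c = (it == counts_.end()) ? 0 : it->second;
    return (c + alpha_ * p0) / (n_ + alpha_);
  }
  double alpha_;
  int n_;
  std::map<int, int> counts_;
};
// With no customers (n_ == 0), prob() reduces to p0 exactly, matching the
// rules.end() branch of RuleConditionalProbability above.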
set<int> vocabe, vocabf; -  ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe); -  cerr << "f-Corpus size: " << corpusf.size() << " sentences\n"; -  cerr << "f-Vocabulary size: " << vocabf.size() << " types\n"; -  cerr << "e-Corpus size: " << corpuse.size() << " sentences\n"; -  cerr << "e-Vocabulary size: " << vocabe.size() << " types\n"; -  assert(corpusf.size() == corpuse.size()); - -  ConditionalBase lp0(conf["model1_interpolation_weight"].as<double>(), -                      vocabe.size(), -                      conf["model1"].as<string>()); -  MyModel m(lp0); - -  TRule x("[X] ||| kAnwntR myN ||| at the convent ||| 0"); -  m.IncrementRule(x); -  TRule y("[X] ||| nY dyN ||| gave ||| 0"); -  m.IncrementRule(y); - - -  MyFST fst(corpusf[0], corpuse[0], &m); -  ifstream in("./kimura.g"); -  assert(in); -  CFG_WFSTComposer comp(fst); -  Hypergraph hg; -  bool succeed = comp.Compose(&in, &hg); -  hg.PrintGraphviz(); -  if (succeed) { cerr << "SUCCESS.\n"; } else { cerr << "FAILURE REPORTED.\n"; } - -#if 0 -  ifstream in2("./amnabooks.g"); -  assert(in2); -  MyFST fst2(corpusf[1], corpuse[1], &m); -  CFG_WFSTComposer comp2(fst2); -  Hypergraph hg2; -  bool succeed2 = comp2.Compose(&in2, &hg2); -  if (succeed2) { cerr << "SUCCESS.\n"; } else { cerr << "FAILURE REPORTED.\n"; } -#endif - -  SparseVector<double> w; w.set_value(FD::Convert("Proposal"), 1.0); -  hg.Reweight(w); -  cerr << ViterbiFTree(hg) << endl; -  return 0; -} - diff --git a/gi/pf/pfdist.cc b/gi/pf/pfdist.cc deleted file mode 100644 index a3e46064..00000000 --- a/gi/pf/pfdist.cc +++ /dev/null @@ -1,598 +0,0 @@ -#include <iostream> -#include <tr1/memory> -#include <queue> - -#include <boost/functional.hpp> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "pf.h" -#include "base_distributions.h" -#include "reachability.h" -#include "viterbi.h" -#include "hg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "ccrp_onetable.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -boost::shared_ptr<MT19937> prng; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { -  po::options_description opts("Configuration options"); -  opts.add_options() -        ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples") -        ("particles,p",po::value<unsigned>()->default_value(30),"Number of particles") -        ("filter_frequency,f",po::value<unsigned>()->default_value(5),"Number of time steps between filterings") -        ("input,i",po::value<string>(),"Read parallel data from") -        ("max_src_phrase",po::value<unsigned>()->default_value(5),"Maximum length of source language phrases") -        ("max_trg_phrase",po::value<unsigned>()->default_value(5),"Maximum length of target language phrases") -        ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)") -        ("inverse_model1,M",po::value<string>(),"Inverse Model 1 parameters (used in backward estimate)") -        ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") -        ("random_seed,S",po::value<uint32_t>(), "Random seed"); -  po::options_description clo("Command line options"); -  clo.add_options() -        ("config", po::value<string>(), "Configuration file") -        ("help,h", "Print this help message and exit"); -
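// Options are stored twice: the command line first, then the optional
// --config file. boost::program_options keeps the first value stored for a
// key, so command-line flags take precedence over file settings. A trimmed
// sketch of the same layering (ParseLayered is an illustrative name, not part
// of this program):
#include <fstream>
#include <string>
#include <boost/program_options.hpp>

boost::program_options::variables_map ParseLayered(int argc, char** argv) {
  namespace po = boost::program_options;
  po::options_description opts("Options");
  opts.add_options()
      ("samples,s", po::value<unsigned>()->default_value(1000), "Number of samples")
      ("config", po::value<std::string>(), "Configuration file");
  po::variables_map vm;
  po::store(po::parse_command_line(argc, argv, opts), vm);  // wins on conflicts
  if (vm.count("config")) {
    std::ifstream cfg(vm["config"].as<std::string>().c_str());
    po::store(po::parse_config_file(cfg, opts), vm);        // fills remaining keys
  }
  po::notify(vm);
  return vm;
}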
 po::options_description dconfig_options, dcmdline_options; -  dconfig_options.add(opts); -  dcmdline_options.add(opts).add(clo); -   -  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); -  if (conf->count("config")) { -    ifstream config((*conf)["config"].as<string>().c_str()); -    po::store(po::parse_config_file(config, dconfig_options), *conf); -  } -  po::notify(*conf); - -  if (conf->count("help") || (conf->count("input") == 0)) { -    cerr << dcmdline_options << endl; -    exit(1); -  } -} - -void ReadParallelCorpus(const string& filename, -                vector<vector<WordID> >* f, -                vector<vector<WordID> >* e, -                set<WordID>* vocab_f, -                set<WordID>* vocab_e) { -  f->clear(); -  e->clear(); -  vocab_f->clear(); -  vocab_e->clear(); -  istream* in; -  if (filename == "-") -    in = &cin; -  else -    in = new ifstream(filename.c_str()); -  assert(*in); -  string line; -  const WordID kDIV = TD::Convert("|||"); -  vector<WordID> tmp; -  while(*in) { -    getline(*in, line); -    if (line.empty() && !*in) break; -    e->push_back(vector<int>()); -    f->push_back(vector<int>()); -    vector<int>& le = e->back(); -    vector<int>& lf = f->back(); -    tmp.clear(); -    TD::ConvertSentence(line, &tmp); -    bool isf = true; -    for (unsigned i = 0; i < tmp.size(); ++i) { -      const int cur = tmp[i]; -      if (isf) { -        if (kDIV == cur) { isf = false; } else { -          lf.push_back(cur); -          vocab_f->insert(cur); -        } -      } else { -        assert(cur != kDIV); -        le.push_back(cur); -        vocab_e->insert(cur); -      } -    } -    assert(isf == false); -  } -  if (in != &cin) delete in; -} - -#if 0 -struct MyConditionalModel { -  MyConditionalModel(PhraseConditionalBase& rcp0) : rp0(&rcp0), base(prob_t::One()), src_phrases(1,1), src_jumps(200, CCRP_NoTable<int>(1,1)) {} - -  prob_t srcp0(const vector<WordID>& src) const { -    prob_t p(1.0 / 3000.0); -    p.poweq(src.size()); -    prob_t lenp; lenp.logeq(log_poisson(src.size(), 1.0)); -    p *= lenp; -    return p; -  } - -  void DecrementRule(const TRule& rule) { -    const RuleCRPMap::iterator it = rules.find(rule.f_); -    assert(it != rules.end()); -    if (it->second.decrement(rule)) { -      base /= (*rp0)(rule); -      if (it->second.num_customers() == 0) -        rules.erase(it); -    } -    if (src_phrases.decrement(rule.f_)) -      base /= srcp0(rule.f_); -  } - -  void IncrementRule(const TRule& rule) { -    RuleCRPMap::iterator it = rules.find(rule.f_); -    if (it == rules.end()) -      it = rules.insert(make_pair(rule.f_, CCRP_NoTable<TRule>(1,1))).first; -    if (it->second.increment(rule)) { -      base *= (*rp0)(rule); -    } -    if (src_phrases.increment(rule.f_)) -      base *= srcp0(rule.f_); -  } - -  void IncrementRules(const vector<TRulePtr>& rules) { -    for (int i = 0; i < rules.size(); ++i) -      IncrementRule(*rules[i]); -  } - -  void DecrementRules(const vector<TRulePtr>& rules) { -    for (int i = 0; i < rules.size(); ++i) -      DecrementRule(*rules[i]); -  } - -  void IncrementJump(int dist, unsigned src_len) { -    assert(src_len > 0); -    if (src_jumps[src_len].increment(dist)) -      base *= jp0(dist, src_len); -  } - -  void DecrementJump(int dist, unsigned src_len) { -    assert(src_len > 0); -    if (src_jumps[src_len].decrement(dist)) -      base /= jp0(dist, src_len); -  } - -  void IncrementJumps(const vector<int>& js, unsigned src_len) { -    for (unsigned i = 0; i < js.size(); ++i) -      
IncrementJump(js[i], src_len); -  } - -  void DecrementJumps(const vector<int>& js, unsigned src_len) { -    for (unsigned i = 0; i < js.size(); ++i) -      DecrementJump(js[i], src_len); -  } - -  // p(jump = dist | src_len , z) -  prob_t JumpProbability(int dist, unsigned src_len) { -    const prob_t p0 = jp0(dist, src_len); -    const double lp = src_jumps[src_len].logprob(dist, log(p0)); -    prob_t q; q.logeq(lp); -    return q; -  } - -  // p(rule.f_ | z) * p(rule.e_ | rule.f_ , z) -  prob_t RuleProbability(const TRule& rule) const { -    const prob_t p0 = (*rp0)(rule); -    prob_t srcp; srcp.logeq(src_phrases.logprob(rule.f_, log(srcp0(rule.f_)))); -    const RuleCRPMap::const_iterator it = rules.find(rule.f_); -    if (it == rules.end()) return srcp * p0; -    const double lp = it->second.logprob(rule, log(p0)); -    prob_t q; q.logeq(lp); -    return q * srcp; -  } - -  prob_t Likelihood() const { -    prob_t p = base; -    for (RuleCRPMap::const_iterator it = rules.begin(); -         it != rules.end(); ++it) { -      prob_t cl; cl.logeq(it->second.log_crp_prob()); -      p *= cl; -    } -    for (unsigned l = 1; l < src_jumps.size(); ++l) { -      if (src_jumps[l].num_customers() > 0) { -        prob_t q; -        q.logeq(src_jumps[l].log_crp_prob()); -        p *= q; -      } -    } -    return p; -  } - -  JumpBase jp0; -  const PhraseConditionalBase* rp0; -  prob_t base; -  typedef unordered_map<vector<WordID>, CCRP_NoTable<TRule>, boost::hash<vector<WordID> > > RuleCRPMap; -  RuleCRPMap rules; -  CCRP_NoTable<vector<WordID> > src_phrases; -  vector<CCRP_NoTable<int> > src_jumps; -}; - -#endif - -struct MyJointModel { -  MyJointModel(PhraseJointBase& rcp0) : -    rp0(rcp0), base(prob_t::One()), rules(1,1), src_jumps(200, CCRP_NoTable<int>(1,1)) {} - -  void DecrementRule(const TRule& rule) { -    if (rules.decrement(rule)) -      base /= rp0(rule); -  } - -  void IncrementRule(const TRule& rule) { -    if (rules.increment(rule)) -      base *= rp0(rule); -  } - -  void IncrementRules(const vector<TRulePtr>& rules) { -    for (int i = 0; i < rules.size(); ++i) -      IncrementRule(*rules[i]); -  } - -  void DecrementRules(const vector<TRulePtr>& rules) { -    for (int i = 0; i < rules.size(); ++i) -      DecrementRule(*rules[i]); -  } - -  void IncrementJump(int dist, unsigned src_len) { -    assert(src_len > 0); -    if (src_jumps[src_len].increment(dist)) -      base *= jp0(dist, src_len); -  } - -  void DecrementJump(int dist, unsigned src_len) { -    assert(src_len > 0); -    if (src_jumps[src_len].decrement(dist)) -      base /= jp0(dist, src_len); -  } - -  void IncrementJumps(const vector<int>& js, unsigned src_len) { -    for (unsigned i = 0; i < js.size(); ++i) -      IncrementJump(js[i], src_len); -  } - -  void DecrementJumps(const vector<int>& js, unsigned src_len) { -    for (unsigned i = 0; i < js.size(); ++i) -      DecrementJump(js[i], src_len); -  } - -  // p(jump = dist | src_len , z) -  prob_t JumpProbability(int dist, unsigned src_len) { -    const prob_t p0 = jp0(dist, src_len); -    const double lp = src_jumps[src_len].logprob(dist, log(p0)); -    prob_t q; q.logeq(lp); -    return q; -  } - -  // p(rule.f_ | z) * p(rule.e_ | rule.f_ , z) -  prob_t RuleProbability(const TRule& rule) const { -    prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule)))); -    return p; -  } - -  prob_t Likelihood() const { -    prob_t p = base; -    prob_t q; q.logeq(rules.log_crp_prob()); -    p *= q; -    for (unsigned l = 1; l < src_jumps.size(); ++l) { -      if 
(src_jumps[l].num_customers() > 0) { -        prob_t q; -        q.logeq(src_jumps[l].log_crp_prob()); -        p *= q; -      } -    } -    return p; -  } - -  JumpBase jp0; -  const PhraseJointBase& rp0; -  prob_t base; -  CCRP_NoTable<TRule> rules; -  vector<CCRP_NoTable<int> > src_jumps; -}; - -struct BackwardEstimate { -  BackwardEstimate(const Model1& m1, const vector<WordID>& src, const vector<WordID>& trg) : -      model1_(m1), src_(src), trg_(trg) { -  } -  const prob_t& operator()(const vector<bool>& src_cov, unsigned trg_cov) const { -    assert(src_.size() == src_cov.size()); -    assert(trg_cov <= trg_.size()); -    prob_t& e = cache_[src_cov][trg_cov]; -    if (e.is_0()) { -      if (trg_cov == trg_.size()) { e = prob_t::One(); return e; } -      vector<WordID> r(src_.size() + 1); r.clear(); -      r.push_back(0);  // NULL word -      for (int i = 0; i < src_cov.size(); ++i) -        if (!src_cov[i]) r.push_back(src_[i]); -      const prob_t uniform_alignment(1.0 / r.size()); -      e.logeq(Md::log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) -      for (unsigned j = trg_cov; j < trg_.size(); ++j) { -        prob_t p; -        for (unsigned i = 0; i < r.size(); ++i) -          p += model1_(r[i], trg_[j]); -        if (p.is_0()) { -          cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n"; -          abort(); -        } -        p *= uniform_alignment; -        e *= p; -      } -    } -    return e; -  } -  const Model1& model1_; -  const vector<WordID>& src_; -  const vector<WordID>& trg_; -  mutable unordered_map<vector<bool>, map<unsigned, prob_t>, boost::hash<vector<bool> > > cache_; -}; - -struct BackwardEstimateSym { -  BackwardEstimateSym(const Model1& m1, -                      const Model1& invm1, const vector<WordID>& src, const vector<WordID>& trg) : -      model1_(m1), invmodel1_(invm1), src_(src), trg_(trg) { -  } -  const prob_t& operator()(const vector<bool>& src_cov, unsigned trg_cov) const { -    assert(src_.size() == src_cov.size()); -    assert(trg_cov <= trg_.size()); -    prob_t& e = cache_[src_cov][trg_cov]; -    if (e.is_0()) { -      if (trg_cov == trg_.size()) { e = prob_t::One(); return e; } -      vector<WordID> r(src_.size() + 1); r.clear(); -      for (int i = 0; i < src_cov.size(); ++i) -        if (!src_cov[i]) r.push_back(src_[i]); -      r.push_back(0);  // NULL word -      const prob_t uniform_alignment(1.0 / r.size()); -      e.logeq(Md::log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) -      for (unsigned j = trg_cov; j < trg_.size(); ++j) { -        prob_t p; -        for (unsigned i = 0; i < r.size(); ++i) -          p += model1_(r[i], trg_[j]); -        if (p.is_0()) { -          cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n"; -          abort(); -        } -        p *= uniform_alignment; -        e *= p; -      } -      r.pop_back(); -      const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0)); -      prob_t inv; -      inv.logeq(Md::log_poisson(r.size(), trg_.size() - trg_cov)); -      for (unsigned i = 0; i < r.size(); ++i) { -        prob_t p; -        for (unsigned j = trg_cov - 1; j < trg_.size(); ++j) -          p += invmodel1_(j < trg_cov ? 
0 : trg_[j], r[i]); -        if (p.is_0()) { -          cerr << "ERROR: p_inv(" << TD::Convert(r[i]) << " | " << TD::GetString(trg_) << ") = 0!\n"; -          abort(); -        } -        p *= inv_uniform; -        inv *= p; -      } -      prob_t x = pow(e * inv, 0.5); -      e = x; -      //cerr << "Forward: " << log(e) << "\tBackward: " << log(inv) << "\t prop: " << log(x) << endl; -    } -    return e; -  } -  const Model1& model1_; -  const Model1& invmodel1_; -  const vector<WordID>& src_; -  const vector<WordID>& trg_; -  mutable unordered_map<vector<bool>, map<unsigned, prob_t>, boost::hash<vector<bool> > > cache_; -}; - -struct Particle { -  Particle() : weight(prob_t::One()), src_cov(), trg_cov(), prev_pos(-1) {} -  prob_t weight; -  prob_t gamma_last; -  vector<int> src_jumps; -  vector<TRulePtr> rules; -  vector<bool> src_cv; -  int src_cov; -  int trg_cov; -  int prev_pos; -}; - -ostream& operator<<(ostream& o, const vector<bool>& v) { -  for (int i = 0; i < v.size(); ++i) -    o << (v[i] ? '1' : '0'); -  return o; -} -ostream& operator<<(ostream& o, const Particle& p) { -  o << "[cv=" << p.src_cv << "  src_cov=" << p.src_cov << " trg_cov=" << p.trg_cov << " last_pos=" << p.prev_pos << " num_rules=" << p.rules.size() << "  w=" << log(p.weight) << ']'; -  return o; -} - -int main(int argc, char** argv) { -  po::variables_map conf; -  InitCommandLine(argc, argv, &conf); -  const unsigned kMAX_TRG_PHRASE = conf["max_trg_phrase"].as<unsigned>(); -  const unsigned kMAX_SRC_PHRASE = conf["max_src_phrase"].as<unsigned>(); -  const unsigned particles = conf["particles"].as<unsigned>(); -  const unsigned samples = conf["samples"].as<unsigned>(); -  const unsigned rejuv_freq = conf["filter_frequency"].as<unsigned>(); - -  if (!conf.count("model1")) { -    cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; -    return 1; -  } -  if (conf.count("random_seed")) -    prng.reset(new MT19937(conf["random_seed"].as<uint32_t>())); -  else -    prng.reset(new MT19937); -  MT19937& rng = *prng; - -  vector<vector<WordID> > corpuse, corpusf; -  set<WordID> vocabe, vocabf; -  cerr << "Reading corpus...\n"; -  ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe); -  cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n"; -  cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; -  assert(corpusf.size() == corpuse.size()); - -  const int kLHS = -TD::Convert("X"); -  Model1 m1(conf["model1"].as<string>()); -  Model1 invm1(conf["inverse_model1"].as<string>()); - -#if 0 -  PhraseConditionalBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size()); -  MyConditionalModel m(lp0); -#else -  PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size(), vocabf.size()); -  MyJointModel m(lp0); -#endif - -  MultinomialResampleFilter<Particle> filter(&rng); -  cerr << "Initializing reachability limits...\n"; -  vector<Particle> ps(corpusf.size()); -  vector<Reachability> reaches; reaches.reserve(corpusf.size()); -  for (int ci = 0; ci < corpusf.size(); ++ci) -    reaches.push_back(Reachability(corpusf[ci].size(), -                                   corpuse[ci].size(), -                                   kMAX_SRC_PHRASE, -                                   kMAX_TRG_PHRASE)); -  cerr << "Sampling...\n";  -  vector<Particle> tmp_p(10000);  // work space -  SampleSet<prob_t> pfss; -  for (int SS=0; SS < samples; ++SS) { -    
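// Each sweep re-analyzes every sentence with a particle filter: the previous
// derivation is decremented out of the model, 'particles' hypotheses are
// extended phrase pair by phrase pair under the proposal
// u = (gamma_last * backward_estimate)^0.2, each particle's weight is
// corrected by gamma/q after sampling an extension, and finally one particle
// is drawn in proportion to its weight and its rules and jumps are added back
// to the model. The multinomial resampling performed by filter(&lps) can be
// sketched as below (illustrative names, not the MultinomialResampleFilter
// API; std::rand stands in for MT19937, and weights.size() == pps->size() is
// assumed):
#include <cstdlib>
#include <vector>

template <class Particle>
void MultinomialResample(std::vector<Particle>* pps,
                         const std::vector<double>& weights) {
  std::vector<Particle>& ps = *pps;
  double total = 0.0;
  for (size_t i = 0; i < weights.size(); ++i) total += weights[i];
  std::vector<Particle> next;
  next.reserve(ps.size());
  for (size_t i = 0; i < ps.size(); ++i) {
    double r = total * (std::rand() / (RAND_MAX + 1.0));
    size_t k = 0;
    while (k + 1 < weights.size() && r > weights[k]) { r -= weights[k]; ++k; }
    next.push_back(ps[k]);   // heavy particles get duplicated, light ones dropped
  }
  ps.swap(next);             // weights would then be reset to uniform
}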
for (int ci = 0; ci < corpusf.size(); ++ci) { -      vector<int>& src = corpusf[ci]; -      vector<int>& trg = corpuse[ci]; -      m.DecrementRules(ps[ci].rules); -      m.DecrementJumps(ps[ci].src_jumps, src.size()); - -      //BackwardEstimate be(m1, src, trg); -      BackwardEstimateSym be(m1, invm1, src, trg); -      const Reachability& r = reaches[ci]; -      vector<Particle> lps(particles); - -      for (int pi = 0; pi < particles; ++pi) { -        Particle& p = lps[pi]; -        p.src_cv.resize(src.size(), false); -      } - -      bool all_complete = false; -      while(!all_complete) { -        SampleSet<prob_t> ss; - -        // all particles have now been extended a bit, we will reweight them now -        if (lps[0].trg_cov > 0) -          filter(&lps); - -        // loop over all particles and extend them -        bool done_nothing = true; -        for (int pi = 0; pi < particles; ++pi) { -          Particle& p = lps[pi]; -          int tic = 0; -          while(p.trg_cov < trg.size() && tic < rejuv_freq) { -            ++tic; -            done_nothing = false; -            ss.clear(); -            TRule x; x.lhs_ = kLHS; -            prob_t z; -            int first_uncovered = src.size(); -            int last_uncovered = -1; -            for (int i = 0; i < src.size(); ++i) { -              const bool is_uncovered = !p.src_cv[i]; -              if (i < first_uncovered && is_uncovered) first_uncovered = i; -              if (is_uncovered && i > last_uncovered) last_uncovered = i; -            } -            assert(last_uncovered > -1); -            assert(first_uncovered < src.size()); - -            for (int trg_len = 1; trg_len <= kMAX_TRG_PHRASE; ++trg_len) { -              x.e_.push_back(trg[trg_len - 1 + p.trg_cov]); -              for (int src_len = 1; src_len <= kMAX_SRC_PHRASE; ++src_len) { -                if (!r.edges[p.src_cov][p.trg_cov][src_len][trg_len]) continue; - -                const int last_possible_start = last_uncovered - src_len + 1; -                assert(last_possible_start >= 0); -                //cerr << src_len << "," << trg_len << " is allowed. 
E=" << TD::GetString(x.e_) << endl; -                //cerr << "  first_uncovered=" << first_uncovered << "  last_possible_start=" << last_possible_start << endl; -                for (int i = first_uncovered; i <= last_possible_start; ++i) { -                  if (p.src_cv[i]) continue; -                  assert(ss.size() < tmp_p.size());  // if fails increase tmp_p size -                  Particle& np = tmp_p[ss.size()]; -                  np = p; -                  x.f_.clear(); -                  int gap_add = 0; -                  bool bad = false; -                  prob_t jp = prob_t::One(); -                  int prev_pos = p.prev_pos; -                  for (int j = 0; j < src_len; ++j) { -                    if ((j + i + gap_add) == src.size()) { bad = true; break; } -                    while ((i+j+gap_add) < src.size() && p.src_cv[i + j + gap_add]) { ++gap_add; } -                    if ((j + i + gap_add) == src.size()) { bad = true; break; } -                    np.src_cv[i + j + gap_add] = true; -                    x.f_.push_back(src[i + j + gap_add]); -                    jp *= m.JumpProbability(i + j + gap_add - prev_pos, src.size()); -                    int jump = i + j + gap_add - prev_pos; -                    assert(jump != 0); -                    np.src_jumps.push_back(jump); -                    prev_pos = i + j + gap_add; -                  } -                  if (bad) continue; -                  np.prev_pos = prev_pos; -                  np.src_cov += x.f_.size(); -                  np.trg_cov += x.e_.size(); -                  if (x.f_.size() != src_len) continue; -                  prob_t rp = m.RuleProbability(x); -                  np.gamma_last = rp * jp; -                  const prob_t u = pow(np.gamma_last * be(np.src_cv, np.trg_cov), 0.2); -                  //cerr << "**rule=" << x << endl; -                  //cerr << "  u=" << log(u) << "  rule=" << rp << " jump=" << jp << endl; -                  ss.add(u); -                  np.rules.push_back(TRulePtr(new TRule(x))); -                  z += u; - -                  const bool completed = (p.trg_cov == trg.size()); -                  if (completed) { -                    int last_jump = src.size() - p.prev_pos; -                    assert(last_jump > 0); -                    p.src_jumps.push_back(last_jump); -                    p.weight *= m.JumpProbability(last_jump, src.size()); -                  } -                } -              } -            } -            cerr << "number of edges to consider: " << ss.size() << endl; -            const int sampled = rng.SelectSample(ss); -            prob_t q_n = ss[sampled] / z; -            p = tmp_p[sampled]; -            //m.IncrementRule(*p.rules.back()); -            p.weight *= p.gamma_last / q_n; -            cerr << "[w=" << log(p.weight) << "]\tsampled rule: " << p.rules.back()->AsString() << endl; -            cerr << p << endl; -          } -        } // loop over particles (pi = 0 .. 
particles) -        if (done_nothing) all_complete = true; -      } -      pfss.clear(); -      for (int i = 0; i < lps.size(); ++i) -        pfss.add(lps[i].weight); -      const int sampled = rng.SelectSample(pfss); -      ps[ci] = lps[sampled]; -      m.IncrementRules(lps[sampled].rules); -      m.IncrementJumps(lps[sampled].src_jumps, src.size()); -      for (int i = 0; i < lps[sampled].rules.size(); ++i) { cerr << "S:\t" << lps[sampled].rules[i]->AsString() << "\n"; } -      cerr << "tmp-LLH: " << log(m.Likelihood()) << endl; -    } -    cerr << "LLH: " << log(m.Likelihood()) << endl; -    for (int sni = 0; sni < 5; ++sni) { -      for (int i = 0; i < ps[sni].rules.size(); ++i) { cerr << "\t" << ps[sni].rules[i]->AsString() << endl; } -    } -  } -  return 0; -} - diff --git a/gi/pf/pfdist.new.cc b/gi/pf/pfdist.new.cc deleted file mode 100644 index 3169eb75..00000000 --- a/gi/pf/pfdist.new.cc +++ /dev/null @@ -1,620 +0,0 @@ -#include <iostream> -#include <tr1/memory> -#include <queue> - -#include <boost/functional.hpp> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "base_measures.h" -#include "reachability.h" -#include "viterbi.h" -#include "hg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "ccrp_onetable.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -shared_ptr<MT19937> prng; - -size_t hash_value(const TRule& r) { -  size_t h = boost::hash_value(r.e_); -  boost::hash_combine(h, -r.lhs_); -  boost::hash_combine(h, boost::hash_value(r.f_)); -  return h; -} - -bool operator==(const TRule& a, const TRule& b) { -  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { -  po::options_description opts("Configuration options"); -  opts.add_options() -        ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples") -        ("particles,p",po::value<unsigned>()->default_value(25),"Number of particles") -        ("input,i",po::value<string>(),"Read parallel data from") -        ("max_src_phrase",po::value<unsigned>()->default_value(5),"Maximum length of source language phrases") -        ("max_trg_phrase",po::value<unsigned>()->default_value(5),"Maximum length of target language phrases") -        ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)") -        ("inverse_model1,M",po::value<string>(),"Inverse Model 1 parameters (used in backward estimate)") -        ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") -        ("random_seed,S",po::value<uint32_t>(), "Random seed"); -  po::options_description clo("Command line options"); -  clo.add_options() -        ("config", po::value<string>(), "Configuration file") -        ("help,h", "Print this help message and exit"); -  po::options_description dconfig_options, dcmdline_options; -  dconfig_options.add(opts); -  dcmdline_options.add(opts).add(clo); -   -  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); -  if (conf->count("config")) { -    ifstream config((*conf)["config"].as<string>().c_str()); -    po::store(po::parse_config_file(config, dconfig_options), *conf); -  } -  po::notify(*conf); - -  if (conf->count("help") || (conf->count("input") == 0)) { -    cerr << dcmdline_options << endl; -    exit(1); -  } -} - -void 
ReadParallelCorpus(const string& filename, -                vector<vector<WordID> >* f, -                vector<vector<WordID> >* e, -                set<WordID>* vocab_f, -                set<WordID>* vocab_e) { -  f->clear(); -  e->clear(); -  vocab_f->clear(); -  vocab_e->clear(); -  istream* in; -  if (filename == "-") -    in = &cin; -  else -    in = new ifstream(filename.c_str()); -  assert(*in); -  string line; -  const WordID kDIV = TD::Convert("|||"); -  vector<WordID> tmp; -  while(*in) { -    getline(*in, line); -    if (line.empty() && !*in) break; -    e->push_back(vector<int>()); -    f->push_back(vector<int>()); -    vector<int>& le = e->back(); -    vector<int>& lf = f->back(); -    tmp.clear(); -    TD::ConvertSentence(line, &tmp); -    bool isf = true; -    for (unsigned i = 0; i < tmp.size(); ++i) { -      const int cur = tmp[i]; -      if (isf) { -        if (kDIV == cur) { isf = false; } else { -          lf.push_back(cur); -          vocab_f->insert(cur); -        } -      } else { -        assert(cur != kDIV); -        le.push_back(cur); -        vocab_e->insert(cur); -      } -    } -    assert(isf == false); -  } -  if (in != &cin) delete in; -} - -#if 0 -struct MyConditionalModel { -  MyConditionalModel(PhraseConditionalBase& rcp0) : rp0(&rcp0), base(prob_t::One()), src_phrases(1,1), src_jumps(200, CCRP_NoTable<int>(1,1)) {} - -  prob_t srcp0(const vector<WordID>& src) const { -    prob_t p(1.0 / 3000.0); -    p.poweq(src.size()); -    prob_t lenp; lenp.logeq(log_poisson(src.size(), 1.0)); -    p *= lenp; -    return p; -  } - -  void DecrementRule(const TRule& rule) { -    const RuleCRPMap::iterator it = rules.find(rule.f_); -    assert(it != rules.end()); -    if (it->second.decrement(rule)) { -      base /= (*rp0)(rule); -      if (it->second.num_customers() == 0) -        rules.erase(it); -    } -    if (src_phrases.decrement(rule.f_)) -      base /= srcp0(rule.f_); -  } - -  void IncrementRule(const TRule& rule) { -    RuleCRPMap::iterator it = rules.find(rule.f_); -    if (it == rules.end()) -      it = rules.insert(make_pair(rule.f_, CCRP_NoTable<TRule>(1,1))).first; -    if (it->second.increment(rule)) { -      base *= (*rp0)(rule); -    } -    if (src_phrases.increment(rule.f_)) -      base *= srcp0(rule.f_); -  } - -  void IncrementRules(const vector<TRulePtr>& rules) { -    for (int i = 0; i < rules.size(); ++i) -      IncrementRule(*rules[i]); -  } - -  void DecrementRules(const vector<TRulePtr>& rules) { -    for (int i = 0; i < rules.size(); ++i) -      DecrementRule(*rules[i]); -  } - -  void IncrementJump(int dist, unsigned src_len) { -    assert(src_len > 0); -    if (src_jumps[src_len].increment(dist)) -      base *= jp0(dist, src_len); -  } - -  void DecrementJump(int dist, unsigned src_len) { -    assert(src_len > 0); -    if (src_jumps[src_len].decrement(dist)) -      base /= jp0(dist, src_len); -  } - -  void IncrementJumps(const vector<int>& js, unsigned src_len) { -    for (unsigned i = 0; i < js.size(); ++i) -      IncrementJump(js[i], src_len); -  } - -  void DecrementJumps(const vector<int>& js, unsigned src_len) { -    for (unsigned i = 0; i < js.size(); ++i) -      DecrementJump(js[i], src_len); -  } - -  // p(jump = dist | src_len , z) -  prob_t JumpProbability(int dist, unsigned src_len) { -    const prob_t p0 = jp0(dist, src_len); -    const double lp = src_jumps[src_len].logprob(dist, log(p0)); -    prob_t q; q.logeq(lp); -    return q; -  } - -  // p(rule.f_ | z) * p(rule.e_ | rule.f_ , z) -  prob_t RuleProbability(const TRule& 
rule) const { -    const prob_t p0 = (*rp0)(rule); -    prob_t srcp; srcp.logeq(src_phrases.logprob(rule.f_, log(srcp0(rule.f_)))); -    const RuleCRPMap::const_iterator it = rules.find(rule.f_); -    if (it == rules.end()) return srcp * p0; -    const double lp = it->second.logprob(rule, log(p0)); -    prob_t q; q.logeq(lp); -    return q * srcp; -  } - -  prob_t Likelihood() const { -    prob_t p = base; -    for (RuleCRPMap::const_iterator it = rules.begin(); -         it != rules.end(); ++it) { -      prob_t cl; cl.logeq(it->second.log_crp_prob()); -      p *= cl; -    } -    for (unsigned l = 1; l < src_jumps.size(); ++l) { -      if (src_jumps[l].num_customers() > 0) { -        prob_t q; -        q.logeq(src_jumps[l].log_crp_prob()); -        p *= q; -      } -    } -    return p; -  } - -  JumpBase jp0; -  const PhraseConditionalBase* rp0; -  prob_t base; -  typedef unordered_map<vector<WordID>, CCRP_NoTable<TRule>, boost::hash<vector<WordID> > > RuleCRPMap; -  RuleCRPMap rules; -  CCRP_NoTable<vector<WordID> > src_phrases; -  vector<CCRP_NoTable<int> > src_jumps; -}; - -#endif - -struct MyJointModel { -  MyJointModel(PhraseJointBase& rcp0) : -    rp0(rcp0), base(prob_t::One()), rules(1,1), src_jumps(200, CCRP_NoTable<int>(1,1)) {} - -  void DecrementRule(const TRule& rule) { -    if (rules.decrement(rule)) -      base /= rp0(rule); -  } - -  void IncrementRule(const TRule& rule) { -    if (rules.increment(rule)) -      base *= rp0(rule); -  } - -  void IncrementRules(const vector<TRulePtr>& rules) { -    for (int i = 0; i < rules.size(); ++i) -      IncrementRule(*rules[i]); -  } - -  void DecrementRules(const vector<TRulePtr>& rules) { -    for (int i = 0; i < rules.size(); ++i) -      DecrementRule(*rules[i]); -  } - -  void IncrementJump(int dist, unsigned src_len) { -    assert(src_len > 0); -    if (src_jumps[src_len].increment(dist)) -      base *= jp0(dist, src_len); -  } - -  void DecrementJump(int dist, unsigned src_len) { -    assert(src_len > 0); -    if (src_jumps[src_len].decrement(dist)) -      base /= jp0(dist, src_len); -  } - -  void IncrementJumps(const vector<int>& js, unsigned src_len) { -    for (unsigned i = 0; i < js.size(); ++i) -      IncrementJump(js[i], src_len); -  } - -  void DecrementJumps(const vector<int>& js, unsigned src_len) { -    for (unsigned i = 0; i < js.size(); ++i) -      DecrementJump(js[i], src_len); -  } - -  // p(jump = dist | src_len , z) -  prob_t JumpProbability(int dist, unsigned src_len) { -    const prob_t p0 = jp0(dist, src_len); -    const double lp = src_jumps[src_len].logprob(dist, log(p0)); -    prob_t q; q.logeq(lp); -    return q; -  } - -  // p(rule.f_ | z) * p(rule.e_ | rule.f_ , z) -  prob_t RuleProbability(const TRule& rule) const { -    prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule)))); -    return p; -  } - -  prob_t Likelihood() const { -    prob_t p = base; -    prob_t q; q.logeq(rules.log_crp_prob()); -    p *= q; -    for (unsigned l = 1; l < src_jumps.size(); ++l) { -      if (src_jumps[l].num_customers() > 0) { -        prob_t q; -        q.logeq(src_jumps[l].log_crp_prob()); -        p *= q; -      } -    } -    return p; -  } - -  JumpBase jp0; -  const PhraseJointBase& rp0; -  prob_t base; -  CCRP_NoTable<TRule> rules; -  vector<CCRP_NoTable<int> > src_jumps; -}; - -struct BackwardEstimate { -  BackwardEstimate(const Model1& m1, const vector<WordID>& src, const vector<WordID>& trg) : -      model1_(m1), src_(src), trg_(trg) { -  } -  const prob_t& operator()(const vector<bool>& src_cov, unsigned trg_cov) 
const { -    assert(src_.size() == src_cov.size()); -    assert(trg_cov <= trg_.size()); -    prob_t& e = cache_[src_cov][trg_cov]; -    if (e.is_0()) { -      if (trg_cov == trg_.size()) { e = prob_t::One(); return e; } -      vector<WordID> r(src_.size() + 1); r.clear(); -      r.push_back(0);  // NULL word -      for (int i = 0; i < src_cov.size(); ++i) -        if (!src_cov[i]) r.push_back(src_[i]); -      const prob_t uniform_alignment(1.0 / r.size()); -      e.logeq(log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) -      for (unsigned j = trg_cov; j < trg_.size(); ++j) { -        prob_t p; -        for (unsigned i = 0; i < r.size(); ++i) -          p += model1_(r[i], trg_[j]); -        if (p.is_0()) { -          cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n"; -          abort(); -        } -        p *= uniform_alignment; -        e *= p; -      } -    } -    return e; -  } -  const Model1& model1_; -  const vector<WordID>& src_; -  const vector<WordID>& trg_; -  mutable unordered_map<vector<bool>, map<unsigned, prob_t>, boost::hash<vector<bool> > > cache_; -}; - -struct BackwardEstimateSym { -  BackwardEstimateSym(const Model1& m1, -                      const Model1& invm1, const vector<WordID>& src, const vector<WordID>& trg) : -      model1_(m1), invmodel1_(invm1), src_(src), trg_(trg) { -  } -  const prob_t& operator()(const vector<bool>& src_cov, unsigned trg_cov) const { -    assert(src_.size() == src_cov.size()); -    assert(trg_cov <= trg_.size()); -    prob_t& e = cache_[src_cov][trg_cov]; -    if (e.is_0()) { -      if (trg_cov == trg_.size()) { e = prob_t::One(); return e; } -      vector<WordID> r(src_.size() + 1); r.clear(); -      for (int i = 0; i < src_cov.size(); ++i) -        if (!src_cov[i]) r.push_back(src_[i]); -      r.push_back(0);  // NULL word -      const prob_t uniform_alignment(1.0 / r.size()); -      e.logeq(log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) -      for (unsigned j = trg_cov; j < trg_.size(); ++j) { -        prob_t p; -        for (unsigned i = 0; i < r.size(); ++i) -          p += model1_(r[i], trg_[j]); -        if (p.is_0()) { -          cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n"; -          abort(); -        } -        p *= uniform_alignment; -        e *= p; -      } -      r.pop_back(); -      const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0)); -      prob_t inv; -      inv.logeq(log_poisson(r.size(), trg_.size() - trg_cov)); -      for (unsigned i = 0; i < r.size(); ++i) { -        prob_t p; -        for (unsigned j = trg_cov - 1; j < trg_.size(); ++j) -          p += invmodel1_(j < trg_cov ? 
0 : trg_[j], r[i]); -        if (p.is_0()) { -          cerr << "ERROR: p_inv(" << TD::Convert(r[i]) << " | " << TD::GetString(trg_) << ") = 0!\n"; -          abort(); -        } -        p *= inv_uniform; -        inv *= p; -      } -      prob_t x = pow(e * inv, 0.5); -      e = x; -      //cerr << "Forward: " << log(e) << "\tBackward: " << log(inv) << "\t prop: " << log(x) << endl; -    } -    return e; -  } -  const Model1& model1_; -  const Model1& invmodel1_; -  const vector<WordID>& src_; -  const vector<WordID>& trg_; -  mutable unordered_map<vector<bool>, map<unsigned, prob_t>, boost::hash<vector<bool> > > cache_; -}; - -struct Particle { -  Particle() : weight(prob_t::One()), src_cov(), trg_cov(), prev_pos(-1) {} -  prob_t weight; -  prob_t gamma_last; -  vector<int> src_jumps; -  vector<TRulePtr> rules; -  vector<bool> src_cv; -  int src_cov; -  int trg_cov; -  int prev_pos; -}; - -ostream& operator<<(ostream& o, const vector<bool>& v) { -  for (int i = 0; i < v.size(); ++i) -    o << (v[i] ? '1' : '0'); -  return o; -} -ostream& operator<<(ostream& o, const Particle& p) { -  o << "[cv=" << p.src_cv << "  src_cov=" << p.src_cov << " trg_cov=" << p.trg_cov << " last_pos=" << p.prev_pos << " num_rules=" << p.rules.size() << "  w=" << log(p.weight) << ']'; -  return o; -} - -void FilterCrapParticlesAndReweight(vector<Particle>* pps) { -  vector<Particle>& ps = *pps; -  SampleSet<prob_t> ss; -  for (int i = 0; i < ps.size(); ++i) -    ss.add(ps[i].weight); -  vector<Particle> nps; nps.reserve(ps.size()); -  const prob_t uniform_weight(1.0 / ps.size()); -  for (int i = 0; i < ps.size(); ++i) { -    nps.push_back(ps[prng->SelectSample(ss)]); -    nps[i].weight = uniform_weight; -  } -  nps.swap(ps); -} - -int main(int argc, char** argv) { -  po::variables_map conf; -  InitCommandLine(argc, argv, &conf); -  const unsigned kMAX_TRG_PHRASE = conf["max_trg_phrase"].as<unsigned>(); -  const unsigned kMAX_SRC_PHRASE = conf["max_src_phrase"].as<unsigned>(); -  const unsigned particles = conf["particles"].as<unsigned>(); -  const unsigned samples = conf["samples"].as<unsigned>(); - -  if (!conf.count("model1")) { -    cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; -    return 1; -  } -  if (conf.count("random_seed")) -    prng.reset(new MT19937(conf["random_seed"].as<uint32_t>())); -  else -    prng.reset(new MT19937); -  MT19937& rng = *prng; - -  vector<vector<WordID> > corpuse, corpusf; -  set<WordID> vocabe, vocabf; -  cerr << "Reading corpus...\n"; -  ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe); -  cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n"; -  cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; -  assert(corpusf.size() == corpuse.size()); - -  const int kLHS = -TD::Convert("X"); -  Model1 m1(conf["model1"].as<string>()); -  Model1 invm1(conf["inverse_model1"].as<string>()); - -#if 0 -  PhraseConditionalBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size()); -  MyConditionalModel m(lp0); -#else -  PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size(), vocabf.size()); -  MyJointModel m(lp0); -#endif - -  cerr << "Initializing reachability limits...\n"; -  vector<Particle> ps(corpusf.size()); -  vector<Reachability> reaches; reaches.reserve(corpusf.size()); -  for (int ci = 0; ci < corpusf.size(); ++ci) -    reaches.push_back(Reachability(corpusf[ci].size(), -  
                                 corpuse[ci].size(), -                                   kMAX_SRC_PHRASE, -                                   kMAX_TRG_PHRASE)); -  cerr << "Sampling...\n";  -  vector<Particle> tmp_p(10000);  // work space -  SampleSet<prob_t> pfss; -  for (int SS=0; SS < samples; ++SS) { -    for (int ci = 0; ci < corpusf.size(); ++ci) { -      vector<int>& src = corpusf[ci]; -      vector<int>& trg = corpuse[ci]; -      m.DecrementRules(ps[ci].rules); -      m.DecrementJumps(ps[ci].src_jumps, src.size()); - -      //BackwardEstimate be(m1, src, trg); -      BackwardEstimateSym be(m1, invm1, src, trg); -      const Reachability& r = reaches[ci]; -      vector<Particle> lps(particles); - -      for (int pi = 0; pi < particles; ++pi) { -        Particle& p = lps[pi]; -        p.src_cv.resize(src.size(), false); -      } - -      bool all_complete = false; -      while(!all_complete) { -        SampleSet<prob_t> ss; - -        // all particles have now been extended a bit, we will reweight them now -        if (lps[0].trg_cov > 0) -          FilterCrapParticlesAndReweight(&lps); - -        // loop over all particles and extend them -        bool done_nothing = true; -        for (int pi = 0; pi < particles; ++pi) { -          Particle& p = lps[pi]; -          int tic = 0; -          const int rejuv_freq = 1; -          while(p.trg_cov < trg.size() && tic < rejuv_freq) { -            ++tic; -            done_nothing = false; -            ss.clear(); -            TRule x; x.lhs_ = kLHS; -            prob_t z; -            int first_uncovered = src.size(); -            int last_uncovered = -1; -            for (int i = 0; i < src.size(); ++i) { -              const bool is_uncovered = !p.src_cv[i]; -              if (i < first_uncovered && is_uncovered) first_uncovered = i; -              if (is_uncovered && i > last_uncovered) last_uncovered = i; -            } -            assert(last_uncovered > -1); -            assert(first_uncovered < src.size()); - -            for (int trg_len = 1; trg_len <= kMAX_TRG_PHRASE; ++trg_len) { -              x.e_.push_back(trg[trg_len - 1 + p.trg_cov]); -              for (int src_len = 1; src_len <= kMAX_SRC_PHRASE; ++src_len) { -                if (!r.edges[p.src_cov][p.trg_cov][src_len][trg_len]) continue; - -                const int last_possible_start = last_uncovered - src_len + 1; -                assert(last_possible_start >= 0); -                //cerr << src_len << "," << trg_len << " is allowed. 
E=" << TD::GetString(x.e_) << endl; -                //cerr << "  first_uncovered=" << first_uncovered << "  last_possible_start=" << last_possible_start << endl; -                for (int i = first_uncovered; i <= last_possible_start; ++i) { -                  if (p.src_cv[i]) continue; -                  assert(ss.size() < tmp_p.size());  // if fails increase tmp_p size -                  Particle& np = tmp_p[ss.size()]; -                  np = p; -                  x.f_.clear(); -                  int gap_add = 0; -                  bool bad = false; -                  prob_t jp = prob_t::One(); -                  int prev_pos = p.prev_pos; -                  for (int j = 0; j < src_len; ++j) { -                    if ((j + i + gap_add) == src.size()) { bad = true; break; } -                    while ((i+j+gap_add) < src.size() && p.src_cv[i + j + gap_add]) { ++gap_add; } -                    if ((j + i + gap_add) == src.size()) { bad = true; break; } -                    np.src_cv[i + j + gap_add] = true; -                    x.f_.push_back(src[i + j + gap_add]); -                    jp *= m.JumpProbability(i + j + gap_add - prev_pos, src.size()); -                    int jump = i + j + gap_add - prev_pos; -                    assert(jump != 0); -                    np.src_jumps.push_back(jump); -                    prev_pos = i + j + gap_add; -                  } -                  if (bad) continue; -                  np.prev_pos = prev_pos; -                  np.src_cov += x.f_.size(); -                  np.trg_cov += x.e_.size(); -                  if (x.f_.size() != src_len) continue; -                  prob_t rp = m.RuleProbability(x); -                  np.gamma_last = rp * jp; -                  const prob_t u = pow(np.gamma_last * be(np.src_cv, np.trg_cov), 0.2); -                  //cerr << "**rule=" << x << endl; -                  //cerr << "  u=" << log(u) << "  rule=" << rp << " jump=" << jp << endl; -                  ss.add(u); -                  np.rules.push_back(TRulePtr(new TRule(x))); -                  z += u; - -                  const bool completed = (p.trg_cov == trg.size()); -                  if (completed) { -                    int last_jump = src.size() - p.prev_pos; -                    assert(last_jump > 0); -                    p.src_jumps.push_back(last_jump); -                    p.weight *= m.JumpProbability(last_jump, src.size()); -                  } -                } -              } -            } -            cerr << "number of edges to consider: " << ss.size() << endl; -            const int sampled = rng.SelectSample(ss); -            prob_t q_n = ss[sampled] / z; -            p = tmp_p[sampled]; -            //m.IncrementRule(*p.rules.back()); -            p.weight *= p.gamma_last / q_n; -            cerr << "[w=" << log(p.weight) << "]\tsampled rule: " << p.rules.back()->AsString() << endl; -            cerr << p << endl; -          } -        } // loop over particles (pi = 0 .. 
particles) -        if (done_nothing) all_complete = true; -      } -      pfss.clear(); -      for (int i = 0; i < lps.size(); ++i) -        pfss.add(lps[i].weight); -      const int sampled = rng.SelectSample(pfss); -      ps[ci] = lps[sampled]; -      m.IncrementRules(lps[sampled].rules); -      m.IncrementJumps(lps[sampled].src_jumps, src.size()); -      for (int i = 0; i < lps[sampled].rules.size(); ++i) { cerr << "S:\t" << lps[sampled].rules[i]->AsString() << "\n"; } -      cerr << "tmp-LLH: " << log(m.Likelihood()) << endl; -    } -    cerr << "LLH: " << log(m.Likelihood()) << endl; -    for (int sni = 0; sni < 5; ++sni) { -      for (int i = 0; i < ps[sni].rules.size(); ++i) { cerr << "\t" << ps[sni].rules[i]->AsString() << endl; } -    } -  } -  return 0; -} - diff --git a/gi/pf/pfnaive.cc b/gi/pf/pfnaive.cc deleted file mode 100644 index 958ec4e2..00000000 --- a/gi/pf/pfnaive.cc +++ /dev/null @@ -1,284 +0,0 @@ -#include <iostream> -#include <tr1/memory> -#include <queue> - -#include <boost/functional.hpp> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "pf.h" -#include "base_distributions.h" -#include "monotonic_pseg.h" -#include "reachability.h" -#include "viterbi.h" -#include "hg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "ccrp_onetable.h" -#include "corpus.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -boost::shared_ptr<MT19937> prng; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { -  po::options_description opts("Configuration options"); -  opts.add_options() -        ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples") -        ("particles,p",po::value<unsigned>()->default_value(30),"Number of particles") -        ("filter_frequency,f",po::value<unsigned>()->default_value(5),"Number of time steps between filterings") -        ("input,i",po::value<string>(),"Read parallel data from") -        ("max_src_phrase",po::value<unsigned>()->default_value(5),"Maximum length of source language phrases") -        ("max_trg_phrase",po::value<unsigned>()->default_value(5),"Maximum length of target language phrases") -        ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)") -        ("inverse_model1,M",po::value<string>(),"Inverse Model 1 parameters (used in backward estimate)") -        ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") -        ("random_seed,S",po::value<uint32_t>(), "Random seed"); -  po::options_description clo("Command line options"); -  clo.add_options() -        ("config", po::value<string>(), "Configuration file") -        ("help,h", "Print this help message and exit"); -  po::options_description dconfig_options, dcmdline_options; -  dconfig_options.add(opts); -  dcmdline_options.add(opts).add(clo); -   -  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); -  if (conf->count("config")) { -    ifstream config((*conf)["config"].as<string>().c_str()); -    po::store(po::parse_config_file(config, dconfig_options), *conf); -  } -  po::notify(*conf); - -  if (conf->count("help") || (conf->count("input") == 0)) { -    cerr << dcmdline_options << endl; -    exit(1); -  } -} - -struct BackwardEstimateSym { -  BackwardEstimateSym(const Model1& m1, -                      const Model1& invm1, const 
vector<WordID>& src, const vector<WordID>& trg) : -      model1_(m1), invmodel1_(invm1), src_(src), trg_(trg) { -  } -  const prob_t& operator()(unsigned src_cov, unsigned trg_cov) const { -    assert(src_cov <= src_.size()); -    assert(trg_cov <= trg_.size()); -    prob_t& e = cache_[src_cov][trg_cov]; -    if (e.is_0()) { -      if (trg_cov == trg_.size()) { e = prob_t::One(); return e; } -      vector<WordID> r(src_.size() + 1); r.clear(); -      for (int i = src_cov; i < src_.size(); ++i) -        r.push_back(src_[i]); -      r.push_back(0);  // NULL word -      const prob_t uniform_alignment(1.0 / r.size()); -      e.logeq(Md::log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) -      for (unsigned j = trg_cov; j < trg_.size(); ++j) { -        prob_t p; -        for (unsigned i = 0; i < r.size(); ++i) -          p += model1_(r[i], trg_[j]); -        if (p.is_0()) { -          cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n"; -          abort(); -        } -        p *= uniform_alignment; -        e *= p; -      } -      r.pop_back(); -      const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0)); -      prob_t inv; -      inv.logeq(Md::log_poisson(r.size(), trg_.size() - trg_cov)); -      for (unsigned i = 0; i < r.size(); ++i) { -        prob_t p; -        for (unsigned j = trg_cov - 1; j < trg_.size(); ++j) -          p += invmodel1_(j < trg_cov ? 0 : trg_[j], r[i]); -        if (p.is_0()) { -          cerr << "ERROR: p_inv(" << TD::Convert(r[i]) << " | " << TD::GetString(trg_) << ") = 0!\n"; -          abort(); -        } -        p *= inv_uniform; -        inv *= p; -      } -      prob_t x = pow(e * inv, 0.5); -      e = x; -      //cerr << "Forward: " << log(e) << "\tBackward: " << log(inv) << "\t prop: " << log(x) << endl; -    } -    return e; -  } -  const Model1& model1_; -  const Model1& invmodel1_; -  const vector<WordID>& src_; -  const vector<WordID>& trg_; -  mutable unordered_map<unsigned, map<unsigned, prob_t> > cache_; -}; - -struct Particle { -  Particle() : weight(prob_t::One()), src_cov(), trg_cov() {} -  prob_t weight; -  prob_t gamma_last; -  vector<TRulePtr> rules; -  int src_cov; -  int trg_cov; -}; - -ostream& operator<<(ostream& o, const vector<bool>& v) { -  for (int i = 0; i < v.size(); ++i) -    o << (v[i] ? 
'1' : '0'); -  return o; -} -ostream& operator<<(ostream& o, const Particle& p) { -  o << "[src_cov=" << p.src_cov << " trg_cov=" << p.trg_cov << " num_rules=" << p.rules.size() << "  w=" << log(p.weight) << ']'; -  return o; -} - -int main(int argc, char** argv) { -  po::variables_map conf; -  InitCommandLine(argc, argv, &conf); -  const unsigned kMAX_TRG_PHRASE = conf["max_trg_phrase"].as<unsigned>(); -  const unsigned kMAX_SRC_PHRASE = conf["max_src_phrase"].as<unsigned>(); -  const unsigned particles = conf["particles"].as<unsigned>(); -  const unsigned samples = conf["samples"].as<unsigned>(); -  const unsigned rejuv_freq = conf["filter_frequency"].as<unsigned>(); - -  if (!conf.count("model1")) { -    cerr << argv[0] << ": Please use --model1 to specify model 1 parameters\n"; -    return 1; -  } -  if (conf.count("random_seed")) -    prng.reset(new MT19937(conf["random_seed"].as<uint32_t>())); -  else -    prng.reset(new MT19937); -  MT19937& rng = *prng; - -  vector<vector<WordID> > corpuse, corpusf; -  set<WordID> vocabe, vocabf; -  cerr << "Reading corpus...\n"; -  corpus::ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe); -  cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n"; -  cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; -  assert(corpusf.size() == corpuse.size()); - -  const int kLHS = -TD::Convert("X"); -  Model1 m1(conf["model1"].as<string>()); -  Model1 invm1(conf["inverse_model1"].as<string>()); - -  PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size(), vocabf.size()); -  PhraseJointBase_BiDir alp0(m1, invm1, conf["model1_interpolation_weight"].as<double>(), vocabe.size(), vocabf.size()); -  MonotonicParallelSegementationModel<PhraseJointBase_BiDir> m(alp0); -  TRule xx("[X] ||| ms. kimura ||| MS. KIMURA ||| X=0"); -  cerr << xx << endl << lp0(xx) << " " << alp0(xx) << endl; -  TRule xx12("[X] ||| . ||| PHARMACY . ||| X=0"); -  TRule xx21("[X] ||| pharmacy . ||| . ||| X=0"); -//  TRule xx22("[X] ||| . ||| . ||| X=0"); -  TRule xx22("[X] ||| . ||| THE . 
||| X=0"); -  cerr << xx12 << "\t" << lp0(xx12) << " " << alp0(xx12) << endl; -  cerr << xx21 << "\t" << lp0(xx21) << " " << alp0(xx21) << endl; -  cerr << xx22 << "\t" << lp0(xx22) << " " << alp0(xx22) << endl; - -  cerr << "Initializing reachability limits...\n"; -  vector<Particle> ps(corpusf.size()); -  vector<Reachability> reaches; reaches.reserve(corpusf.size()); -  for (int ci = 0; ci < corpusf.size(); ++ci) -    reaches.push_back(Reachability(corpusf[ci].size(), -                                   corpuse[ci].size(), -                                   kMAX_SRC_PHRASE, -                                   kMAX_TRG_PHRASE)); -  cerr << "Sampling...\n";  -  vector<Particle> tmp_p(10000);  // work space -  SampleSet<prob_t> pfss; -  SystematicResampleFilter<Particle> filter(&rng); -  // MultinomialResampleFilter<Particle> filter(&rng); -  for (int SS=0; SS < samples; ++SS) { -    for (int ci = 0; ci < corpusf.size(); ++ci) { -      vector<int>& src = corpusf[ci]; -      vector<int>& trg = corpuse[ci]; -      m.DecrementRulesAndStops(ps[ci].rules); -      const prob_t q_stop = m.StopProbability(); -      const prob_t q_cont = m.ContinueProbability(); -      cerr << "P(stop)=" << q_stop << "\tP(continue)=" <<q_cont << endl; - -      BackwardEstimateSym be(m1, invm1, src, trg); -      const Reachability& r = reaches[ci]; -      vector<Particle> lps(particles); - -      bool all_complete = false; -      while(!all_complete) { -        SampleSet<prob_t> ss; - -        // all particles have now been extended a bit, we will reweight them now -        if (lps[0].trg_cov > 0) -          filter(&lps); - -        // loop over all particles and extend them -        bool done_nothing = true; -        for (int pi = 0; pi < particles; ++pi) { -          Particle& p = lps[pi]; -          int tic = 0; -          while(p.trg_cov < trg.size() && tic < rejuv_freq) { -            ++tic; -            done_nothing = false; -            ss.clear(); -            TRule x; x.lhs_ = kLHS; -            prob_t z; - -            for (int trg_len = 1; trg_len <= kMAX_TRG_PHRASE; ++trg_len) { -              x.e_.push_back(trg[trg_len - 1 + p.trg_cov]); -              for (int src_len = 1; src_len <= kMAX_SRC_PHRASE; ++src_len) { -                if (!r.edges[p.src_cov][p.trg_cov][src_len][trg_len]) continue; - -                int i = p.src_cov; -                assert(ss.size() < tmp_p.size());  // if fails increase tmp_p size -                Particle& np = tmp_p[ss.size()]; -                np = p; -                x.f_.clear(); -                for (int j = 0; j < src_len; ++j) -                  x.f_.push_back(src[i + j]); -                np.src_cov += x.f_.size(); -                np.trg_cov += x.e_.size(); -                const bool stop_now = (np.src_cov == src_len && np.trg_cov == trg_len); -                prob_t rp = m.RuleProbability(x) * (stop_now ? 
q_stop : q_cont); -                np.gamma_last = rp; -                const prob_t u = pow(np.gamma_last * pow(be(np.src_cov, np.trg_cov), 1.2), 0.1); -                //cerr << "**rule=" << x << endl; -                //cerr << "  u=" << log(u) << "  rule=" << rp << endl; -                ss.add(u); -                np.rules.push_back(TRulePtr(new TRule(x))); -                z += u; -              } -            } -            //cerr << "number of edges to consider: " << ss.size() << endl; -            const int sampled = rng.SelectSample(ss); -            prob_t q_n = ss[sampled] / z; -            p = tmp_p[sampled]; -            //m.IncrementRule(*p.rules.back()); -            p.weight *= p.gamma_last / q_n; -            //cerr << "[w=" << log(p.weight) << "]\tsampled rule: " << p.rules.back()->AsString() << endl; -            //cerr << p << endl; -          } -        } // loop over particles (pi = 0 .. particles) -        if (done_nothing) all_complete = true; -        prob_t wv = prob_t::Zero(); -        for (int pp = 0; pp < lps.size(); ++pp) -          wv += lps[pp].weight; -        for (int pp = 0; pp < lps.size(); ++pp) -          lps[pp].weight /= wv; -      } -      pfss.clear(); -      for (int i = 0; i < lps.size(); ++i) -        pfss.add(lps[i].weight); -      const int sampled = rng.SelectSample(pfss); -      ps[ci] = lps[sampled]; -      m.IncrementRulesAndStops(lps[sampled].rules); -      for (int i = 0; i < lps[sampled].rules.size(); ++i) { cerr << "S:\t" << lps[sampled].rules[i]->AsString() << "\n"; } -      cerr << "tmp-LLH: " << log(m.Likelihood()) << endl; -    } -    cerr << "LLH: " << log(m.Likelihood()) << endl; -  } -  return 0; -} - diff --git a/gi/pf/poisson_uniform_word_model.h b/gi/pf/poisson_uniform_word_model.h deleted file mode 100644 index 76204a0e..00000000 --- a/gi/pf/poisson_uniform_word_model.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef _POISSON_UNIFORM_WORD_MODEL_H_ -#define _POISSON_UNIFORM_WORD_MODEL_H_ - -#include <cmath> -#include <vector> -#include "prob.h" -#include "m.h" - -// len ~ Poisson(lambda) -//   for (1..len) -//     e_i ~ Uniform({Vocabulary}) -struct PoissonUniformWordModel { -  explicit PoissonUniformWordModel(const unsigned vocab_size, -                                   const unsigned alphabet_size, -                                   const double mean_len = 5) : -    lh(prob_t::One()), -    v0(-std::log(vocab_size)), -    u0(-std::log(alphabet_size)), -    mean_length(mean_len) {} - -  void ResampleHyperparameters(MT19937*) {} - -  inline prob_t operator()(const std::vector<WordID>& s) const { -    prob_t p; -    p.logeq(Md::log_poisson(s.size(), mean_length) + s.size() * u0); -    //p.logeq(v0); -    return p; -  } - -  inline void Increment(const std::vector<WordID>& w, MT19937*) { -    lh *= (*this)(w); -  } - -  inline void Decrement(const std::vector<WordID>& w, MT19937 *) { -    lh /= (*this)(w); -  } - -  inline prob_t Likelihood() const { return lh; } - -  void Summary() const {} - - private: - -  prob_t lh;  // keeps track of the draws from the base distribution -  const double v0;  // uniform log prob of generating a word -  const double u0;  // uniform log prob of generating a letter -  const double mean_length;  // mean length of a word in the base distribution -}; - -#endif diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc deleted file mode 100644 index 605d8206..00000000 --- a/gi/pf/pyp_lm.cc +++ /dev/null @@ -1,273 +0,0 @@ -#include <iostream> -#include <tr1/memory> -#include <queue> - -#include <boost/functional.hpp> 
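// Not from the deleted sources: a minimal sketch of the standard Pitman-Yor
// predictive rule that the CCRP objects used below (see "ccrp.h") evaluate in
// prob(w, p0); in PYPLM<N>::prob the backoff (N-1)-gram model supplies p0,
// which is what makes the prior hierarchical. All names here are illustrative.
double pyp_predictive(unsigned c_w, unsigned t_w,  // customers/tables serving dish w
                      unsigned c, unsigned t,      // total customers/tables
                      double d, double s,          // discount and strength
                      double p0) {                 // base (backoff) probability of w
  if (c == 0) return p0;  // empty restaurant: defer entirely to the base measure
  return (c_w - d * t_w + (s + d * t) * p0) / (c + s);
}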
-#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "gamma_poisson.h" -#include "corpus_tools.h" -#include "m.h" -#include "tdict.h" -#include "sampler.h" -#include "ccrp.h" -#include "tied_resampler.h" - -// A not very memory-efficient implementation of an N-gram LM based on PYPs -// as described in Y.-W. Teh. (2006) A Hierarchical Bayesian Language Model -// based on Pitman-Yor Processes. In Proc. ACL. - -// I use templates to handle the recursive formulation of the prior, so -// the order of the model has to be specified here, at compile time: -#define kORDER 3 - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -boost::shared_ptr<MT19937> prng; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { -  po::options_description opts("Configuration options"); -  opts.add_options() -        ("samples,n",po::value<unsigned>()->default_value(300),"Number of samples") -        ("train,i",po::value<string>(),"Training data file") -        ("test,T",po::value<string>(),"Test data file") -        ("discount_prior_a,a",po::value<double>()->default_value(1.0), "discount ~ Beta(a,b): a=this") -        ("discount_prior_b,b",po::value<double>()->default_value(1.0), "discount ~ Beta(a,b): b=this") -        ("strength_prior_s,s",po::value<double>()->default_value(1.0), "strength ~ Gamma(s,r): s=this") -        ("strength_prior_r,r",po::value<double>()->default_value(1.0), "strength ~ Gamma(s,r): r=this") -        ("random_seed,S",po::value<uint32_t>(), "Random seed"); -  po::options_description clo("Command line options"); -  clo.add_options() -        ("config", po::value<string>(), "Configuration file") -        ("help", "Print this help message and exit"); -  po::options_description dconfig_options, dcmdline_options; -  dconfig_options.add(opts); -  dcmdline_options.add(opts).add(clo); -   -  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); -  if (conf->count("config")) { -    ifstream config((*conf)["config"].as<string>().c_str()); -    po::store(po::parse_config_file(config, dconfig_options), *conf); -  } -  po::notify(*conf); - -  if (conf->count("help") || (conf->count("train") == 0)) { -    cerr << dcmdline_options << endl; -    exit(1); -  } -} - -// uniform distribution over a fixed vocabulary -struct UniformVocabulary { -  UniformVocabulary(unsigned vs, double, double, double, double) : p0(1.0 / vs), draws() {} -  void increment(WordID, const vector<WordID>&, MT19937*) { ++draws; } -  void decrement(WordID, const vector<WordID>&, MT19937*) { --draws; assert(draws >= 0); } -  double prob(WordID, const vector<WordID>&) const { return p0; } -  void resample_hyperparameters(MT19937*) {} -  double log_likelihood() const { return draws * log(p0); } -  const double p0; -  int draws; -}; - -// Lord Rothschild. 1986. THE DISTRIBUTION OF ENGLISH DICTIONARY WORD LENGTHS. 
-// Journal of Statistical Planning and Inference 14 (1986) 311-322 -struct PoissonLengthUniformCharWordModel { -  explicit PoissonLengthUniformCharWordModel(unsigned vocab_size, double, double, double, double) : plen(5,5), uc(-log(95)), llh() {} -  void increment(WordID w, const vector<WordID>& v, MT19937*) { -    llh += log(prob(w, v)); // this isn't quite right -    plen.increment(TD::Convert(w).size() - 1); -  } -  void decrement(WordID w, const vector<WordID>& v, MT19937*) { -    plen.decrement(TD::Convert(w).size() - 1); -    llh -= log(prob(w, v)); // this isn't quite right -  } -  double prob(WordID w, const vector<WordID>&) const { -    const unsigned len = TD::Convert(w).size(); -    return plen.prob(len - 1) * exp(uc * len); -  } -  double log_likelihood() const { return llh; } -  void resample_hyperparameters(MT19937*) {} -  GammaPoisson plen; -  const double uc; -  double llh; -}; - -struct PYPAdaptedPoissonLengthUniformCharWordModel { -  explicit PYPAdaptedPoissonLengthUniformCharWordModel(unsigned vocab_size, double, double, double, double) : -    base(vocab_size,1,1,1,1), -    crp(1,1,1,1) {} -  void increment(WordID w, const vector<WordID>& v, MT19937* rng) { -    double p0 = base.prob(w, v); -    if (crp.increment(w, p0, rng)) -      base.increment(w, v, rng); -  } -  void decrement(WordID w, const vector<WordID>& v, MT19937* rng) { -    if (crp.decrement(w, rng)) -      base.decrement(w, v, rng); -  } -  double prob(WordID w, const vector<WordID>& v) const { -    double p0 = base.prob(w, v); -    return crp.prob(w, p0); -  } -  double log_likelihood() const { return crp.log_crp_prob() + base.log_likelihood(); } -  void resample_hyperparameters(MT19937* rng) { crp.resample_hyperparameters(rng); } -  PoissonLengthUniformCharWordModel base; -  CCRP<WordID> crp; -}; - -template <unsigned N> struct PYPLM; - -#if 1 -template<> struct PYPLM<0> : public UniformVocabulary { -  PYPLM(unsigned vs, double a, double b, double c, double d) : -    UniformVocabulary(vs, a, b, c, d) {} -}; -#else -#if 0 -template<> struct PYPLM<0> : public PoissonLengthUniformCharWordModel { -  PYPLM(unsigned vs, double a, double b, double c, double d) : -    PoissonLengthUniformCharWordModel(vs, a, b, c, d) {} -}; -#else -template<> struct PYPLM<0> : public PYPAdaptedPoissonLengthUniformCharWordModel { -  PYPLM(unsigned vs, double a, double b, double c, double d) : -    PYPAdaptedPoissonLengthUniformCharWordModel(vs, a, b, c, d) {} -}; -#endif -#endif - -// represents an N-gram LM -template <unsigned N> struct PYPLM { -  PYPLM(unsigned vs, double da, double db, double ss, double sr) : -      backoff(vs, da, db, ss, sr), -      tr(da, db, ss, sr, 0.8, 1.0), -      lookup(N-1) {} -  void increment(WordID w, const vector<WordID>& context, MT19937* rng) { -    const double bo = backoff.prob(w, context); -    for (unsigned i = 0; i < N-1; ++i) -      lookup[i] = context[context.size() - 1 - i]; -    typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::iterator it = p.find(lookup); -    if (it == p.end()) { -      it = p.insert(make_pair(lookup, CCRP<WordID>(0.5,1))).first; -      tr.Add(&it->second);  // add to resampler -    } -    if (it->second.increment(w, bo, rng)) -      backoff.increment(w, context, rng); -  } -  void decrement(WordID w, const vector<WordID>& context, MT19937* rng) { -    for (unsigned i = 0; i < N-1; ++i) -      lookup[i] = context[context.size() - 1 - i]; -    typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > 
>::iterator it = p.find(lookup); -    assert(it != p.end()); -    if (it->second.decrement(w, rng)) -      backoff.decrement(w, context, rng); -  } -  double prob(WordID w, const vector<WordID>& context) const { -    const double bo = backoff.prob(w, context); -    for (unsigned i = 0; i < N-1; ++i) -      lookup[i] = context[context.size() - 1 - i]; -    typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::const_iterator it = p.find(lookup); -    if (it == p.end()) return bo; -    return it->second.prob(w, bo); -  } - -  double log_likelihood() const { -    double llh = backoff.log_likelihood(); -    typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::const_iterator it; -    for (it = p.begin(); it != p.end(); ++it) -      llh += it->second.log_crp_prob(); -    llh += tr.LogLikelihood(); -    return llh; -  } - -  void resample_hyperparameters(MT19937* rng) { -    tr.ResampleHyperparameters(rng); -    backoff.resample_hyperparameters(rng); -  } - -  PYPLM<N-1> backoff; -  TiedResampler<CCRP<WordID> > tr; -  double discount_a, discount_b, strength_s, strength_r; -  double d, strength; -  mutable vector<WordID> lookup;  // thread-local -  unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > > p; -}; - -int main(int argc, char** argv) { -  po::variables_map conf; - -  InitCommandLine(argc, argv, &conf); -  const unsigned samples = conf["samples"].as<unsigned>(); -  if (conf.count("random_seed")) -    prng.reset(new MT19937(conf["random_seed"].as<uint32_t>())); -  else -    prng.reset(new MT19937); -  MT19937& rng = *prng; -  vector<vector<WordID> > corpuse; -  set<WordID> vocabe; -  const WordID kEOS = TD::Convert("</s>"); -  cerr << "Reading corpus...\n"; -  CorpusTools::ReadFromFile(conf["train"].as<string>(), &corpuse, &vocabe); -  cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; -  vector<vector<WordID> > test; -  if (conf.count("test")) -    CorpusTools::ReadFromFile(conf["test"].as<string>(), &test); -  else -    test = corpuse; -  PYPLM<kORDER> lm(vocabe.size(), -                   conf["discount_prior_a"].as<double>(), -                   conf["discount_prior_b"].as<double>(), -                   conf["strength_prior_s"].as<double>(), -                   conf["strength_prior_r"].as<double>()); -  vector<WordID> ctx(kORDER - 1, TD::Convert("<s>")); -  for (int SS=0; SS < samples; ++SS) { -    for (int ci = 0; ci < corpuse.size(); ++ci) { -      ctx.resize(kORDER - 1); -      const vector<WordID>& s = corpuse[ci]; -      for (int i = 0; i <= s.size(); ++i) { -        WordID w = (i < s.size() ? s[i] : kEOS); -        if (SS > 0) lm.decrement(w, ctx, &rng); -        lm.increment(w, ctx, &rng); -        ctx.push_back(w); -      } -    } -    if (SS % 10 == 9) { -      cerr << " [LLH=" << lm.log_likelihood() << "]" << endl; -      if (SS % 30 == 29) lm.resample_hyperparameters(&rng); -    } else { cerr << '.' << flush; } -  } -  double llh = 0; -  unsigned cnt = 0; -  unsigned oovs = 0; -  for (int ci = 0; ci < test.size(); ++ci) { -    ctx.resize(kORDER - 1); -    const vector<WordID>& s = test[ci]; -    for (int i = 0; i <= s.size(); ++i) { -      WordID w = (i < s.size() ? 
s[i] : kEOS); -      double lp = log(lm.prob(w, ctx)) / log(2); -      if (i < s.size() && vocabe.count(w) == 0) { -        cerr << "**OOV "; -        ++oovs; -        lp = 0; -      } -      cerr << "p(" << TD::Convert(w) << " |"; -      for (int j = ctx.size() + 1 - kORDER; j < ctx.size(); ++j) -        cerr << ' ' << TD::Convert(ctx[j]); -      cerr << ") = " << lp << endl; -      ctx.push_back(w); -      llh -= lp; -      cnt++; -    } -  } -  cerr << "  Log_10 prob: " << (-llh * log(2) / log(10)) << endl; -  cerr << "        Count: " << cnt << endl; -  cerr << "         OOVs: " << oovs << endl; -  cerr << "Cross-entropy: " << (llh / cnt) << endl; -  cerr << "   Perplexity: " << pow(2, llh / cnt) << endl; -  return 0; -} - - diff --git a/gi/pf/pyp_tm.cc b/gi/pf/pyp_tm.cc deleted file mode 100644 index 37b9a604..00000000 --- a/gi/pf/pyp_tm.cc +++ /dev/null @@ -1,128 +0,0 @@ -#include "pyp_tm.h" - -#include <tr1/unordered_map> -#include <iostream> -#include <queue> - -#include "tdict.h" -#include "ccrp.h" -#include "pyp_word_model.h" -#include "tied_resampler.h" - -using namespace std; -using namespace std::tr1; - -struct FreqBinner { -  FreqBinner(const std::string& fname) { fd_.Load(fname); } -  unsigned NumberOfBins() const { return fd_.Max() + 1; } -  unsigned Bin(const WordID& w) const { return fd_.LookUp(w); } -  FreqDict<unsigned> fd_; -}; - -template <typename Base, class Binner = FreqBinner> -struct ConditionalPYPWordModel { -  ConditionalPYPWordModel(Base* b, const Binner* bnr = NULL) : -      base(*b), -      binner(bnr), -      btr(binner ? binner->NumberOfBins() + 1u : 2u) {} - -  void Summary() const { -    cerr << "Number of conditioning contexts: " << r.size() << endl; -    for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { -      cerr << TD::Convert(it->first) << "   \tPYP(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << endl; -      for (CCRP<vector<WordID> >::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) -        cerr << "   " << i2->second << '\t' << TD::GetString(i2->first) << endl; -    } -  } - -  void ResampleHyperparameters(MT19937* rng) { -    btr.ResampleHyperparameters(rng); -  }  - -  prob_t Prob(const WordID src, const vector<WordID>& trglets) const { -    RuleModelHash::const_iterator it = r.find(src); -    if (it == r.end()) { -      return base(trglets); -    } else { -      return it->second.prob(trglets, base(trglets)); -    } -  } - -  void Increment(const WordID src, const vector<WordID>& trglets, MT19937* rng) { -    RuleModelHash::iterator it = r.find(src); -    if (it == r.end()) { -      it = r.insert(make_pair(src, CCRP<vector<WordID> >(0.5,1.0))).first; -      static const WordID kNULL = TD::Convert("NULL"); -      unsigned bin = (src == kNULL ? 
0 : 1); -      if (binner && bin) { bin = binner->Bin(src) + 1; } -      btr.Add(bin, &it->second); -    } -    if (it->second.increment(trglets, base(trglets), rng)) -      base.Increment(trglets, rng); -  } - -  void Decrement(const WordID src, const vector<WordID>& trglets, MT19937* rng) { -    RuleModelHash::iterator it = r.find(src); -    assert(it != r.end()); -    if (it->second.decrement(trglets, rng)) { -      base.Decrement(trglets, rng); -    } -  } - -  prob_t Likelihood() const { -    prob_t p = prob_t::One(); -    for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { -      prob_t q; q.logeq(it->second.log_crp_prob()); -      p *= q; -    } -    return p; -  } - -  unsigned UniqueConditioningContexts() const { -    return r.size(); -  } - -  // TODO tie PYP hyperparameters based on source word frequency bins -  Base& base; -  const Binner* binner; -  BinTiedResampler<CCRP<vector<WordID> > > btr; -  typedef unordered_map<WordID, CCRP<vector<WordID> > > RuleModelHash; -  RuleModelHash r; -}; - -PYPLexicalTranslation::PYPLexicalTranslation(const vector<vector<WordID> >& lets, -                                             const unsigned vocab_size, -                                             const unsigned num_letters) : -    letters(lets), -    base(vocab_size, num_letters, 5), -    tmodel(new ConditionalPYPWordModel<PoissonUniformWordModel>(&base, new FreqBinner("10k.freq"))), -    kX(-TD::Convert("X")) {} - -void PYPLexicalTranslation::Summary() const { -  tmodel->Summary(); -} - -prob_t PYPLexicalTranslation::Likelihood() const { -  return tmodel->Likelihood() * base.Likelihood(); -} - -void PYPLexicalTranslation::ResampleHyperparameters(MT19937* rng) { -  tmodel->ResampleHyperparameters(rng); -} - -unsigned PYPLexicalTranslation::UniqueConditioningContexts() const { -  return tmodel->UniqueConditioningContexts(); -} - -prob_t PYPLexicalTranslation::Prob(WordID src, WordID trg) const { -  return tmodel->Prob(src, letters[trg]); -} - -void PYPLexicalTranslation::Increment(WordID src, WordID trg, MT19937* rng) { -  tmodel->Increment(src, letters[trg], rng); -} - -void PYPLexicalTranslation::Decrement(WordID src, WordID trg, MT19937* rng) { -  tmodel->Decrement(src, letters[trg], rng); -} - diff --git a/gi/pf/pyp_tm.h b/gi/pf/pyp_tm.h deleted file mode 100644 index 2b076a25..00000000 --- a/gi/pf/pyp_tm.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef PYP_LEX_TRANS -#define PYP_LEX_TRANS - -#include <vector> -#include "wordid.h" -#include "prob.h" -#include "sampler.h" -#include "freqdict.h" -#include "poisson_uniform_word_model.h" - -struct FreqBinner; -template <typename T, class B> struct ConditionalPYPWordModel; - -struct PYPLexicalTranslation { -  explicit PYPLexicalTranslation(const std::vector<std::vector<WordID> >& lets, -                                 const unsigned vocab_size, -                                 const unsigned num_letters); - -  prob_t Likelihood() const; - -  void ResampleHyperparameters(MT19937* rng); -  prob_t Prob(WordID src, WordID trg) const;  // return p(trg | src) -  void Summary() const; -  void Increment(WordID src, WordID trg, MT19937* rng); -  void Decrement(WordID src, WordID trg, MT19937* rng); -  unsigned UniqueConditioningContexts() const; - - private: -  const std::vector<std::vector<WordID> >& letters;   // spelling dictionary -  PoissonUniformWordModel base;  // "generator" of English types -  ConditionalPYPWordModel<PoissonUniformWordModel, FreqBinner>* tmodel;  // translation distributions -                      // 
(model English word | French word) -  const WordID kX; -}; - -#endif diff --git a/gi/pf/pyp_word_model.h b/gi/pf/pyp_word_model.h deleted file mode 100644 index 0bebb751..00000000 --- a/gi/pf/pyp_word_model.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _PYP_WORD_MODEL_H_ -#define _PYP_WORD_MODEL_H_ - -#include <iostream> -#include <cmath> -#include <vector> -#include "prob.h" -#include "ccrp.h" -#include "m.h" -#include "tdict.h" -#include "os_phrase.h" - -// PYP(d,s,poisson-uniform) represented as a CRP -template <class Base> -struct PYPWordModel { -  explicit PYPWordModel(Base* b) : -      base(*b), -      r(1,1,1,1,0.66,50.0) -    {} - -  void ResampleHyperparameters(MT19937* rng) { -    r.resample_hyperparameters(rng); -    std::cerr << " PYPWordModel(d=" << r.discount() << ",s=" << r.strength() << ")\n"; -  } - -  inline prob_t operator()(const std::vector<WordID>& s) const { -    return r.prob(s, base(s)); -  } - -  inline void Increment(const std::vector<WordID>& s, MT19937* rng) { -    if (r.increment(s, base(s), rng)) -      base.Increment(s, rng); -  } - -  inline void Decrement(const std::vector<WordID>& s, MT19937 *rng) { -    if (r.decrement(s, rng)) -      base.Decrement(s, rng); -  } - -  inline prob_t Likelihood() const { -    prob_t p; p.logeq(r.log_crp_prob()); -    p *= base.Likelihood(); -    return p; -  } - -  void Summary() const { -    std::cerr << "PYPWordModel: generations=" << r.num_customers() -         << " PYP(d=" << r.discount() << ",s=" << r.strength() << ')' << std::endl; -    for (typename CCRP<std::vector<WordID> >::const_iterator it = r.begin(); it != r.end(); ++it) { -      std::cerr << "  " << it->second -                << TD::GetString(it->first) << std::endl; -    } -  } - - private: - -  Base& base;  // keeps track of the draws from the base distribution -  CCRP<std::vector<WordID> > r; -}; - -#endif diff --git a/gi/pf/quasi_model2.h b/gi/pf/quasi_model2.h deleted file mode 100644 index 4075affe..00000000 --- a/gi/pf/quasi_model2.h +++ /dev/null @@ -1,177 +0,0 @@ -#ifndef _QUASI_MODEL2_H_ -#define _QUASI_MODEL2_H_ - -#include <vector> -#include <cmath> -#include <tr1/unordered_map> -#include "boost/functional.hpp" -#include "prob.h" -#include "array2d.h" -#include "slice_sampler.h" -#include "m.h" -#include "have_64_bits.h" - -struct AlignmentObservation { -  AlignmentObservation() : src_len(), trg_len(), j(), a_j() {} -  AlignmentObservation(unsigned sl, unsigned tl, unsigned tw, unsigned sw) : -      src_len(sl), trg_len(tl), j(tw), a_j(sw) {} -  unsigned short src_len; -  unsigned short trg_len; -  unsigned short j; -  unsigned short a_j; -}; - -#ifdef HAVE_64_BITS -inline size_t hash_value(const AlignmentObservation& o) { -  return reinterpret_cast<const size_t&>(o); -} -inline bool operator==(const AlignmentObservation& a, const AlignmentObservation& b) { -  return hash_value(a) == hash_value(b); -} -#else -inline size_t hash_value(const AlignmentObservation& o) { -  size_t h = 1; -  boost::hash_combine(h, o.src_len); -  boost::hash_combine(h, o.trg_len); -  boost::hash_combine(h, o.j); -  boost::hash_combine(h, o.a_j); -  return h; -} -#endif - -struct QuasiModel2 { -  explicit QuasiModel2(double alpha, double pnull = 0.1) : -      alpha_(alpha), -      pnull_(pnull), -      pnotnull_(1 - pnull) {} - -  // a_j = 0 => NULL; src_len does *not* include null -  prob_t Prob(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) const { -    if (!a_j) return pnull_; -    return pnotnull_ * -       prob_t(UnnormalizedProb(a_j, j, src_len, 
trg_len, alpha_) / GetOrComputeZ(j, src_len, trg_len)); -  } - -  void Increment(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) { -    assert(a_j <= src_len); -    assert(j < trg_len); -    ++obs_[AlignmentObservation(src_len, trg_len, j, a_j)]; -  } - -  void Decrement(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) { -    const AlignmentObservation ao(src_len, trg_len, j, a_j); -    int &cc = obs_[ao]; -    assert(cc > 0); -    --cc; -    if (!cc) obs_.erase(ao); -  } - -  struct PNullResampler { -    PNullResampler(const QuasiModel2& m) : m_(m) {} -    const QuasiModel2& m_; -    double operator()(const double& proposed_pnull) const { -      return log(m_.Likelihood(m_.alpha_, proposed_pnull)); -    } -  }; - -  struct AlphaResampler { -    AlphaResampler(const QuasiModel2& m) : m_(m) {} -    const QuasiModel2& m_; -    double operator()(const double& proposed_alpha) const { -      return log(m_.Likelihood(proposed_alpha, m_.pnull_.as_float())); -    } -  }; - -  void ResampleHyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { -    const PNullResampler dr(*this); -    const AlphaResampler ar(*this); -    for (unsigned i = 0; i < nloop; ++i) { -      double pnull = slice_sampler1d(dr, pnull_.as_float(), *rng, 0.00000001, -                            1.0, 0.0, niterations, 100*niterations); -      pnull_ = prob_t(pnull); -      alpha_ = slice_sampler1d(ar, alpha_, *rng, 0.00000001, -                              std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); -    } -    std::cerr << "QuasiModel2(alpha=" << alpha_ << ",p_null=" -              << pnull_.as_float() << ") = " << Likelihood() << std::endl; -    zcache_.clear(); -  } - -  prob_t Likelihood() const { -    return Likelihood(alpha_, pnull_.as_float()); -  } - -  prob_t Likelihood(double alpha, double ppnull) const { -    const prob_t pnull(ppnull); -    const prob_t pnotnull(1 - ppnull); - -    prob_t p; -    p.logeq(Md::log_gamma_density(alpha, 0.1, 25));  // TODO configure -    assert(!p.is_0()); -    prob_t prob_of_ppnull; prob_of_ppnull.logeq(Md::log_beta_density(ppnull, 2, 10)); -    assert(!prob_of_ppnull.is_0()); -    p *= prob_of_ppnull; -    for (ObsCount::const_iterator it = obs_.begin(); it != obs_.end(); ++it) { -      const AlignmentObservation& ao = it->first; -      if (ao.a_j) { -        prob_t u = XUnnormalizedProb(ao.a_j, ao.j, ao.src_len, ao.trg_len, alpha); -        prob_t z = XComputeZ(ao.j, ao.src_len, ao.trg_len, alpha); -        prob_t pa(u / z); -        pa *= pnotnull; -        pa.poweq(it->second); -        p *= pa; -      } else { -        p *= pnull.pow(it->second); -      } -    } -    return p; -  } - - private: -  static prob_t XUnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) { -    prob_t p; -    p.logeq(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha); -    return p; -  } - -  static prob_t XComputeZ(unsigned j, unsigned src_len, unsigned trg_len, double alpha) { -    prob_t z = prob_t::Zero(); -    for (int a_j = 1; a_j <= src_len; ++a_j) -      z += XUnnormalizedProb(a_j, j, src_len, trg_len, alpha); -    return z; -  } - -  static double UnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) { -    return exp(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha); -  } - -  static double ComputeZ(unsigned j, unsigned src_len, unsigned trg_len, double alpha) { -    double z = 0; -    for (int 
a_j = 1; a_j <= src_len; ++a_j) -      z += UnnormalizedProb(a_j, j, src_len, trg_len, alpha); -    return z; -  } - -  const double& GetOrComputeZ(unsigned j, unsigned src_len, unsigned trg_len) const { -    if (src_len >= zcache_.size()) -      zcache_.resize(src_len + 1); -    if (trg_len >= zcache_[src_len].size()) -      zcache_[src_len].resize(trg_len + 1); -    std::vector<double>& zv = zcache_[src_len][trg_len]; -    if (zv.size() == 0) -      zv.resize(trg_len); -    double& z = zv[j]; -    if (!z) -      z = ComputeZ(j, src_len, trg_len, alpha_); -    return z; -  } - -  double alpha_; -  prob_t pnull_; -  prob_t pnotnull_; -  mutable std::vector<std::vector<std::vector<double> > > zcache_; -  typedef std::tr1::unordered_map<AlignmentObservation, int, boost::hash<AlignmentObservation> > ObsCount; -  ObsCount obs_; -}; - -#endif diff --git a/gi/pf/reachability.cc b/gi/pf/reachability.cc deleted file mode 100644 index 7d0d04ac..00000000 --- a/gi/pf/reachability.cc +++ /dev/null @@ -1,74 +0,0 @@ -#include "reachability.h" - -#include <vector> -#include <iostream> - -using namespace std; - -struct SState { -  SState() : prev_src_covered(), prev_trg_covered() {} -  SState(int i, int j) : prev_src_covered(i), prev_trg_covered(j) {} -  int prev_src_covered; -  int prev_trg_covered; -}; - -void Reachability::ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) { -    typedef boost::multi_array<vector<SState>, 2> array_type; -    array_type a(boost::extents[srclen + 1][trglen + 1]); -    a[0][0].push_back(SState()); -    for (int i = 0; i < srclen; ++i) { -      for (int j = 0; j < trglen; ++j) { -        if (a[i][j].size() == 0) continue; -        const SState prev(i,j); -        for (int k = 1; k <= src_max_phrase_len; ++k) { -          if ((i + k) > srclen) continue; -          for (int l = 1; l <= trg_max_phrase_len; ++l) { -            if ((j + l) > trglen) continue; -            a[i + k][j + l].push_back(prev); -          } -        } -      } -    } -    a[0][0].clear(); -    //cerr << srclen << "," << trglen << ": Final cell contains " << a[srclen][trglen].size() << " back pointers\n"; -    if (a[srclen][trglen].empty()) { -      cerr << "Sequence pair with lengths (" << srclen << ',' << trglen << ") violates reachability constraints\n"; -      nodes = 0; -      return; -    } - -    typedef boost::multi_array<bool, 2> rarray_type; -    rarray_type r(boost::extents[srclen + 1][trglen + 1]); -    r[srclen][trglen] = true; -    nodes = 0; -    for (int i = srclen; i >= 0; --i) { -      for (int j = trglen; j >= 0; --j) { -        vector<SState>& prevs = a[i][j]; -        if (!r[i][j]) { prevs.clear(); } -        for (int k = 0; k < prevs.size(); ++k) { -          r[prevs[k].prev_src_covered][prevs[k].prev_trg_covered] = true; -          int src_delta = i - prevs[k].prev_src_covered; -          edges[prevs[k].prev_src_covered][prevs[k].prev_trg_covered][src_delta][j - prevs[k].prev_trg_covered] = true; -          valid_deltas[prevs[k].prev_src_covered][prevs[k].prev_trg_covered].push_back(make_pair<short,short>(src_delta,j - prevs[k].prev_trg_covered)); -          short &msd = max_src_delta[prevs[k].prev_src_covered][prevs[k].prev_trg_covered]; -          if (src_delta > msd) msd = src_delta; -        } -      } -    } -    assert(!edges[0][0][1][0]); -    assert(!edges[0][0][0][1]); -    assert(!edges[0][0][0][0]); -    assert(max_src_delta[0][0] > 0); -    nodes = 0; -    for (int i = 0; i < srclen; ++i) { -      for (int j = 0; j < trglen; 
++j) { -        if (valid_deltas[i][j].size() > 0) { -          node_addresses[i][j] = nodes++; -        } else { -          node_addresses[i][j] = -1; -        } -      } -    } -    cerr << "Sequence pair with lengths (" << srclen << ',' << trglen << ") has " << valid_deltas[0][0].size() << " out edges in its root node, " << nodes << " nodes in total, and outside estimate matrix will require " << sizeof(float)*nodes << " bytes\n"; -  } - diff --git a/gi/pf/reachability.h b/gi/pf/reachability.h deleted file mode 100644 index 1e22c76a..00000000 --- a/gi/pf/reachability.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef _REACHABILITY_H_ -#define _REACHABILITY_H_ - -#include "boost/multi_array.hpp" - -// determines minimum and maximum lengths of outgoing edges from all -// coverage positions such that the alignment path respects src and -// trg maximum phrase sizes -// -// runs in O(n^2 * src_max * trg_max) time but should be relatively fast -// -// currently forbids 0 -> n and n -> 0 alignments - -struct Reachability { -  unsigned nodes; -  boost::multi_array<bool, 4> edges;  // edges[src_covered][trg_covered][src_delta][trg_delta] is this edge worth exploring? -  boost::multi_array<short, 2> max_src_delta; // msd[src_covered][trg_covered] -- the largest src delta that's valid -  boost::multi_array<short, 2> node_addresses; // na[src_covered][trg_covered] -- the index of the node in a one-dimensional array (of size "nodes") -  boost::multi_array<std::vector<std::pair<short,short> >, 2> valid_deltas; // valid_deltas[src_covered][trg_covered] list of valid transitions leaving a particular node - -  Reachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) : -      nodes(), -      edges(boost::extents[srclen][trglen][src_max_phrase_len+1][trg_max_phrase_len+1]), -      max_src_delta(boost::extents[srclen][trglen]), -      node_addresses(boost::extents[srclen][trglen]), -      valid_deltas(boost::extents[srclen][trglen]) { -    ComputeReachability(srclen, trglen, src_max_phrase_len, trg_max_phrase_len); -  } - - private: -  void ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len); -}; - -#endif diff --git a/gi/pf/tied_resampler.h b/gi/pf/tied_resampler.h deleted file mode 100644 index a4f4af36..00000000 --- a/gi/pf/tied_resampler.h +++ /dev/null @@ -1,122 +0,0 @@ -#ifndef _TIED_RESAMPLER_H_ -#define _TIED_RESAMPLER_H_ - -#include <set> -#include <vector> -#include "sampler.h" -#include "slice_sampler.h" -#include "m.h" - -template <class CRP> -struct TiedResampler { -  explicit TiedResampler(double da, double db, double ss, double sr, double d=0.5, double s=1.0) : -      d_alpha(da), -      d_beta(db), -      s_shape(ss), -      s_rate(sr), -      discount(d), -      strength(s) {} - -  void Add(CRP* crp) { -    crps.insert(crp); -    crp->set_discount(discount); -    crp->set_strength(strength); -    assert(!crp->has_discount_prior()); -    assert(!crp->has_strength_prior()); -  } - -  void Remove(CRP* crp) { -    crps.erase(crp); -  } - -  size_t size() const { -    return crps.size(); -  } - -  double LogLikelihood(double d, double s) const { -    if (s <= -d) return -std::numeric_limits<double>::infinity(); -    double llh = Md::log_beta_density(d, d_alpha, d_beta) + -                 Md::log_gamma_density(d + s, s_shape, s_rate); -    for (typename std::set<CRP*>::iterator it = crps.begin(); it != crps.end(); ++it) -      llh += (*it)->log_crp_prob(d, s); -    return llh; -  } - -  double LogLikelihood() const { -    return 
LogLikelihood(discount, strength); -  } - -  struct DiscountResampler { -    DiscountResampler(const TiedResampler& m) : m_(m) {} -    const TiedResampler& m_; -    double operator()(const double& proposed_discount) const { -      return m_.LogLikelihood(proposed_discount, m_.strength); -    } -  }; - -  struct AlphaResampler { -    AlphaResampler(const TiedResampler& m) : m_(m) {} -    const TiedResampler& m_; -    double operator()(const double& proposed_strength) const { -      return m_.LogLikelihood(m_.discount, proposed_strength); -    } -  }; - -  void ResampleHyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { -    if (size() == 0) { std::cerr << "EMPTY - not resampling\n"; return; } -    const DiscountResampler dr(*this); -    const AlphaResampler ar(*this); -    for (int iter = 0; iter < nloop; ++iter) { -      strength = slice_sampler1d(ar, strength, *rng, -discount + std::numeric_limits<double>::min(), -                              std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); -      double min_discount = std::numeric_limits<double>::min(); -      if (strength < 0.0) min_discount -= strength; -      discount = slice_sampler1d(dr, discount, *rng, min_discount, -                          1.0, 0.0, niterations, 100*niterations); -    } -    strength = slice_sampler1d(ar, strength, *rng, -discount + std::numeric_limits<double>::min(), -                            std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); -    std::cerr << "TiedCRPs(d=" << discount << ",s=" -              << strength << ") = " << LogLikelihood(discount, strength) << std::endl; -    for (typename std::set<CRP*>::iterator it = crps.begin(); it != crps.end(); ++it) -      (*it)->set_hyperparameters(discount, strength); -  } - private: -  std::set<CRP*> crps; -  const double d_alpha, d_beta, s_shape, s_rate; -  double discount, strength; -}; - -// split according to some criterion -template <class CRP> -struct BinTiedResampler { -  explicit BinTiedResampler(unsigned nbins) : -      resamplers(nbins, TiedResampler<CRP>(1,1,1,1)) {} - -  void Add(unsigned bin, CRP* crp) { -    resamplers[bin].Add(crp); -  } - -  void Remove(unsigned bin, CRP* crp) { -    resamplers[bin].Remove(crp); -  } - -  void ResampleHyperparameters(MT19937* rng) { -    for (unsigned i = 0; i < resamplers.size(); ++i) { -      std::cerr << "BIN " << i << " (" << resamplers[i].size() << " CRPs): " << std::flush; -      resamplers[i].ResampleHyperparameters(rng); -    } -  } - -  double LogLikelihood() const { -    double llh = 0; -    for (unsigned i = 0; i < resamplers.size(); ++i) -      llh += resamplers[i].LogLikelihood(); -    return llh; -  } - - private: -  std::vector<TiedResampler<CRP> > resamplers; -}; - -#endif diff --git a/gi/pf/tpf.cc b/gi/pf/tpf.cc deleted file mode 100644 index 7348d21c..00000000 --- a/gi/pf/tpf.cc +++ /dev/null @@ -1,99 +0,0 @@ -#include <iostream> -#include <tr1/memory> -#include <queue> - -#include "sampler.h" - -using namespace std; -using namespace tr1; - -shared_ptr<MT19937> prng; - -struct Particle { -  Particle() : weight(prob_t::One()) {} -  vector<int> states; -  prob_t weight; -  prob_t gamma_last; -}; - -ostream& operator<<(ostream& os, const Particle& p) { -  os << "["; -  for (int i = 0; i < p.states.size(); ++i) os << p.states[i] << ' '; -  os << "| w=" << log(p.weight) << ']'; -  return os; -} - -void Rejuvenate(vector<Particle>& pps) { -  SampleSet<prob_t> ss; -  vector<Particle> nps(pps.size()); -  
for (int i = 0; i < pps.size(); ++i) { -//    cerr << pps[i] << endl; -    ss.add(pps[i].weight); -  } -//  cerr << "REJUVINATING...\n"; -  for (int i = 0; i < pps.size(); ++i) { -    nps[i] = pps[prng->SelectSample(ss)]; -    nps[i].weight = prob_t(1.0 / pps.size()); -//    cerr << nps[i] << endl; -  } -  nps.swap(pps); -//  exit(1); -} - -int main(int argc, char** argv) { -  const unsigned particles = 100; -  prng.reset(new MT19937); -  MT19937& rng = *prng; - -  // q(a) = 0.8 -  // q(b) = 0.8 -  // q(c) = 0.4 -  SampleSet<double> ssq; -  ssq.add(0.4); -  ssq.add(0.6); -  ssq.add(0); -  double qz = 1; - -  // p(a) = 0.2 -  // p(b) = 0.8 -  vector<double> p(3); -  p[0] = 0.2; -  p[1] = 0.8; -  p[2] = 0; - -  vector<int> counts(3); -  int tot = 0; - -  vector<Particle> pps(particles); -  SampleSet<prob_t> ppss; -  int LEN = 12; -  int PP = 1; -  while (pps[0].states.size() < LEN) { -    for (int pi = 0; pi < particles; ++pi) { -      Particle& prt = pps[pi]; - -      bool redo = true; -      const Particle savedp = prt; -      while (redo) { -        redo = false; -        for (int i = 0; i < PP; ++i) { -          int s = rng.SelectSample(ssq); -          double gamma_last = p[s]; -          if (!gamma_last) { redo = true; break; } -          double q = ssq[s] / qz; -          prt.states.push_back(s); -          prt.weight *= prob_t(gamma_last / q); -        } -        if (redo) { prt = savedp; continue; } -      } -    } -    Rejuvenate(pps); -  } -  ppss.clear(); -  for (int i = 0; i < particles; ++i) { ppss.add(pps[i].weight); } -  int sp = rng.SelectSample(ppss); -  cerr << pps[sp] << endl; - -  return 0; -} - diff --git a/gi/pf/transliterations.cc b/gi/pf/transliterations.cc deleted file mode 100644 index b2996f65..00000000 --- a/gi/pf/transliterations.cc +++ /dev/null @@ -1,334 +0,0 @@ -#include "transliterations.h" - -#include <iostream> -#include <vector> - -#include "boost/shared_ptr.hpp" - -#include "backward.h" -#include "filelib.h" -#include "tdict.h" -#include "trule.h" -#include "filelib.h" -#include "ccrp_nt.h" -#include "m.h" -#include "reachability.h" - -using namespace std; -using namespace std::tr1; - -struct TruncatedConditionalLengthModel { -  TruncatedConditionalLengthModel(unsigned max_src_size, unsigned max_trg_size, double expected_src_to_trg_ratio) : -      plens(max_src_size+1, vector<prob_t>(max_trg_size+1, 0.0)) { -    for (unsigned i = 1; i <= max_src_size; ++i) { -      prob_t z = prob_t::Zero(); -      for (unsigned j = 1; j <= max_trg_size; ++j) -        z += (plens[i][j] = prob_t(0.01 + exp(Md::log_poisson(j, i * expected_src_to_trg_ratio)))); -      for (unsigned j = 1; j <= max_trg_size; ++j) -        plens[i][j] /= z; -      //for (unsigned j = 1; j <= max_trg_size; ++j) -      //  cerr << "P(trg_len=" << j << " | src_len=" << i << ") = " << plens[i][j] << endl; -    } -  } - -  // return p(tlen | slen) for *chunks* not full words -  inline const prob_t& operator()(int slen, int tlen) const { -    return plens[slen][tlen]; -  } - -  vector<vector<prob_t> > plens; -}; - -struct CondBaseDist { -  CondBaseDist(unsigned max_src_size, unsigned max_trg_size, double expected_src_to_trg_ratio) : -    tclm(max_src_size, max_trg_size, expected_src_to_trg_ratio) {} - -  prob_t operator()(const vector<WordID>& src, unsigned sf, unsigned st, -                    const vector<WordID>& trg, unsigned tf, unsigned tt) const { -    prob_t p = tclm(st - sf, tt - tf);  // target len | source length ~ TCLM(source len) -    assert(!"not impl"); -    return p; -  } -  inline 
prob_t operator()(const vector<WordID>& src, const vector<WordID>& trg) const { -    return (*this)(src, 0, src.size(), trg, 0, trg.size()); -  } -  TruncatedConditionalLengthModel tclm; -}; - -// represents transliteration phrase probabilities, e.g. -//   p( a l - | A l ) , p( o | A w ) , ... -struct TransliterationChunkConditionalModel { -  explicit TransliterationChunkConditionalModel(const CondBaseDist& pp0) : -      d(0.0), -      strength(1.0), -      rp0(pp0) { -  } - -  void Summary() const { -    std::cerr << "Number of conditioning contexts: " << r.size() << std::endl; -    for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { -      std::cerr << TD::GetString(it->first) << "   \t(\\alpha = " << it->second.alpha() << ") --------------------------" << std::endl; -      for (CCRP_NoTable<TRule>::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) -        std::cerr << "   " << i2->second << '\t' << i2->first << std::endl; -    } -  } - -  int DecrementRule(const TRule& rule) { -    RuleModelHash::iterator it = r.find(rule.f_); -    assert(it != r.end());     -    int count = it->second.decrement(rule); -    if (count) { -      if (it->second.num_customers() == 0) r.erase(it); -    } -    return count; -  } - -  int IncrementRule(const TRule& rule) { -    RuleModelHash::iterator it = r.find(rule.f_); -    if (it == r.end()) { -      it = r.insert(make_pair(rule.f_, CCRP_NoTable<TRule>(strength))).first; -    }  -    int count = it->second.increment(rule); -    return count; -  } - -  void IncrementRules(const std::vector<TRulePtr>& rules) { -    for (int i = 0; i < rules.size(); ++i) -      IncrementRule(*rules[i]); -  } - -  void DecrementRules(const std::vector<TRulePtr>& rules) { -    for (int i = 0; i < rules.size(); ++i) -      DecrementRule(*rules[i]); -  } - -  prob_t RuleProbability(const TRule& rule) const { -    prob_t p; -    RuleModelHash::const_iterator it = r.find(rule.f_); -    if (it == r.end()) { -      p = rp0(rule.f_, rule.e_); -    } else { -      p = it->second.prob(rule, rp0(rule.f_, rule.e_)); -    } -    return p; -  } - -  double LogLikelihood(const double& dd, const double& aa) const { -    if (aa <= -dd) return -std::numeric_limits<double>::infinity(); -    //double llh = Md::log_beta_density(dd, 10, 3) + Md::log_gamma_density(aa, 1, 1); -    double llh = //Md::log_beta_density(dd, 1, 1) + -                 Md::log_gamma_density(dd + aa, 1, 1); -    std::tr1::unordered_map<std::vector<WordID>, CCRP_NoTable<TRule>, boost::hash<std::vector<WordID> > >::const_iterator it; -    for (it = r.begin(); it != r.end(); ++it) -      llh += it->second.log_crp_prob(aa); -    return llh; -  } - -  struct AlphaResampler { -    AlphaResampler(const TransliterationChunkConditionalModel& m) : m_(m) {} -    const TransliterationChunkConditionalModel& m_; -    double operator()(const double& proposed_strength) const { -      return m_.LogLikelihood(m_.d, proposed_strength); -    } -  }; - -  void ResampleHyperparameters(MT19937* rng) { -    std::tr1::unordered_map<std::vector<WordID>, CCRP_NoTable<TRule>, boost::hash<std::vector<WordID> > >::iterator it; -    //const unsigned nloop = 5; -    const unsigned niterations = 10; -    //DiscountResampler dr(*this); -    AlphaResampler ar(*this); -#if 0 -    for (int iter = 0; iter < nloop; ++iter) { -      strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits<double>::min(), -                              std::numeric_limits<double>::infinity(), 0.0, niterations, 
100*niterations); -      double min_discount = std::numeric_limits<double>::min(); -      if (strength < 0.0) min_discount -= strength; -      d = slice_sampler1d(dr, d, *rng, min_discount, -                          1.0, 0.0, niterations, 100*niterations); -    } -#endif -    strength = slice_sampler1d(ar, strength, *rng, -d, -                            std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); -    std::cerr << "CTMModel(alpha=" << strength << ") = " << LogLikelihood(d, strength) << std::endl; -    for (it = r.begin(); it != r.end(); ++it) { -#if 0 -      it->second.set_discount(d); -#endif -      it->second.set_alpha(strength); -    } -  } - -  prob_t Likelihood() const { -    prob_t p; p.logeq(LogLikelihood(d, strength)); -    return p; -  } - -  const CondBaseDist& rp0; -  typedef std::tr1::unordered_map<std::vector<WordID>, -                                  CCRP_NoTable<TRule>, -                                  boost::hash<std::vector<WordID> > > RuleModelHash; -  RuleModelHash r; -  double d, strength; -}; - -struct GraphStructure { -  GraphStructure() : r() {} -  // leak memory - these are basically static -  const Reachability* r; -  bool IsReachable() const { return r->nodes > 0; } -}; - -struct ProbabilityEstimates { -  ProbabilityEstimates() : gs(), backward() {} -  explicit ProbabilityEstimates(const GraphStructure& g) : -      gs(&g), backward() { -    if (g.r->nodes > 0) -      backward = new float[g.r->nodes]; -  } -  // leak memory, these are static - -  // returns an estimate of the marginal probability -  double MarginalEstimate() const { -    if (!backward) return 0; -    return backward[0]; -  } - -  // returns a backward estimate -  double Backward(int src_covered, int trg_covered) const { -    if (!backward) return 0; -    int ind = gs->r->node_addresses[src_covered][trg_covered]; -    if (ind < 0) return 0; -    return backward[ind]; -  } - -  prob_t estp; -  float* backward; - private: -  const GraphStructure* gs; -}; - -struct TransliterationsImpl { -  TransliterationsImpl(int max_src, int max_trg, double sr, const BackwardEstimator& b) : -      cp0(max_src, max_trg, sr), -      tccm(cp0), -      be(b), -      kMAX_SRC_CHUNK(max_src), -      kMAX_TRG_CHUNK(max_trg), -      kS2T_RATIO(sr), -      tot_pairs(), tot_mem() { -  } -  const CondBaseDist cp0; -  TransliterationChunkConditionalModel tccm; -  const BackwardEstimator& be; - -  void Initialize(WordID src, const vector<WordID>& src_lets, WordID trg, const vector<WordID>& trg_lets) { -    const size_t src_len = src_lets.size(); -    const size_t trg_len = trg_lets.size(); - -    // init graph structure -    if (src_len >= graphs.size()) graphs.resize(src_len + 1); -    if (trg_len >= graphs[src_len].size()) graphs[src_len].resize(trg_len + 1); -    GraphStructure& gs = graphs[src_len][trg_len]; -    if (!gs.r) { -      double rat = exp(fabs(log(trg_len / (src_len * kS2T_RATIO)))); -      if (rat > 1.5 || (rat > 2.4 && src_len < 6)) { -        cerr << " ** Forbidding transliterations of size " << src_len << "," << trg_len << ": " << rat << endl; -        gs.r = new Reachability(src_len, trg_len, 0, 0); -      } else { -        gs.r = new Reachability(src_len, trg_len, kMAX_SRC_CHUNK, kMAX_TRG_CHUNK); -      } -    } - -    const Reachability& r = *gs.r; - -    // init backward estimates -    if (src >= ests.size()) ests.resize(src + 1); -    unordered_map<WordID, ProbabilityEstimates>::iterator it = ests[src].find(trg); -    if (it != ests[src].end()) return; // already 
initialized - -    it = ests[src].insert(make_pair(trg, ProbabilityEstimates(gs))).first; -    ProbabilityEstimates& est = it->second; -    if (!gs.r->nodes) return;  // not derivable subject to length constraints - -    be.InitializeGrid(src_lets, trg_lets, r, kS2T_RATIO, est.backward); -    cerr << TD::GetString(src_lets) << " ||| " << TD::GetString(trg_lets) << " ||| " << (est.backward[0] / trg_lets.size()) << endl; -    tot_pairs++; -    tot_mem += sizeof(float) * gs.r->nodes; -  } - -  void Forbid(WordID src, const vector<WordID>& src_lets, WordID trg, const vector<WordID>& trg_lets) { -    const size_t src_len = src_lets.size(); -    const size_t trg_len = trg_lets.size(); -    // TODO -  } - -  prob_t EstimateProbability(WordID s, const vector<WordID>& src, WordID t, const vector<WordID>& trg) const { -    assert(src.size() < graphs.size()); -    const vector<GraphStructure>& tv = graphs[src.size()]; -    assert(trg.size() < tv.size()); -    const GraphStructure& gs = tv[trg.size()]; -    if (gs.r->nodes == 0) -      return prob_t::Zero(); -    const unordered_map<WordID, ProbabilityEstimates>::const_iterator it = ests[s].find(t); -    assert(it != ests[s].end()); -    return it->second.estp; -  } - -  void GraphSummary() const { -    double to = 0; -    double tn = 0; -    double tt = 0; -    for (int i = 0; i < graphs.size(); ++i) { -      const vector<GraphStructure>& vt = graphs[i]; -      for (int j = 0; j < vt.size(); ++j) { -        const GraphStructure& gs = vt[j]; -        if (!gs.r) continue; -        tt++; -        for (int k = 0; k < i; ++k) { -          for (int l = 0; l < j; ++l) { -            size_t c = gs.r->valid_deltas[k][l].size(); -            if (c) { -              tn += 1; -              to += c; -            } -          } -        } -      } -    } -    cerr << "     Average nodes = " << (tn / tt) << endl; -    cerr << "Average out-degree = " << (to / tn) << endl; -    cerr << " Unique structures = " << tt << endl; -    cerr << "      Unique pairs = " << tot_pairs << endl; -    cerr << "          BEs size = " << (tot_mem / (1024.0*1024.0)) << " MB" << endl; -  } - -  const int kMAX_SRC_CHUNK; -  const int kMAX_TRG_CHUNK; -  const double kS2T_RATIO; -  unsigned tot_pairs; -  size_t tot_mem; -  vector<vector<GraphStructure> > graphs; // graphs[src_len][trg_len] -  vector<unordered_map<WordID, ProbabilityEstimates> > ests; // ests[src][trg] -}; - -Transliterations::Transliterations(int max_src, int max_trg, double sr, const BackwardEstimator& be) : -    pimpl_(new TransliterationsImpl(max_src, max_trg, sr, be)) {} -Transliterations::~Transliterations() { delete pimpl_; } - -void Transliterations::Initialize(WordID src, const vector<WordID>& src_lets, WordID trg, const vector<WordID>& trg_lets) { -  pimpl_->Initialize(src, src_lets, trg, trg_lets); -} - -prob_t Transliterations::EstimateProbability(WordID s, const vector<WordID>& src, WordID t, const vector<WordID>& trg) const { -  return pimpl_->EstimateProbability(s, src,t, trg); -} - -void Transliterations::Forbid(WordID src, const vector<WordID>& src_lets, WordID trg, const vector<WordID>& trg_lets) { -  pimpl_->Forbid(src, src_lets, trg, trg_lets); -} - -void Transliterations::GraphSummary() const { -  pimpl_->GraphSummary(); -} - diff --git a/gi/pf/transliterations.h b/gi/pf/transliterations.h deleted file mode 100644 index 49d14684..00000000 --- a/gi/pf/transliterations.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef _TRANSLITERATIONS_H_ -#define _TRANSLITERATIONS_H_ - -#include <vector> -#include "wordid.h" 
-#include "prob.h" - -struct BackwardEstimator; -struct TransliterationsImpl; -struct Transliterations { -  // max_src and max_trg indicate how big the transliteration phrases can be -  // see reachability.h for information about filter_ratio -  explicit Transliterations(int max_src, int max_trg, double s2t_rat, const BackwardEstimator& be); -  ~Transliterations(); -  void Initialize(WordID src, const std::vector<WordID>& src_lets, WordID trg, const std::vector<WordID>& trg_lets); -  void Forbid(WordID src, const std::vector<WordID>& src_lets, WordID trg, const std::vector<WordID>& trg_lets); -  void GraphSummary() const; -  prob_t EstimateProbability(WordID s, const std::vector<WordID>& src, WordID t, const std::vector<WordID>& trg) const; - private: -  TransliterationsImpl* pimpl_; -}; - -#endif - diff --git a/gi/pf/unigrams.cc b/gi/pf/unigrams.cc deleted file mode 100644 index 40829775..00000000 --- a/gi/pf/unigrams.cc +++ /dev/null @@ -1,80 +0,0 @@ -#include "unigrams.h" - -#include <string> -#include <cmath> - -#include "stringlib.h" -#include "filelib.h" - -using namespace std; - -void UnigramModel::LoadUnigrams(const string& fname) { -  cerr << "Loading unigram probabilities from " << fname << " ..." << endl; -  ReadFile rf(fname); -  string line; -  istream& in = *rf.stream(); -  assert(in); -  getline(in, line); -  assert(line.empty()); -  getline(in, line); -  assert(line == "\\data\\"); -  getline(in, line); -  size_t pos = line.find("ngram 1="); -  assert(pos == 0); -  assert(line.size() > 8); -  const size_t num_unigrams = atoi(&line[8]); -  getline(in, line); -  assert(line.empty()); -  getline(in, line); -  assert(line == "\\1-grams:"); -  for (size_t i = 0; i < num_unigrams; ++i) { -    getline(in, line); -    assert(line.size() > 0); -    pos = line.find('\t'); -    assert(pos > 0); -    assert(pos + 1 < line.size()); -    const WordID w = TD::Convert(line.substr(pos + 1)); -    line[pos] = 0; -    float p = atof(&line[0]); -    if (w < probs_.size()) probs_[w].logeq(p * log(10)); else cerr << "WARNING: don't know about '" << TD::Convert(w) << "'\n"; -  } -} - -void UnigramWordModel::LoadUnigrams(const string& fname) { -  cerr << "Loading unigram probabilities from " << fname << " ..." 
<< endl; -  ReadFile rf(fname); -  string line; -  istream& in = *rf.stream(); -  assert(in); -  getline(in, line); -  assert(line.empty()); -  getline(in, line); -  assert(line == "\\data\\"); -  getline(in, line); -  size_t pos = line.find("ngram 1="); -  assert(pos == 0); -  assert(line.size() > 8); -  const size_t num_unigrams = atoi(&line[8]); -  getline(in, line); -  assert(line.empty()); -  getline(in, line); -  assert(line == "\\1-grams:"); -  for (size_t i = 0; i < num_unigrams; ++i) { -    getline(in, line); -    assert(line.size() > 0); -    pos = line.find('\t'); -    assert(pos > 0); -    assert(pos + 1 < line.size()); -    size_t cur = pos + 1; -    vector<WordID> w; -    while (cur < line.size()) { -      const size_t len = UTF8Len(line[cur]); -      w.push_back(TD::Convert(line.substr(cur, len))); -      cur += len; -    } -    line[pos] = 0; -    float p = atof(&line[0]); -    probs_[w].logeq(p * log(10.0)); -  } -} - diff --git a/gi/pf/unigrams.h b/gi/pf/unigrams.h deleted file mode 100644 index 1660d1ed..00000000 --- a/gi/pf/unigrams.h +++ /dev/null @@ -1,69 +0,0 @@ -#ifndef _UNIGRAMS_H_ -#define _UNIGRAMS_H_ - -#include <vector> -#include <string> -#include <tr1/unordered_map> -#include <boost/functional.hpp> - -#include "wordid.h" -#include "prob.h" -#include "tdict.h" - -struct UnigramModel { -  explicit UnigramModel(const std::string& fname, unsigned vocab_size) : -      use_uniform_(fname.size() == 0), -      uniform_(1.0 / vocab_size), -      probs_() { -    if (fname.size() > 0) { -      probs_.resize(TD::NumWords() + 1); -      LoadUnigrams(fname); -    } -  } - -  const prob_t& operator()(const WordID& w) const { -    assert(w); -    if (use_uniform_) return uniform_; -    return probs_[w]; -  } - - private: -  void LoadUnigrams(const std::string& fname); - -  const bool use_uniform_; -  const prob_t uniform_; -  std::vector<prob_t> probs_; -}; - - -// reads an ARPA unigram file and converts words like 'cat' into a string 'c a t' -struct UnigramWordModel { -  explicit UnigramWordModel(const std::string& fname) : -      use_uniform_(false), -      uniform_(1.0), -      probs_() { -    LoadUnigrams(fname); -  } - -  explicit UnigramWordModel(const unsigned vocab_size) : -      use_uniform_(true), -      uniform_(1.0 / vocab_size), -      probs_() {} - -  const prob_t& operator()(const std::vector<WordID>& s) const { -    if (use_uniform_) return uniform_; -    const VectorProbHash::const_iterator it = probs_.find(s); -    assert(it != probs_.end()); -    return it->second; -  } - - private: -  void LoadUnigrams(const std::string& fname); - -  const bool use_uniform_; -  const prob_t uniform_; -  typedef std::tr1::unordered_map<std::vector<WordID>, prob_t, boost::hash<std::vector<WordID> > > VectorProbHash; -  VectorProbHash probs_; -}; - -#endif diff --git a/gi/pipeline/OLD.clsp.config b/gi/pipeline/OLD.clsp.config deleted file mode 100644 index cd0f9d65..00000000 --- a/gi/pipeline/OLD.clsp.config +++ /dev/null @@ -1,9 +0,0 @@ -# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED -# name path aligned-corpus LM xfeats.grammar dev dev-refs test1 testt-eval.sh ... 
-btec /export/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz xgrammar/grammar.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh -fbis /export/ws10smt/data/chinese-english.fbis corpus.zh-en.al -zhen /export/ws10smt/data/chinese-english corpus.zh-en.al -aren /export/ws10smt/data/arabic-english corpus.ar-en.al -uren /export/ws10smt/data/urdu-english corpus.ur-en.al -nlfr /export/ws10smt/data/dutch-french corpus.nl-fr.al - diff --git a/gi/pipeline/OLD.evaluation-pipeline.pl b/gi/pipeline/OLD.evaluation-pipeline.pl deleted file mode 100755 index 49c303eb..00000000 --- a/gi/pipeline/OLD.evaluation-pipeline.pl +++ /dev/null @@ -1,277 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use Getopt::Long; -use Cwd; -my $CWD = getcwd; - -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; } - -my @DEFAULT_FEATS = qw( -  LogRuleCount SingletonRule LexE2F LexF2E WordPenalty -  LogFCount LanguageModel Glue GlueTop PassThrough SingletonF -); - -my %init_weights = qw( -  LogRuleCount 0.2 -  LexE2F -0.3 -  LexF2E -0.3 -  LogFCount 0.1 -  WordPenalty -1.5 -  LanguageModel 1.2 -  Glue -1.0 -  GlueTop 0.00001 -  PassThrough -10.0 -  SingletonRule -0.1 -  X_EGivenF -0.3 -  X_FGivenE -0.3 -  X_LogECount -1 -  X_LogFCount -0.1 -  X_LogRuleCount 0.3 -  X_SingletonE -0.1 -  X_SingletonF -0.1 -  X_SingletonRule -0.5 -); - -my $CDEC = "$SCRIPT_DIR/../../decoder/cdec"; -my $PARALLELIZE = "$SCRIPT_DIR/../../vest/parallelize.pl"; -my $EXTOOLS = "$SCRIPT_DIR/../../extools"; -die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS; -my $VEST = "$SCRIPT_DIR/../../vest"; -die "Can't find vest: $VEST" unless -e $VEST && -d $VEST; -my $DISTVEST = "$VEST/dist-vest.pl"; -my $FILTSCORE = "$EXTOOLS/filter_score_grammar"; -my $ADDXFEATS = "$SCRIPT_DIR/scripts/xfeats.pl"; -assert_exec($CDEC, $PARALLELIZE, $FILTSCORE, $DISTVEST, $ADDXFEATS); - -my $config = "$SCRIPT_DIR/OLD.clsp.config"; -print STDERR "CORPORA CONFIGURATION: $config\n"; -open CONF, "<$config" or die "Can't read $config: $!"; -my %paths; -my %corpora; -my %lms; -my %devs; -my %devrefs; -my %tests; -my %testevals; -my %xgrammars; -print STDERR "       LANGUAGE PAIRS:"; -while(<CONF>) { -  chomp; -  next if /^#/; -  next if /^\s*$/; -  s/^\s+//; -  s/\s+$//; -  my ($name, $path, $corpus, $lm, $xgrammar, $dev, $devref, @xtests) = split /\s+/; -  $paths{$name} = $path; -  $corpora{$name} = $corpus; -  $lms{$name} = $lm; -  $xgrammars{$name} = $xgrammar; -  $devs{$name} = $dev; -  $devrefs{$name} = $devref; -  $tests{$name} = $xtests[0]; -  $testevals{$name} = $xtests[1]; -  print STDERR " $name"; -} -print STDERR "\n"; - -my %langpairs = map { $_ => 1 } qw( btec zhen fbis aren uren nlfr ); - -my $outdir = "$CWD/exp"; -my $help; -my $XFEATS; -my $EXTRA_FILTER = ''; -my $dataDir = '/export/ws10smt/data'; -if (GetOptions( -        "data=s" => \$dataDir, -        "xfeats" => \$XFEATS, -) == 0 || @ARGV!=2 || $help) { -        print_help(); -        exit; -} -my $lp = $ARGV[0]; -my $grammar = $ARGV[1]; -print STDERR "   CORPUS REPO: $dataDir\n"; -print STDERR " LANGUAGE PAIR: $lp\n"; -die "I don't know about that language pair\n" unless $paths{$lp}; -my $corpdir = "$dataDir"; -if ($paths{$lp} =~ /^\//) { $corpdir = $paths{$lp}; } else { $corpdir .= '/' . 
$paths{$lp}; } -die "I can't find the corpora directory: $corpdir" unless -d $corpdir; -print STDERR "       GRAMMAR: $grammar\n"; -my $LANG_MODEL = mydircat($corpdir, $lms{$lp}); -print STDERR "            LM: $LANG_MODEL\n"; -my $CORPUS = mydircat($corpdir, $corpora{$lp}); -die "Can't find corpus: $CORPUS" unless -f $CORPUS; - -my $dev = mydircat($corpdir, $devs{$lp}); -my $drefs = $devrefs{$lp}; -die "Can't find dev: $dev\n" unless -f $dev; -die "Dev refs not set" unless $drefs; -$drefs = mydircat($corpdir, $drefs); - -my $test = mydircat($corpdir, $tests{$lp}); -my $teval = mydircat($corpdir, $testevals{$lp}); -die "Can't find test: $test\n" unless -f $test; -assert_exec($teval); - -if ($XFEATS) { -  my $xgram = mydircat($corpdir, $xgrammars{$lp}); -  die "Can't find x-grammar: $xgram" unless -f $xgram; -  $EXTRA_FILTER = "$ADDXFEATS $xgram |"; -  print STDERR "ADDING X-FEATS FROM $xgram\n"; -} - -# MAKE DEV -print STDERR "\nFILTERING FOR dev...\n"; -print STDERR "DEV: $dev (REFS=$drefs)\n"; -`mkdir -p $outdir`; -my $devgrammar = filter($grammar, $dev, 'dev', $outdir); -my $devini = mydircat($outdir, "cdec-dev.ini"); -write_cdec_ini($devini, $devgrammar); - - -# MAKE TEST -print STDERR "\nFILTERING FOR test...\n"; -print STDERR "TEST: $test (EVAL=$teval)\n"; -`mkdir -p $outdir`; -my $testgrammar = filter($grammar, $test, 'test', $outdir); -my $testini = mydircat($outdir, "cdec-test.ini"); -write_cdec_ini($testini, $testgrammar); - - -# CREATE INIT WEIGHTS -print STDERR "\nCREATING INITIAL WEIGHTS FILE: weights.init\n"; -my $weights = mydircat($outdir, "weights.init"); -write_random_weights_file($weights); - - -# VEST -print STDERR "\nMINIMUM ERROR TRAINING\n"; -my $tuned_weights = mydircat($outdir, 'weights.tuned'); -if (-f $tuned_weights) { -  print STDERR "TUNED WEIGHTS $tuned_weights EXISTS: REUSING\n"; -} else { -  my $cmd = "$DISTVEST --ref-files=$drefs --source-file=$dev --weights $weights $devini"; -  print STDERR "MERT COMMAND: $cmd\n"; -  `rm -rf $outdir/vest 2> /dev/null`; -  chdir $outdir or die "Can't chdir to $outdir: $!"; -  $weights = `$cmd`; -  die "MERT reported non-zero exit code" unless $? == 0; -  chomp $weights; -  safesystem($tuned_weights, "cp $weights $tuned_weights"); -  print STDERR "TUNED WEIGHTS: $tuned_weights\n"; -  die "$tuned_weights is missing!" 
unless -f $tuned_weights; -} - -# DECODE -print STDERR "\nDECODE TEST SET\n"; -my $decolog = mydircat($outdir, "test-decode.log"); -my $testtrans = mydircat($outdir, "test.trans"); -my $cmd = "cat $test | $PARALLELIZE -j 20 -e $decolog -- $CDEC -c $testini -w $tuned_weights > $testtrans"; -safesystem($testtrans, $cmd) or die "Failed to decode test set!"; - - -# EVALUATE -print STDERR "\nEVALUATE TEST SET\n"; -print STDERR "TEST: $testtrans\n"; -$cmd = "$teval $testtrans"; -safesystem(undef, $cmd) or die "Failed to evaluate!"; -exit 0; - - -sub write_random_weights_file { -  my ($file, @extras) = @_; -  open F, ">$file" or die "Can't write $file: $!"; -  my @feats = (@DEFAULT_FEATS, @extras); -  if ($XFEATS) { -    my @xfeats = qw( -      X_LogRuleCount X_LogECount X_LogFCount X_EGivenF X_FGivenE X_SingletonRule X_SingletonE X_SingletonF -    ); -    @feats = (@feats, @xfeats); -  } -  for my $feat (@feats) { -    my $r = rand(1.6); -    my $w = $init_weights{$feat} * $r; -    if ($w == 0) { $w = 0.0001; print STDERR "WARNING: $feat had no initial weight!\n"; } -    print F "$feat $w\n"; -  } -  close F; -} - -sub filter { -  my ($grammar, $set, $name, $outdir) = @_; -  my $outgrammar = mydircat($outdir, "$name.scfg.gz"); -  if (-f $outgrammar) { print STDERR "$outgrammar exists - REUSING!\n"; } else { -    my $cmd = "gunzip -c $grammar | $FILTSCORE -c $CORPUS -t $set | $EXTRA_FILTER gzip > $outgrammar"; -    safesystem($outgrammar, $cmd) or die "Can't filter and score grammar!"; -  } -  return $outgrammar; -} - -sub mydircat { - my ($base, $suffix) = @_; - if ($suffix =~ /^\//) { return $suffix; } - my $res = $base . '/' . $suffix; - $res =~ s/\/\//\//g; - return $res; -} - -sub write_cdec_ini { -  my ($filename, $grammar_path) = (@_); -  open CDECINI, ">$filename" or die "Can't write $filename: $!"; -  print CDECINI <<EOT; -formalism=scfg -cubepruning_pop_limit=100 -add_pass_through_rules=true -scfg_extra_glue_grammar=/export/ws10smt/data/glue/glue.scfg.gz -grammar=$grammar_path -feature_function=WordPenalty -feature_function=LanguageModel -o 3 $LANG_MODEL -EOT -  close CDECINI; -}; - -sub print_help { -  print STDERR<<EOT; - -Usage: $0 [OPTIONS] language-pair unfiltered-grammar.gz - -Given an induced grammar for an entire corpus (i.e., generated by -local-gi-pipeline.pl), filter and featurize it for a dev and test set, -run MERT, report scores. - -EOT -} - -sub safesystem { -  my $output = shift @_; -  print STDERR "Executing: @_\n"; -  system(@_); -  if ($? == -1) { -      print STDERR "ERROR: Failed to execute: @_\n  $!\n"; -      if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; } -      exit(1); -  } -  elsif ($? & 127) { -      printf STDERR "ERROR: Execution of: @_\n  died with signal %d, %s coredump\n", -          ($? & 127),  ($? & 128) ? 'with' : 'without'; -      if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; } -      exit(1); -  } -  else { -    my $exitcode = $? >> 8; -    if ($exitcode) { -      print STDERR "Exit code: $exitcode\n"; -      if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; } -    } -    return ! 
$exitcode; -  } -} - -sub assert_exec { -  my @files = @_; -  for my $file (@files) { -    die "Can't find $file - did you run make?\n" unless -e $file; -    die "Can't execute $file" unless -x $file; -  } -}; - diff --git a/gi/pipeline/backoff-pipe.pl b/gi/pipeline/backoff-pipe.pl deleted file mode 100644 index ac103c8b..00000000 --- a/gi/pipeline/backoff-pipe.pl +++ /dev/null @@ -1,215 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -use Getopt::Long "GetOptions"; - -my @grammars; -my $OUTPUTPREFIX = './giwork/bo.hier.grammar'; -safemkdir($OUTPUTPREFIX); -my $backoff_levels = 1; -my $glue_levels = 1; - -usage() unless &GetOptions('grmr=s@' => \ @grammars, -                           'outprefix=s' => \ $OUTPUTPREFIX, -                           'bo-lvls=i' => \ $backoff_levels, -                           'glue-lvls=i' => \ $glue_levels, -); -                            -my $OUTDIR = $OUTPUTPREFIX . '/hier'; -print STDERR "@grammars\n"; - - -my %grmr = (); -foreach my $grammar (@grammars) { -    $grammar =~ m/\/[^\/]*\.t(\d+)\.[^\/]*/; -    $grmr{$1} = $grammar; -} - -my @index = sort keys %grmr; -$OUTDIR = $OUTDIR . join('-',@index); -safemkdir($OUTDIR); -my $BACKOFF_GRMR = $OUTDIR . '/backoff.hier.gz'; -safesystem("echo \"\" | gzip > $BACKOFF_GRMR"); -my $GLUE_GRMR = $OUTDIR . '/glue.hier.gz'; -safesystem("echo \"\" | gzip > $GLUE_GRMR"); -my $joinedgrammars = $OUTDIR . '/grammar.hier.gz'; - -join_grammars(); - -for my $i (0..(scalar @index)-2) { -    my $freqs = extract_freqs($index[$i], $index[$i+1]); -    if ($i < $backoff_levels) { -        create_backoff_rules($index[$i],$index[$i+1],$freqs); -    } -    if ($i < $glue_levels) { -        add_glue_rules($index[$i]); -    } -} - -output_grammar_info(); - - -sub usage { -  print <<EOT; - -Usage: $0 --grmr=grammar1.gz [--grmr=grammar2.gz ...] [OPTIONS] - -Builds hierarchical backoff and glue grammars from a sequence of induced -grammars; see --outprefix, --bo-lvls and --glue-lvls. - -EOT -  exit 1; -}; - -sub safemkdir { -  my $dir = shift; -  if (-d $dir) { return 1; } -  return mkdir($dir); -} - - -sub safesystem { -  print STDERR "Executing: @_\n"; -  system(@_); -  if ($? == -1) { -      print STDERR "ERROR: Failed to execute: @_\n  $!\n"; -      exit(1); -  } -  elsif ($? & 127) { -      printf STDERR "ERROR: Execution of: @_\n  died with signal %d, %s coredump\n", -          ($? & 127),  ($? & 128) ? 'with' : 'without'; -      exit(1); -  } -  else { -    my $exitcode = $? >> 8; -    print STDERR "Exit code: $exitcode\n" if $exitcode; -    return ! $exitcode; -  } -} - - -sub join_grammars { -    print STDERR "\n!!! JOINING GRAMMARS\n"; -    if(-e $joinedgrammars) { -        print STDERR "$joinedgrammars exists, reusing...\n"; -        return; -    } -    safesystem("echo \"\" | gzip > $joinedgrammars"); -    foreach my $i (@index) { -        my $g = $grmr{$i}; -        safesystem("zcat $g | sed -r -e 's/X([0-9]+)/X$i\\1/g' - | gzip > $g.2.gz"); -        safesystem("zcat $joinedgrammars $g.2.gz | gzip > $joinedgrammars.2.gz"); -        safesystem("mv $joinedgrammars.2.gz $joinedgrammars"); -    } -} - - -sub extract_freqs { -    my($grmr1,$grmr2) = @_; -    print STDERR "\n!!!EXTRACTING FREQUENCIES: $grmr1->$grmr2\n"; -    my $IN_COARSE = substr($grmr{$grmr1},0,index($grmr{$grmr1},".grammar/")) . "/labeled_spans.txt"; -    my $IN_FINE = substr($grmr{$grmr2},0,index($grmr{$grmr2},".grammar/")) . 
"/labeled_spans.txt"; -    my $OUT_SPANS = "$OUTDIR/labeled_spans.hier$grmr1-$grmr2.txt"; -    my $FREQS = "$OUTDIR/label_freq.hier$grmr1-$grmr2.txt"; -    if(-e $OUT_SPANS && -e $FREQS) { -        print STDERR "$OUT_SPANS exists, reusing...\n"; -        print STDERR "$FREQS exists, reusing...\n"; -        return $FREQS; -    } -     -    safesystem("paste -d ' ' $IN_COARSE $IN_FINE > $OUT_SPANS"); -     -    my %FREQ_HIER = (); -    my %finehier = (); -     -    open SPANS, $OUT_SPANS or die $!; -    while (<SPANS>) { -        my ($tmp, $coarse, $fine) = split /\|\|\|/; -        my @coarse_spans = $coarse =~ /\d+-\d+:X(\d+)/g; -        my @fine_spans = $fine =~ /\d+-\d+:X(\d+)/g; -         -        foreach my $i (0..(scalar @coarse_spans)-1) { -            my $coarse_cat = $coarse_spans[$i]; -            my $fine_cat = $fine_spans[$i]; -             -            $FREQ_HIER{$coarse_cat}{$fine_cat}++; -        } -    } -    close SPANS; -    foreach (values %FREQ_HIER) { -        my $coarse_freq = $_; -        my $total = 0; -        $total+=$_ for (values %{ $coarse_freq }); -        $coarse_freq->{$_}=log($coarse_freq->{$_}/$total) for (keys %{ $coarse_freq }); -    } -    open FREQS, ">", $FREQS or die $!; -    foreach my $coarse_cat (keys %FREQ_HIER) { -        print FREQS "$coarse_cat |||"; -        foreach my $fine_cat (keys %{$FREQ_HIER{$coarse_cat}}) { -            my $freq = $FREQ_HIER{$coarse_cat}{$fine_cat}; -            print FREQS " $fine_cat:$freq"; -            if(! exists $finehier{$fine_cat} || $finehier{$fine_cat} < $freq) { -               $finehier{$fine_cat} = $coarse_cat; -            }   -        } -        print FREQS "\n"; -    } -#    foreach my $fine_cat (keys %finehier) { -#        print FREQS "$fine_cat -> $finehier{$fine_cat}\n"; -#    } -    close FREQS; -    return $FREQS; -} - - -sub create_backoff_rules { -    print STDERR "\n!!! CREATING BACKOFF RULES\n"; -    my ($grmr1, $grmr2, $freq) = @_; -    my $OUTFILE = "$OUTDIR/backoff.hier$grmr1-$grmr2.txt"; -    if(-e $OUTFILE) { -        print STDERR "$OUTFILE exists, reusing...\n"; -        return; -    } -    open FREQS, $freq or die $!; -    open TMP, ">", $OUTFILE or die $!; -    while (<FREQS>) { -        my $line = $_; -        $line = m/^(\d+) \|\|\| (.+)$/; -        my $coarse = $1; -        $line = $2; -        my @finefreq = $line =~ m/(\d+):(\S+)/g; -        for(my $i = 0; $i < scalar @finefreq; $i+=2) { -            my $finecat = $finefreq[$i]; -            my $finefreq = $finefreq[$i+1]; -            print TMP "[X$grmr1$coarse] ||| [X$grmr2$finecat,1]\t[1] ||| BackoffRule=$finefreq A=0-0\n"; -        } -    } -    close TMP; -    close FREQS; -    safesystem("zcat $BACKOFF_GRMR | cat - $OUTFILE | gzip > $BACKOFF_GRMR.2.gz"); -    safesystem("mv $BACKOFF_GRMR.2.gz $BACKOFF_GRMR"); -} - -sub add_glue_rules { -    print STDERR "\n!!! CREATING GLUE RULES\n"; -    my ($grmr) = @_; -    my $OUTFILE = "$OUTDIR/glue.$grmr.gz"; -    if (-e $OUTFILE) { -        print STDERR "$OUTFILE exists, reusing...\n"; -        return; -    } -    open TMP, ">", $OUTFILE or die $!; -    for my $i (0..($grmr-1)) { -        print TMP "[S] ||| [S,1] [X$grmr$i,2] ||| [1] [2] ||| Glue=1\n"; -        print TMP "[S] ||| [X$grmr$i,1] ||| [1] ||| GlueTop=1\n"; -    } -    close TMP; -    safesystem("zcat $GLUE_GRMR | cat - $OUTFILE | gzip > $GLUE_GRMR.2.gz"); -    safesystem("mv $GLUE_GRMR.2.gz $GLUE_GRMR"); -} - -sub output_grammar_info { -    print STDERR "\n!!! 
GRAMMAR INFORMATION\n"; -    print STDOUT "GRAMMAR: \t$joinedgrammars\n"; -    print STDOUT "GLUE: \t$GLUE_GRMR\n"; -    print STDOUT "BACKOFF: \t$BACKOFF_GRMR\n"; -} diff --git a/gi/pipeline/blacklight.config b/gi/pipeline/blacklight.config deleted file mode 100644 index fc59a604..00000000 --- a/gi/pipeline/blacklight.config +++ /dev/null @@ -1,9 +0,0 @@ -# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED -# name path aligned-corpus LM dev dev-refs test1 testt-eval.sh ... -/usr/users/0/cdyer/ws10smt/data -btec /home/cdyer/ws10smt-data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh -zhen /home/cdyer/ws10smt-data/chinese-english corpus.zh-en.al lm/c2e.3gram.lm.gz dev_and_test/mt02.src.txt dev_and_test/mt02.ref.* dev_and_test/mt03.src.txt eval-mt03.sh -aren /home/cdyer/ws10smt-data/arabic-english corpus.ar-en-al lm/a2e.3gram.lm.gz dev_and_test/dev.src.txt dev_and_test/dev.ref.txt.* dev_and_test/mt05.src.txt eval-mt05.sh -uren /usr/users/0/cdyer/ws10smt/data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh -nlfr /home/cdyer/ws10smt-data/dutch-french corpus.nl-fr.al - diff --git a/gi/pipeline/clsp.config b/gi/pipeline/clsp.config deleted file mode 100644 index c23d409f..00000000 --- a/gi/pipeline/clsp.config +++ /dev/null @@ -1,10 +0,0 @@ -# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED -# name path aligned-corpus LM dev dev-refs test1 testt-eval.sh ... -/export/ws10smt/data -btec /export/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh -fbis /export/ws10smt/data/chinese-english.fbis corpus.zh-en.al -zhen /export/ws10smt/data/chinese-english corpus.zh-en.al lm/c2e.3gram.lm.gz dev_and_test/mt02.src.txt dev_and_test/mt02.ref.* dev_and_test/mt03.src.txt eval-mt03.sh -aren /export/ws10smt/data/arabic-english corpus.ar-en-al lm/a2e.3gram.lm.gz dev_and_test/dev.src.txt dev_and_test/dev.ref.txt.* dev_and_test/mt05.src.txt eval-mt05.sh -uren /export/ws10smt/data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh -nlfr /export/ws10smt/data/dutch-french corpus.nl-fr.al - diff --git a/gi/pipeline/evaluation-pipeline.pl b/gi/pipeline/evaluation-pipeline.pl deleted file mode 100755 index 4b4529d9..00000000 --- a/gi/pipeline/evaluation-pipeline.pl +++ /dev/null @@ -1,364 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use Getopt::Long; -use Cwd; -my $CWD = getcwd; - -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment"; } -use LocalConfig; - -my $JOBS = 15; -my $PMEM = "9G"; -my $NUM_TRANSLATIONS = 50; -my $GOAL = "S"; - -# featurize_grammar may add multiple features from a single feature extractor -# the key in this map is the extractor name, the value is a list of the extracted features -my $feat_map = { -  "LogRuleCount" => [ "LogRuleCount", "SingletonRule" ] , -#  "XFeatures" => [ "XFE","XEF" ] , -  "XFeatures" => [ "XFE","XEF","LabelledEF","LabelledFE"], # ,"XE_Singleton","XF_Singleton"] , -  "LabelledRuleConditionals" => [ "LabelledFE","LabelledEF" ] , -  "LexProb" => [ "LexE2F", "LexF2E" ] , -  "BackoffRule" => [ "BackoffRule" ] , -  "RulePenalty" => [ "RulePenalty" ] , -  "LHSProb" => [ "LHSProb" ] , -  "LabellingShape" => [ "LabellingShape" ] , -  "GenerativeProb" => [ "GenerativeProb" ] , -}; - -my %init_weights = qw( -  EGivenF 
-0.735245 -  FGivenE -0.219391 -  Glue -0.306709 -  GlueTop 0.0473331 -  LanguageModel 2.40403 -  LexE2F -0.266989 -  LexF2E -0.550373 -  LogECount -0.129853 -  LogFCount -0.194037 -  LogRuleCount 0.256706 -  BackoffRule 0.5 -  XFE -0.256706 -  XEF -0.256706 -  XF_Singleton -0.05 -  XE_Singleton -0.8 -  LabelledFE -0.256706 -  LabelledEF -0.256706 -  PassThrough -0.9304905 -  SingletonE -3.04161 -  SingletonF 0.0714027 -  SingletonRule -0.889377 -  WordPenalty -1.99495 -  RulePenalty -0.1 -  LabellingShape -0.1 -  LHSProb -0.1 -  GenerativeProb -0.1 -); - - -# these features are included by default -my @DEFAULT_FEATS = qw( PassThrough Glue GlueTop LanguageModel WordPenalty ); - - - -my $FILTERBYF = "$SCRIPT_DIR/scripts/filter-by-f.pl"; -my $CDEC = "$SCRIPT_DIR/../../decoder/cdec"; -my $PARALLELIZE = "$SCRIPT_DIR/../../vest/parallelize.pl"; -my $EXTOOLS = "$SCRIPT_DIR/../../extools"; -die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS; -my $VEST = "$SCRIPT_DIR/../../vest"; -die "Can't find vest: $VEST" unless -e $VEST && -d $VEST; -my $DISTVEST = "$VEST/dist-vest.pl"; -my $FILTER = "$EXTOOLS/filter_grammar"; -my $FEATURIZE = "$EXTOOLS/featurize_grammar"; -assert_exec($CDEC, $PARALLELIZE, $FILTER, $FEATURIZE, $DISTVEST, $FILTERBYF); - -my $numtopics = 25; - -my $config = "$SCRIPT_DIR/" . (lc environment_name()) . '.config'; -print STDERR "CORPORA CONFIGURATION: $config\n"; -open CONF, "<$config" or die "Can't read $config: $!"; -my %paths; -my %corpora; -my %lms; -my %devs; -my %devrefs; -my %tests; -my %testevals; -my $datadir; -print STDERR "       LANGUAGE PAIRS:"; -while(<CONF>) { -  chomp; -  next if /^#/; -  next if /^\s*$/; -  s/^\s+//; -  s/\s+$//; -  if (! defined $datadir) { $datadir = $_; next; } -  my ($name, $path, $corpus, $lm, $dev, $devref, @xtests) = split /\s+/; -  $paths{$name} = $path; -  $corpora{$name} = $corpus; -  $lms{$name} = $lm; -  $devs{$name} = $dev; -  $devrefs{$name} = $devref; -  $tests{$name} = $xtests[0]; -  $testevals{$name} = $xtests[1]; -  print STDERR " $name"; -} -print STDERR "\n"; - -my %langpairs = map { $_ => 1 } qw( btec zhen fbis aren uren nlfr ); - -my $outdir = "$CWD/exp"; -my $help; -my $FEATURIZER_OPTS = ''; -my $dataDir = '/export/ws10smt/data'; -my @features; -my $bkoffgram; -my $gluegram; -my $oovgram; -my $usefork; -my $lmorder = 3; -my $density; -if (GetOptions( -        "backoff-grammar=s" => \$bkoffgram, -        "density-prune=f" => \$density, -        "glue-grammar=s" => \$gluegram, -        "oov-grammar=s" => \$oovgram, -        "data=s" => \$dataDir, -        "pmem=s" => \$PMEM, -        "n=i" => \$NUM_TRANSLATIONS, -        "features=s@" => \@features, -        "use-fork" => \$usefork, -        "jobs=i" => \$JOBS, -        "out-dir=s" => \$outdir, -        "lmorder=i" => \$lmorder, -        "goal=s" => \$GOAL, -) == 0 || @ARGV!=2 || $help) { -        print_help(); -        exit; -} -my $DENSITY_PRUNE = ''; -if ($density) { -  $DENSITY_PRUNE = "--density-prune $density"; -} -if ($usefork) { $usefork="--use-fork"; } else { $usefork = ''; } -my @fkeys = keys %$feat_map; -die "You must specify one or more features with -f. 
Known features: @fkeys\n" unless scalar @features > 0; -my @xfeats; -for my $feat (@features) { -  my $rs = $feat_map->{$feat}; -  if (!defined $rs) { die "DON'T KNOW ABOUT FEATURE $feat\n"; } -  my @xfs = @$rs; -  @xfeats = (@xfeats, @xfs); -  $FEATURIZER_OPTS .= " -f $feat" unless $feat eq "BackoffRule"; -} -print STDERR "X-FEATS: @xfeats\n"; - -my $lp = $ARGV[0]; -my $grammar = $ARGV[1]; -print STDERR "   CORPUS REPO: $dataDir\n"; -print STDERR " LANGUAGE PAIR: $lp\n"; -die "I don't know about that language pair\n" unless $paths{$lp}; -my $corpdir = "$dataDir"; -if ($paths{$lp} =~ /^\//) { $corpdir = $paths{$lp}; } else { $corpdir .= '/' . $paths{$lp}; } -die "I can't find the corpora directory: $corpdir" unless -d $corpdir; -print STDERR "       GRAMMAR: $grammar\n"; -my $LANG_MODEL = mydircat($corpdir, $lms{$lp}); -print STDERR "            LM: $LANG_MODEL\n"; -my $CORPUS = mydircat($corpdir, $corpora{$lp}); -die "Can't find corpus: $CORPUS" unless -f $CORPUS; - -my $dev = mydircat($corpdir, $devs{$lp}); -my $drefs = $devrefs{$lp}; -die "Can't find dev: $dev\n" unless -f $dev; -die "Dev refs not set" unless $drefs; -$drefs = mydircat($corpdir, $drefs); - -my $test = mydircat($corpdir, $tests{$lp}); -my $teval = mydircat($corpdir, $testevals{$lp}); -#die "Can't find test: $test\n" unless -f $test; -#assert_exec($teval); - -`mkdir -p $outdir`; - -# CREATE INIT WEIGHTS -print STDERR "\nCREATING INITIAL WEIGHTS FILE: weights.init\n"; -my $weights = mydircat($outdir, "weights.init"); -write_random_weights_file($weights, @xfeats); - -my $bkoff_grmr; -my $glue_grmr; -if($bkoffgram) { -    print STDERR "Placing backoff grammar…\n"; -    $bkoff_grmr = mydircat($outdir, "backoff.scfg.gz"); -    print STDERR "cp $bkoffgram $bkoff_grmr\n"; -    safesystem(undef,"cp $bkoffgram $bkoff_grmr"); -} -if($gluegram) { -    print STDERR "Placing glue grammar…\n"; -    $glue_grmr = mydircat($outdir, "glue.bo.scfg.gz"); -    print STDERR "cp $gluegram $glue_grmr\n"; -    safesystem(undef,"cp $gluegram $glue_grmr"); -} - -# MAKE DEV -print STDERR "\nFILTERING FOR dev...\n"; -print STDERR "DEV: $dev (REFS=$drefs)\n"; -my $devgrammar = filter($grammar, $dev, 'dev', $outdir); -my $devini = mydircat($outdir, "cdec-dev.ini"); -write_cdec_ini($devini, $devgrammar); - - -# MAKE TEST -print STDERR "\nFILTERING FOR test...\n"; -print STDERR "TEST: $test (EVAL=$teval)\n"; -`mkdir -p $outdir`; -my $testgrammar = filter($grammar, $test, 'test', $outdir); -my $testini = mydircat($outdir, "cdec-test.ini"); -write_cdec_ini($testini, $testgrammar); - - -# VEST -print STDERR "\nMINIMUM ERROR TRAINING\n"; -my $tuned_weights = mydircat($outdir, 'weights.tuned'); -if (-f $tuned_weights) { -  print STDERR "TUNED WEIGHTS $tuned_weights EXISTS: REUSING\n"; -} else { -  my $cmd = "$DISTVEST $usefork $DENSITY_PRUNE --decode-nodes $JOBS --pmem=$PMEM --ref-files=$drefs --source-file=$dev --weights $weights $devini"; -  print STDERR "MERT COMMAND: $cmd\n"; -  `rm -rf $outdir/vest 2> /dev/null`; -  chdir $outdir or die "Can't chdir to $outdir: $!"; -  $weights = `$cmd`; -  die "MERT reported non-zero exit code" unless $? == 0; -  chomp $weights; -  safesystem($tuned_weights, "cp $weights $tuned_weights"); -  print STDERR "TUNED WEIGHTS: $tuned_weights\n"; -  die "$tuned_weights is missing!" 
unless -f $tuned_weights; -} - -# DECODE -print STDERR "\nDECODE TEST SET\n"; -my $decolog = mydircat($outdir, "test-decode.log"); -my $testtrans = mydircat($outdir, "test.trans"); -my $cmd = "cat $test | $PARALLELIZE $usefork -j $JOBS -e $decolog -- $CDEC -c $testini -w $tuned_weights > $testtrans"; -safesystem($testtrans, $cmd) or die "Failed to decode test set!"; - - -# EVALUATE -print STDERR "\nEVALUATE TEST SET\n"; -print STDERR "TEST: $testtrans\n"; -$cmd = "$teval $testtrans"; -safesystem(undef, $cmd) or die "Failed to evaluate!"; -exit 0; - - -sub write_random_weights_file { -  my ($file, @extras) = @_; -  if (-f $file) { -    print STDERR "$file exists - REUSING!\n"; -    return; -  } -  open F, ">$file" or die "Can't write $file: $!"; -  my @feats = (@DEFAULT_FEATS, @extras); -  for my $feat (@feats) { -    my $r = rand(0.4) + 0.8; -    my $w = $init_weights{$feat} * $r; -    if ($w == 0) { $w = 0.0001; print STDERR "WARNING: $feat had no initial weight!\n"; } -    print F "$feat $w\n"; -  } -  close F; -} - -sub filter { -  my ($grammar, $set, $name, $outdir) = @_; -  my $out1 = mydircat($outdir, "$name.filt.gz"); -  my $out2 = mydircat($outdir, "$name.f_feat.gz"); -  my $outgrammar = mydircat($outdir, "$name.scfg.gz"); -  if (-f $outgrammar) { print STDERR "$outgrammar exists - REUSING!\n"; } else { -    my $cmd = "gunzip -c $grammar | $FILTER -t $set | gzip > $out1"; -    safesystem($out1, $cmd) or die "Filtering failed."; -    $cmd = "gunzip -c $out1 | $FEATURIZE $FEATURIZER_OPTS -g $out1 -c $CORPUS | gzip > $out2"; -    safesystem($out2, $cmd) or die "Featurizing failed"; -    $cmd = "$FILTERBYF $NUM_TRANSLATIONS $out2 $outgrammar"; -    safesystem($outgrammar, $cmd) or die "Secondary filtering failed"; -  } -  return $outgrammar; -}   - -sub mydircat { - my ($base, $suffix) = @_; - if ($suffix =~ /^\//) { return $suffix; } - my $res = $base . '/' . $suffix; - $res =~ s/\/\//\//g; - return $res; -} - -sub write_cdec_ini { -  my ($filename, $grammar_path) = (@_); -  open CDECINI, ">$filename" or die "Can't write $filename: $!"; -  my $glue = ($gluegram ? "$glue_grmr" : "$datadir/glue/glue.scfg.gz"); -  my $oov = ($oovgram ? "$oovgram" : "$datadir/oov.scfg.gz"); -  print CDECINI <<EOT; -formalism=scfg -cubepruning_pop_limit=100 -add_pass_through_rules=true -scfg_extra_glue_grammar=$glue -grammar=$oov -grammar=$grammar_path -scfg_default_nt=OOV -scfg_no_hiero_glue_grammar=true -feature_function=WordPenalty -feature_function=LanguageModel -o $lmorder $LANG_MODEL -goal=$GOAL -EOT -  print CDECINI "grammar=$bkoff_grmr\n" if $bkoffgram; -  close CDECINI; -}; - -sub print_help { -  print STDERR<<EOT; - -Usage: $0 [-c data-config-file] [-n N] language-pair grammar.bidir.gz [OPTIONS] - -Given an induced grammar for an entire corpus (i.e., generated by -local-gi-pipeline.pl), filter and featurize it for a dev and test set, -run MERT, report scores. Use -n to specify the number of translations -to keep for a given source (50 is default). - -EOT -} - -sub safesystem { -  my $output = shift @_; -  print STDERR "Executing: @_\n"; -  system(@_); -  if ($? == -1) { -      print STDERR "ERROR: Failed to execute: @_\n  $!\n"; -      if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; } -      exit(1); -  } -  elsif ($? & 127) { -      printf STDERR "ERROR: Execution of: @_\n  died with signal %d, %s coredump\n", -          ($? & 127),  ($? & 128) ? 
'with' : 'without'; -      if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; } -      exit(1); -  } -  else { -    my $exitcode = $? >> 8; -    if ($exitcode) { -      print STDERR "Exit code: $exitcode\n"; -      if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; } -    } -    return ! $exitcode; -  } -} - -sub assert_exec { -  my @files = @_; -  for my $file (@files) { -    die "Can't find $file - did you run make?\n" unless -e $file; -    die "Can't execute $file" unless -x $file; -  } -}; - diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl deleted file mode 100755 index e31167a2..00000000 --- a/gi/pipeline/local-gi-pipeline.pl +++ /dev/null @@ -1,465 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use File::Copy; - -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; } - -use Getopt::Long "GetOptions"; - -my $GZIP = 'gzip'; -my $ZCAT = 'gunzip -c'; -my $SED = 'sed -e'; -my $BASE_PHRASE_MAX_SIZE = 10; -my $COMPLETE_CACHE = 1; -my $ITEMS_IN_MEMORY = 10000000;  # cache size in extractors -my $NUM_TOPICS = 50; -my $NUM_TOPICS_COARSE; -my $NUM_TOPICS_FINE = $NUM_TOPICS; -my $NUM_SAMPLES = 1000; -my $CONTEXT_SIZE = 1; -my $BIDIR = 0; -my $TOPICS_CONFIG = "pyp-topics.conf"; -my $LANGUAGE = "target"; -my $LABEL_THRESHOLD = "0"; -my $PRESERVE_PHRASES; - -my $MODEL = "pyp"; -my $NUM_ITERS = 100; -my $PR_SCALE_P = 0; -my $PR_SCALE_C = 0; -my $PR_FLAGS = ""; -my $MORFMARK = ""; - -my $EXTOOLS = "$SCRIPT_DIR/../../extools"; -die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS; -my $PYPTOOLS = "$SCRIPT_DIR/../pyp-topics/src"; -die "Can't find pyp-topics: $PYPTOOLS" unless -e $PYPTOOLS && -d $PYPTOOLS; -my $PYPSCRIPTS = "$SCRIPT_DIR/../pyp-topics/scripts"; -die "Can't find pyp-topics: $PYPSCRIPTS" unless -e $PYPSCRIPTS && -d $PYPSCRIPTS; -my $PRTOOLS = "$SCRIPT_DIR/../posterior-regularisation"; -die "Can't find posterior-regularisation: $PRTOOLS" unless -e $PRTOOLS && -d $PRTOOLS; -my $REDUCER = "$EXTOOLS/mr_stripe_rule_reduce"; -my $C2D = "$PYPSCRIPTS/contexts2documents.py"; -my $S2L = "$PYPSCRIPTS/spans2labels.py"; -my $SPLIT = "$SCRIPT_DIR/../posterior-regularisation/split-languages.py"; - -my $PREM_TRAIN="$PRTOOLS/prjava/train-PR-cluster.sh"; - -my $SORT_KEYS = "$SCRIPT_DIR/scripts/sort-by-key.sh"; -my $PATCH_CORPUS = "$SCRIPT_DIR/scripts/patch-corpus.pl"; -my $REMOVE_TAGS_CORPUS = "$SCRIPT_DIR/scripts/remove-tags-from-corpus.pl"; -my $REMOVE_TAGS_CONTEXT = "$SCRIPT_DIR/scripts/remove-tags-from-contexts.pl"; -my $EXTRACTOR = "$EXTOOLS/extractor"; -my $TOPIC_TRAIN = "$PYPTOOLS/pyp-contexts-train"; -my $MORF_DOC_FILTER = "$SCRIPT_DIR/../morf-segmentation/filter_docs.pl"; - -assert_exec($PATCH_CORPUS, $SORT_KEYS, $REDUCER, $EXTRACTOR, -            $S2L, $C2D, $TOPIC_TRAIN, $SPLIT, $REMOVE_TAGS_CONTEXT, $REMOVE_TAGS_CORPUS, $MORF_DOC_FILTER); - -my $BACKOFF_GRAMMAR; -my $DEFAULT_CAT; -my $HIER_CAT; -my %FREQ_HIER = (); -my $TAGGED_CORPUS; - -my $NAME_SHORTCUT; - -my $OUTPUT = './giwork'; -usage() unless &GetOptions('base_phrase_max_size=i' => \$BASE_PHRASE_MAX_SIZE, -                           'backoff_grammar' => \$BACKOFF_GRAMMAR, -                           'output=s' => \$OUTPUT, -                           'model=s' => \$MODEL, -                           'topics=i' => \$NUM_TOPICS_FINE, -                           'coarse_topics=i' => \$NUM_TOPICS_COARSE, -                           
'trg_context=i' => \$CONTEXT_SIZE, -                           'samples=i' => \$NUM_SAMPLES, -                           'label_threshold=f' => \$LABEL_THRESHOLD, -                           'use_default_cat' => \$DEFAULT_CAT, -                           'topics-config=s' => \$TOPICS_CONFIG, -                           'iterations=i' => \$NUM_ITERS, -                           'pr-scale-phrase=f' => \$PR_SCALE_P, -                           'pr-scale-context=f' => \$PR_SCALE_C, -                           'pr-flags=s' => \$PR_FLAGS, -                           'tagged_corpus=s' => \$TAGGED_CORPUS, -                           'language=s' => \$LANGUAGE, -                           'get_name_only' => \$NAME_SHORTCUT, -                           'preserve_phrases' => \$PRESERVE_PHRASES, -                           'morf=s' => \$MORFMARK, -                          ); -if ($NAME_SHORTCUT) { -  $NUM_TOPICS = $NUM_TOPICS_FINE; -  print STDERR labeled_dir(); -  exit 0; -} -usage() unless scalar @ARGV == 1; -my $CORPUS = $ARGV[0]; -open F, "<$CORPUS" or die "Can't read $CORPUS: $!"; close F; - -$NUM_TOPICS = $NUM_TOPICS_FINE; - -$HIER_CAT = ( $NUM_TOPICS_COARSE ? 1 : 0 ); - -print STDERR "   Output: $OUTPUT\n"; -my $DATA_DIR = $OUTPUT . '/corpora'; -my $LEX_NAME = "corpus.f_e_a.$LANGUAGE.lex"; -my $CORPUS_LEX = $DATA_DIR . '/' . $LEX_NAME;  # corpus used to extract rules -my $CORPUS_CLUSTER = $DATA_DIR . "/corpus.f_e_a.$LANGUAGE.cluster"; # corpus used for clustering (often identical) - -my $CONTEXT_DIR = $OUTPUT . '/' . context_dir(); -my $CLUSTER_DIR = $OUTPUT . '/' . cluster_dir(); -my $LABELED_DIR = $OUTPUT . '/' . labeled_dir(); -my $CLUSTER_DIR_C; -my $CLUSTER_DIR_F; -my $LABELED_DIR_C; -my $LABELED_DIR_F; -if($HIER_CAT) { -    $CLUSTER_DIR_F = $CLUSTER_DIR; -    $LABELED_DIR_F = $LABELED_DIR; -    $NUM_TOPICS = $NUM_TOPICS_COARSE; -    $CLUSTER_DIR_C = $OUTPUT . '/' . cluster_dir(); -    $LABELED_DIR_C = $OUTPUT . '/' . labeled_dir(); -    $NUM_TOPICS = $NUM_TOPICS_FINE; -} -my $GRAMMAR_DIR = $OUTPUT . '/' . grammar_dir(); -print STDERR "  Context: $CONTEXT_DIR\n  Cluster: $CLUSTER_DIR\n  Labeled: $LABELED_DIR\n  Grammar: $GRAMMAR_DIR\n"; -safemkdir($OUTPUT) or die "Couldn't create output directory $OUTPUT: $!"; -safemkdir($DATA_DIR) or die "Couldn't create output directory $DATA_DIR: $!"; -safemkdir($CONTEXT_DIR) or die "Couldn't create output directory $CONTEXT_DIR: $!"; -safemkdir($CLUSTER_DIR) or die "Couldn't create output directory $CLUSTER_DIR: $!"; -if($HIER_CAT) { -    safemkdir($CLUSTER_DIR_C) or die "Couldn't create output directory $CLUSTER_DIR_C: $!"; -    safemkdir($LABELED_DIR_C) or die "Couldn't create output directory $LABELED_DIR_C: $!"; -} -safemkdir($LABELED_DIR) or die "Couldn't create output directory $LABELED_DIR: $!"; -safemkdir($GRAMMAR_DIR) or die "Couldn't create output directory $GRAMMAR_DIR: $!"; -if(-e $TOPICS_CONFIG) { -    copy($TOPICS_CONFIG, $CLUSTER_DIR) or die "Copy failed: $!"; -} - -setup_data(); - -if (lc($MODEL) eq "blagree") { -    extract_bilingual_context(); -} else { -    extract_context(); -} - -if (lc($MODEL) eq "pyp") { -    if($HIER_CAT) { -        $NUM_TOPICS = $NUM_TOPICS_COARSE; -        $CLUSTER_DIR = $CLUSTER_DIR_C; -        topic_train(); -        $NUM_TOPICS = $NUM_TOPICS_FINE; -        $CLUSTER_DIR = $CLUSTER_DIR_F; -        topic_train(); -    } else { -        topic_train(); -    } -} elsif (lc($MODEL) =~ /pr|em|agree/) { -    prem_train(); -} else { die "Unsupported model type: $MODEL. 
Must be one of PYP, PR, EM, AGREE or BLAGREE.\n"; } -if($HIER_CAT) { -    $NUM_TOPICS = $NUM_TOPICS_COARSE; -    $CLUSTER_DIR = $CLUSTER_DIR_C; -    $LABELED_DIR = $LABELED_DIR_C; -    label_spans_with_topics(); -    $NUM_TOPICS = $NUM_TOPICS_FINE; -    $CLUSTER_DIR = $CLUSTER_DIR_F; -    $LABELED_DIR = $LABELED_DIR_F; -    label_spans_with_topics(); -    extract_freqs(); -} else { -    label_spans_with_topics(); -} -my $res; -if ($BIDIR) { -  $res = grammar_extract_bidir(); -} else { -  $res = grammar_extract(); -} -print STDERR "\n!!!COMPLETE!!!\n"; -print STDERR "GRAMMAR: $res\nYou should probably run: $SCRIPT_DIR/evaluation-pipeline.pl LANGPAIR giwork/ct1s0.L10.PYP.t4.s20.grammar/grammar.gz -f FEAT1 -f FEAT2\n\n"; -exit 0; - -sub setup_data { -  print STDERR "\n!!!PREPARE CORPORA!!!\n"; -  if (-f $CORPUS_LEX && $CORPUS_CLUSTER) { -    print STDERR "$CORPUS_LEX and $CORPUS_CLUSTER exist, reusing...\n"; -    return; -  } -  copy($CORPUS, $CORPUS_LEX); -  if ($TAGGED_CORPUS) { -    die "Can't find $TAGGED_CORPUS" unless -f $TAGGED_CORPUS; -    my $opt=""; -    $opt = "-s" if ($LANGUAGE eq "source"); -    $opt = $opt . " -a" if ($PRESERVE_PHRASES); -    my $cmd="$PATCH_CORPUS $opt $TAGGED_CORPUS $CORPUS_LEX > $CORPUS_CLUSTER"; -    safesystem($cmd) or die "Failed to extract contexts."; -  } else { -    symlink($LEX_NAME, $CORPUS_CLUSTER); -  } -} - -sub context_dir { -  return "ct${CONTEXT_SIZE}s0.L$BASE_PHRASE_MAX_SIZE.l$LANGUAGE"; -} - -sub cluster_dir { -    if (lc($MODEL) eq "pyp") { -        return context_dir() . ".PYP.t$NUM_TOPICS.s$NUM_SAMPLES"; -    } elsif (lc($MODEL) eq "em") { -        return context_dir() . ".EM.t$NUM_TOPICS.i$NUM_ITERS"; -    } elsif (lc($MODEL) eq "pr") { -        return context_dir() . ".PR.t$NUM_TOPICS.i$NUM_ITERS.sp$PR_SCALE_P.sc$PR_SCALE_C"; -    } elsif (lc($MODEL) eq "agree") { -        return context_dir() . ".AGREE.t$NUM_TOPICS.i$NUM_ITERS"; -    } elsif (lc($MODEL) eq "blagree") { -        return context_dir() . ".BLAGREE.t$NUM_TOPICS.i$NUM_ITERS"; -    } -} - -sub labeled_dir { -  if (lc($MODEL) eq "pyp" && $LABEL_THRESHOLD ne "0") { -    return cluster_dir() . "_lt$LABEL_THRESHOLD"; -  } else { -    return cluster_dir(); -  } -} - -sub grammar_dir { -  # TODO add grammar config options -- adjacent NTs, etc -  if($HIER_CAT) { -    return cluster_dir() . ".hier$NUM_TOPICS_COARSE-$NUM_TOPICS_FINE.grammar"; -  } else { -    return labeled_dir() . ".grammar"; -  } -} - - - -sub safemkdir { -  my $dir = shift; -  if (-d $dir) { return 1; } -  return mkdir($dir); -} - -sub usage { -  print <<EOT; - -Usage: $0 [OPTIONS] corpus.fr-en-al - -Induces a grammar using Pitman-Yor topic modeling or Posterior Regularisation. - -EOT -  exit 1; -}; - -sub assert_exec { -  my @files = @_; -  for my $file (@files) { -    die "Can't find $file - did you run make?\n" unless -e $file; -    die "Can't execute $file" unless -x $file; -  } -}; - -sub extract_context { - print STDERR "\n!!!CONTEXT EXTRACTION\n";  - my $OUT_CONTEXTS = "$CONTEXT_DIR/context.txt.gz"; - if (-e $OUT_CONTEXTS) { -   print STDERR "$OUT_CONTEXTS exists, reusing...\n"; - } else { -   my $ccopt = "-c $ITEMS_IN_MEMORY"; -   my $postsort = "| $REDUCER "; -   if ($COMPLETE_CACHE) { -     print STDERR "COMPLETE_CACHE is set: removing memory limits on cache.\n"; -     $ccopt = "-c 0"; -     $postsort = "" unless ($PRESERVE_PHRASES); -   } - -   my $presort = ($PRESERVE_PHRASES ? "| $REMOVE_TAGS_CONTEXT --phrase=tok --context=tag " : ""); - -   if ($MORFMARK ne "") {  -     $presort = $presort . 
"| $MORF_DOC_FILTER \"$MORFMARK\" ";  -   } - -   my $cmd = "$EXTRACTOR -i $CORPUS_CLUSTER $ccopt -L $BASE_PHRASE_MAX_SIZE -C -S $CONTEXT_SIZE --phrase_language $LANGUAGE --context_language $LANGUAGE $presort | $SORT_KEYS $postsort | $GZIP > $OUT_CONTEXTS"; -   safesystem($cmd) or die "Failed to extract contexts."; -  } -} - -sub extract_bilingual_context { - print STDERR "\n!!!CONTEXT EXTRACTION\n";  - my $OUT_SRC_CONTEXTS = "$CONTEXT_DIR/context.source"; - my $OUT_TGT_CONTEXTS = "$CONTEXT_DIR/context.target"; - - if (-e $OUT_SRC_CONTEXTS . ".gz" and -e $OUT_TGT_CONTEXTS . ".gz") { -   print STDERR "$OUT_SRC_CONTEXTS.gz and $OUT_TGT_CONTEXTS.gz exist, reusing...\n"; - } else { -   my $OUT_BI_CONTEXTS = "$CONTEXT_DIR/context.bilingual.txt.gz"; -   my $cmd = "$EXTRACTOR -i $CORPUS_CLUSTER -c $ITEMS_IN_MEMORY -L $BASE_PHRASE_MAX_SIZE -C -S $CONTEXT_SIZE --phrase_language both --context_language both | $SORT_KEYS | $REDUCER | $GZIP > $OUT_BI_CONTEXTS"; -   if ($COMPLETE_CACHE) { -     print STDERR "COMPLETE_CACHE is set: removing memory limits on cache.\n"; -     $cmd = "$EXTRACTOR -i $CORPUS_CLUSTER -c 0 -L $BASE_PHRASE_MAX_SIZE -C -S $CONTEXT_SIZE  --phrase_language both --context_language both  | $SORT_KEYS | $GZIP > $OUT_BI_CONTEXTS"; -   } -   safesystem($cmd) or die "Failed to extract contexts."; - -   safesystem("$ZCAT $OUT_BI_CONTEXTS | $SPLIT $OUT_SRC_CONTEXTS $OUT_TGT_CONTEXTS") or die "Failed to split contexts.\n"; -   safesystem("$GZIP -f $OUT_SRC_CONTEXTS") or die "Failed to zip output contexts.\n"; -   safesystem("$GZIP -f $OUT_TGT_CONTEXTS") or die "Failed to zip output contexts.\n"; - } -} - - -sub topic_train { -  print STDERR "\n!!!TRAIN PYP TOPICS\n"; -  my $IN_CONTEXTS = "$CONTEXT_DIR/context.txt.gz"; -  my $OUT_CLUSTERS = "$CLUSTER_DIR/docs.txt.gz"; -  if (-e $OUT_CLUSTERS) { -    print STDERR "$OUT_CLUSTERS exists, reusing...\n"; -  } else { -    safesystem("$TOPIC_TRAIN --data $IN_CONTEXTS --backoff-type simple -t $NUM_TOPICS -s $NUM_SAMPLES -o $OUT_CLUSTERS -c $TOPICS_CONFIG -w /dev/null") or die "Topic training failed.\n"; -  } -} - -sub prem_train { -  print STDERR "\n!!!TRAIN PR/EM model\n"; -  my $OUT_CLUSTERS = "$CLUSTER_DIR/docs.txt.gz"; -  if (-e $OUT_CLUSTERS) { -    print STDERR "$OUT_CLUSTERS exists, reusing...\n"; -  } else { -    my $in = "--in $CONTEXT_DIR/context.txt.gz"; -    my $opts = ""; -    if (lc($MODEL) eq "pr") { -        $opts = "--scale-phrase $PR_SCALE_P --scale-context $PR_SCALE_C"; -    } elsif (lc($MODEL) eq "agree") { -        $opts = "--agree-direction"; -    } elsif (lc($MODEL) eq "blagree") { -        $in = "--in $CONTEXT_DIR/context.source.gz --in1 $CONTEXT_DIR/context.target.gz"; -        $opts = "--agree-language"; -    } -    safesystem("$PREM_TRAIN $in --topics $NUM_TOPICS --out $OUT_CLUSTERS --iterations $NUM_ITERS $opts $PR_FLAGS") or die "Topic training failed.\n"; -  } -} - -sub label_spans_with_topics { -  my ($file) = (@_); -  print STDERR "\n!!!LABEL SPANS\n"; -  my $IN_CLUSTERS = "$CLUSTER_DIR/docs.txt.gz"; -  my $OUT_SPANS = "$LABELED_DIR/labeled_spans.txt"; -  if (-e $OUT_SPANS) { -    print STDERR "$OUT_SPANS exists, reusing...\n"; -  } else { -    my $extra = "tt"; -    if ($LANGUAGE eq "source") { -        $extra = "ss"; -    } elsif ($LANGUAGE eq "both") { -        $extra = "bb"; -    } else { die "Invalid language specifier $LANGUAGE\n" unless $LANGUAGE eq "target" }; -    $extra = $extra . 
" tok,tag" if ($PRESERVE_PHRASES); -    safesystem("$ZCAT $IN_CLUSTERS > $CLUSTER_DIR/clusters.txt") or die "Failed to unzip"; -    safesystem("$EXTRACTOR --base_phrase_spans -i $CORPUS_CLUSTER -c $ITEMS_IN_MEMORY -L $BASE_PHRASE_MAX_SIZE -S $CONTEXT_SIZE | $S2L $CLUSTER_DIR/clusters.txt $CONTEXT_SIZE $LABEL_THRESHOLD $extra > $OUT_SPANS") or die "Failed to label spans"; -    unlink("$CLUSTER_DIR/clusters.txt") or warn "Failed to remove $CLUSTER_DIR/clusters.txt"; -    safesystem("paste -d ' ' $CORPUS_LEX $OUT_SPANS | sed 's/ *||| *\$//'  > $LABELED_DIR/corpus.src_trg_al_label") or die "Couldn't paste"; -  } -} - -sub extract_freqs { -    print STDERR "\n!!!EXTRACTING FREQUENCIES\n"; -    my $IN_COARSE = "$LABELED_DIR_C/labeled_spans.txt"; -    my $IN_FINE = "$LABELED_DIR_F/labeled_spans.txt"; -    my $OUT_SPANS = "$LABELED_DIR_F/labeled_spans.hier$NUM_TOPICS_COARSE-$NUM_TOPICS_FINE.txt"; -    my $FREQS = "$LABELED_DIR_F/label_freq.hier$NUM_TOPICS_COARSE-$NUM_TOPICS_FINE.txt"; -    my $COARSE_EXPR = "\'s/\\(X[0-9][0-9]*\\)/\\1c/g\'"; #' -    my $FINE_EXPR = "\'s/\\(X[0-9][0-9]*\\)/\\1f/g\'"; #' -    my %finehier = (); -    if (-e $OUT_SPANS) { -        print STDERR "$OUT_SPANS exists, reusing...\n"; -    } else { -        safesystem("paste -d ' ' $IN_COARSE $IN_FINE > $OUT_SPANS"); -    } -    open SPANS, $OUT_SPANS or die $!; -    while (<SPANS>) { -        my ($tmp, $coarse, $fine) = split /\|\|\|/; -        my @coarse_spans = $coarse =~ /\d+-\d+:X(\d+)/g; -        my @fine_spans = $fine =~ /\d+-\d+:X(\d+)/g; -         -        foreach my $i (0..(scalar @coarse_spans)-1) { -            my $coarse_cat = $coarse_spans[$i]; -            my $fine_cat = $fine_spans[$i]; -             -            $FREQ_HIER{$coarse_cat}{$fine_cat}++; -        } -    } -    close SPANS; -    foreach (values %FREQ_HIER) { -        my $coarse_freq = $_; -        my $total = 0; -        $total+=$_ for (values %{ $coarse_freq }); -        $coarse_freq->{$_}=log($coarse_freq->{$_}/$total) for (keys %{ $coarse_freq }); -    } -    open FREQS, ">", $FREQS or die $!; -    foreach my $coarse_cat (keys %FREQ_HIER) { -        print FREQS "$coarse_cat |||"; -        foreach my $fine_cat (keys %{$FREQ_HIER{$coarse_cat}}) { -            my $res = $FREQ_HIER{$coarse_cat}{$fine_cat}; -            print FREQS " $fine_cat:$res"; -            if(! exists $finehier{$fine_cat} || $finehier{$fine_cat} < $res) { -               $finehier{$fine_cat} = $coarse_cat; -            }   -        } -        print FREQS "\n"; -    } -#    foreach my $fine_cat (keys %finehier) { -#        print FREQS "$fine_cat -> $finehier{$fine_cat}\n"; -#    } -    close FREQS; -    $CLUSTER_DIR = $CLUSTER_DIR_F; -} - -sub grammar_extract { -  my $LABELED = "$LABELED_DIR/corpus.src_trg_al_label"; -  print STDERR "\n!!!EXTRACTING GRAMMAR\n"; -  my $OUTGRAMMAR = "$GRAMMAR_DIR/grammar.gz"; -  if (-e $OUTGRAMMAR) { -    print STDERR "$OUTGRAMMAR exists, reusing...\n"; -  } else { -    my $BACKOFF_ARG = ($BACKOFF_GRAMMAR ? "-g" : ""); -    my $DEFAULT_CAT_ARG = ($DEFAULT_CAT ? 
"-d X" : ""); -    safesystem("$EXTRACTOR -i $LABELED -c $ITEMS_IN_MEMORY -L $BASE_PHRASE_MAX_SIZE -t $NUM_TOPICS $BACKOFF_ARG $DEFAULT_CAT_ARG | $SORT_KEYS | $REDUCER -p | $GZIP > $OUTGRAMMAR") or die "Couldn't extract grammar"; -  } -  return $OUTGRAMMAR; -} - -sub grammar_extract_bidir { -#gzcat ex.output.gz | ./mr_stripe_rule_reduce -p -b | sort -t $'\t' -k 1 | ./mr_stripe_rule_reduce | gzip > phrase-table.gz -  my $LABELED = "$LABELED_DIR/corpus.src_trg_al_label"; -  print STDERR "\n!!!EXTRACTING GRAMMAR\n"; -  my $OUTGRAMMAR = "$GRAMMAR_DIR/grammar.bidir.gz"; -  if (-e $OUTGRAMMAR) { -    print STDERR "$OUTGRAMMAR exists, reusing...\n"; -  } else { -    my $BACKOFF_ARG = ($BACKOFF_GRAMMAR ? "-g" : ""); -    safesystem("$EXTRACTOR -i $LABELED -c $ITEMS_IN_MEMORY -L $BASE_PHRASE_MAX_SIZE -b -t $NUM_TOPICS $BACKOFF_ARG | $SORT_KEYS | $REDUCER -p -b | $SORT_KEYS | $REDUCER | $GZIP > $OUTGRAMMAR") or die "Couldn't extract grammar"; -  } -  return $OUTGRAMMAR; -} - -sub safesystem { -  print STDERR "Executing: @_\n"; -  system(@_); -  if ($? == -1) { -      print STDERR "ERROR: Failed to execute: @_\n  $!\n"; -      exit(1); -  } -  elsif ($? & 127) { -      printf STDERR "ERROR: Execution of: @_\n  died with signal %d, %s coredump\n", -          ($? & 127),  ($? & 128) ? 'with' : 'without'; -      exit(1); -  } -  else { -    my $exitcode = $? >> 8; -    print STDERR "Exit code: $exitcode\n" if $exitcode; -    return ! $exitcode; -  } -} - diff --git a/gi/pipeline/lticluster.config b/gi/pipeline/lticluster.config deleted file mode 100644 index 3e23c8cb..00000000 --- a/gi/pipeline/lticluster.config +++ /dev/null @@ -1,9 +0,0 @@ -# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED -# name path aligned-corpus LM dev dev-refs test1 testt-eval.sh ... -/home/cdyer/ws10smt-data -btec /home/cdyer/ws10smt-data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh -zhen /home/cdyer/ws10smt-data/chinese-english corpus.zh-en.al lm/c2e.3gram.lm.gz dev_and_test/mt02.src.txt dev_and_test/mt02.ref.* dev_and_test/mt03.src.txt eval-mt03.sh -aren /home/cdyer/ws10smt-data/arabic-english corpus.ar-en-al lm/a2e.3gram.lm.gz dev_and_test/dev.src.txt dev_and_test/dev.ref.txt.* dev_and_test/mt05.src.txt eval-mt05.sh -uren /home/cdyer/ws10smt-data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh -nlfr /home/cdyer/ws10smt-data/dutch-french corpus.nl-fr.al - diff --git a/gi/pipeline/scripts/filter-by-f.pl b/gi/pipeline/scripts/filter-by-f.pl deleted file mode 100755 index 0cef0606..00000000 --- a/gi/pipeline/scripts/filter-by-f.pl +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; } - -my $REKEY="$SCRIPT_DIR/rekey.pl"; -my $REFILTER="$SCRIPT_DIR/refilter.pl"; -my $SORT="$SCRIPT_DIR/sort-by-key.sh"; -assert_exec($REKEY, $REFILTER, $SORT); - - -die "Usage: $0 NUM-TRANSLATIONS ingrammar.gz outgrammar.gz\n" unless scalar @ARGV == 3; -my $translations = shift @ARGV; -die "Need number: $translations" unless $translations > 0; -die unless $ARGV[0] =~ /\.gz$/; -die unless $ARGV[1] =~ /\.gz$/; -die if $ARGV[0] eq $ARGV[1]; -die "Can't find $ARGV[0]" unless -f $ARGV[0]; - -my $cmd = "gunzip -c $ARGV[0] | $REKEY | $SORT | $REFILTER $translations | gzip > $ARGV[1]"; -safesystem($ARGV[1], $cmd) or die "Filtering failed"; -exit 0; - -sub assert_exec { -  my @files = @_; 
-  for my $file (@files) {
-    die "Can't find $file - did you run make?\n" unless -e $file;
-    die "Can't execute $file" unless -x $file;
-  }
-};
-
-sub safesystem {
-  my $output = shift @_;
-  print STDERR "Executing: @_\n";
-  system(@_);
-  if ($? == -1) {
-      print STDERR "ERROR: Failed to execute: @_\n  $!\n";
-      if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; }
-      exit(1);
-  }
-  elsif ($? & 127) {
-      printf STDERR "ERROR: Execution of: @_\n  died with signal %d, %s coredump\n",
-          ($? & 127),  ($? & 128) ? 'with' : 'without';
-      if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; }
-      exit(1);
-  }
-  else {
-    my $exitcode = $? >> 8;
-    if ($exitcode) {
-      print STDERR "Exit code: $exitcode\n";
-      if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; }
-    }
-    return ! $exitcode;
-  }
-}
-
diff --git a/gi/pipeline/scripts/patch-corpus.pl b/gi/pipeline/scripts/patch-corpus.pl
deleted file mode 100755
index c0eec43e..00000000
--- a/gi/pipeline/scripts/patch-corpus.pl
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-my $PATCH = shift @ARGV;
-my $TGT = 1;
-my $APPEND;
-while ($PATCH eq "-s" || $PATCH eq "-a") {
-    if ($PATCH eq "-s") {
-        undef $TGT;
-    } else {
-        $APPEND = 1;
-    }
-    $PATCH = shift @ARGV;
-}
-
-die "Usage: $0 [-s] [-a] tagged.en[_fr] < lexical.en_fr_al[_...]\n" unless $PATCH;
-
-open P, "<$PATCH" or die "Can't read tagged corpus $PATCH: $!";
-my $first=<P>; close P;
-my @fields = split / \|\|\| /, $first;
-die "Bad format!" if (scalar @fields > 2);
-
-if (scalar @fields != 1) {
-  # TODO support this
-  die "Patching source and target not supported yet!";
-}
-
-my $lineno = 0;
-open P, "<$PATCH" or die "Can't read tagged corpus $PATCH: $!";
-while(my $pline = <P>) {
-  chomp $pline;
-  $lineno++;
-  my $line = <>;
-  die "Too few lines in lexical corpus!" unless $line;
-  chomp $line;
-  @fields = split / \|\|\| /, $line;
-  my @pwords = split /\s+/, $pline;
-  if ($TGT) {
-      my @lwords = split /\s+/, $fields[1];
-      die "Length mismatch in line $lineno!\n" unless (scalar @pwords == scalar @lwords);
-      if ($APPEND) {
-          foreach my $i (0..(scalar @pwords-1)) {
-              $lwords[$i] = $lwords[$i] . '_' . $pwords[$i];
-          }
-          $fields[1] = join ' ', @lwords;
-      } else {
-          $fields[1] = $pline;
-      }
-  } else { # source side
-      my @lwords = split /\s+/, $fields[0];
-      die "Length mismatch in line $lineno!\n" unless (scalar @pwords == scalar @lwords);
-      if ($APPEND) {
-          foreach my $i (0..(scalar @pwords-1)) {
-              $lwords[$i] = $lwords[$i] . '_' . 
$pwords[$i]; -          } -          $fields[0] = join ' ', @lwords; -      } else { -          $fields[0] = $pline; -      } -  } -  print join ' ||| ', @fields; -  print "\n"; -} - - diff --git a/gi/pipeline/scripts/refilter.pl b/gi/pipeline/scripts/refilter.pl deleted file mode 100755 index a783eb4e..00000000 --- a/gi/pipeline/scripts/refilter.pl +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -my $NUM_TRANSLATIONS = shift @ARGV; -unless ($NUM_TRANSLATIONS) { $NUM_TRANSLATIONS=30; } -print STDERR "KEEPING $NUM_TRANSLATIONS TRANSLATIONS FOR SOURCE\n"; - -my $pk = ''; -my %dict; -while(<>) { -  s/^(.+)\t//; -  my $key = $1; -  if ($key ne $pk) { -    if ($pk) { -      emit_dict(); -    } -    %dict = (); -    $pk = $key; -  } -  my ($lhs, $f, $e, $s) = split / \|\|\| /; -  my $score = 0; -  if ($s =~ /XEF=([^ ]+)/) { -    $score += $1; -  } else { die; } -  if ($s =~ /GenerativeProb=([^ ]+)/) { -    $score += ($1 / 10); -  } else { die; } -  $dict{"$lhs ||| $f ||| $e ||| $s"} = $score; -} -emit_dict(); - -sub emit_dict { -  my $cc = 0; -  for my $k (sort { $dict{$a} <=> $dict{$b} } keys %dict) { -    print "$k"; -    $cc++; -    if ($cc >= $NUM_TRANSLATIONS) { last; } -  } -} - diff --git a/gi/pipeline/scripts/rekey.pl b/gi/pipeline/scripts/rekey.pl deleted file mode 100755 index 31eb86b8..00000000 --- a/gi/pipeline/scripts/rekey.pl +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/perl - -while(<>) { -  my ($lhs, $f, $e, $s) = split / \|\|\| /; -  $f =~ s/\[X[0-9]+\]/\[X\]/g; -  print "$f\t$_"; -} - diff --git a/gi/pipeline/scripts/remove-tags-from-contexts.pl b/gi/pipeline/scripts/remove-tags-from-contexts.pl deleted file mode 100755 index 20698816..00000000 --- a/gi/pipeline/scripts/remove-tags-from-contexts.pl +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -use Getopt::Long "GetOptions"; - -my $PHRASE = 'tok'; -my $CONTEXT = 'tag'; - -die "Usage: $0 [--phrase=tok|tag] [--context=tok|tag] < corpus"  -    unless &GetOptions('phrase=s' => \$PHRASE, 'context=s' => \$CONTEXT); - -my $lno = 0; -while(my $line = <>) { -    $lno++; -    chomp $line; -    my @top = split /\t/, $line; -    die unless (scalar @top == 2);  - -    my @pwords = split /\s+/, $top[0]; -    foreach my $token (@pwords) { -        #print $token . "\n"; -        my @parts = split /_(?!.*_)/, $token; -        die unless (scalar @parts == 2);  -        if ($PHRASE eq "tok") { -            $token = $parts[0] -        } elsif ($PHRASE eq "tag") { -            $token = $parts[1] -        } -    } - -    my @fields = split / \|\|\| /, $top[1]; -    foreach my $i (0..((scalar @fields) / 2 - 1)) { -        #print $i . ": " . $fields[2*$i] . " of " . (scalar @fields) . "\n"; -        my @cwords = split /\s+/, $fields[2*$i]; -        foreach my $token (@cwords) { -            #print $i . ": " . $token . 
"\n"; -            my @parts = split /_(?!.*_)/, $token; -            if (scalar @parts == 2) { -                if ($CONTEXT eq "tok") { -                    $token = $parts[0] -                } elsif ($CONTEXT eq "tag") { -                    $token = $parts[1] -                } -            } -        } -        $fields[2*$i] = join ' ', @cwords; -    } - -    print join ' ', @pwords; -    print "\t"; -    print join ' ||| ', @fields; -    print "\n"; -} diff --git a/gi/pipeline/scripts/remove-tags-from-corpus.pl b/gi/pipeline/scripts/remove-tags-from-corpus.pl deleted file mode 100755 index be3e97c0..00000000 --- a/gi/pipeline/scripts/remove-tags-from-corpus.pl +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -use Getopt::Long "GetOptions"; - -my $LANGUAGE = shift @ARGV; -$LANGUAGE = 'target' unless ($LANGUAGE); - -my $lno = 0; -while(my $line = <>) { -    $lno++; -    chomp $line; - -    my @fields = split / \|\|\| /, $line; - -    if ($LANGUAGE eq "source" or $LANGUAGE eq "both") { -        my @cwords = split /\s+/, $fields[0]; -        foreach my $token (@cwords) { -            my @parts = split /_(?!.*_)/, $token; -            if (scalar @parts == 2) { -                $token = $parts[0] -            } else { -                print STDERR "WARNING: invalid tagged token $token\n"; -            } -        } -        $fields[0] = join ' ', @cwords; -    } - -    if ($LANGUAGE eq "target" or $LANGUAGE eq "both") { -        my @cwords = split /\s+/, $fields[1]; -        foreach my $token (@cwords) { -            my @parts = split /_(?!.*_)/, $token; -            if (scalar @parts == 2) { -                $token = $parts[1] -            } else { -                print STDERR "WARNING: invalid tagged token $token\n"; -            } -        } -        $fields[0] = join ' ', @cwords; -    } - -    print join ' ||| ', @fields; -    print "\n"; -} diff --git a/gi/pipeline/scripts/sort-by-key.sh b/gi/pipeline/scripts/sort-by-key.sh deleted file mode 100755 index 7ae33e03..00000000 --- a/gi/pipeline/scripts/sort-by-key.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -export LANG=C -sort -t $'\t' -k 1 -T /tmp -S 6000000000 - diff --git a/gi/pipeline/scripts/xfeats.pl b/gi/pipeline/scripts/xfeats.pl deleted file mode 100755 index dc578513..00000000 --- a/gi/pipeline/scripts/xfeats.pl +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -die "Usage: $0 x-grammar.scfg[.gz] < cat-grammar.scfg\n" unless scalar @ARGV > 0; - -my $xgrammar = shift @ARGV; -die "Can't find $xgrammar" unless -f $xgrammar; -my $fh; -if ($xgrammar =~ /\.gz$/) { -  open $fh, "gunzip -c $xgrammar|" or die "Can't fork: $!"; -} else { -  open $fh, "<$xgrammar" or die "Can't read $xgrammar: $!"; -} -print STDERR "Reading X-feats from $xgrammar...\n"; -my %dict; -while(<$fh>) { -  chomp; -  my ($lhs, $f, $e, $feats) = split / \|\|\| /; -  my $xfeats; -  my $cc = 0; -  my @xfeats = (); -  while ($feats =~ /(EGivenF|FGivenE|LogRuleCount|LogECount|LogFCount|SingletonRule|SingletonE|SingletonF)=([^ ]+)( |$)/og) { -    push @xfeats, "X_$1=$2"; -  } -  #print "$lhs ||| $f ||| $e ||| @xfeats\n"; -  $dict{"$lhs ||| $f ||| $e"} = "@xfeats"; -} -close $fh; - -print STDERR "Add features...\n"; -while(<>) { -  chomp; -  my ($lhs, $f, $e) = split / \|\|\| /; -  $f=~ s/\[[^]]+,([12])\]/\[X,$1\]/g; -  my $xfeats = $dict{"[X] ||| $f ||| $e"}; -  die "Can't find x features for: $_\n" unless $xfeats; -  print "$_ $xfeats\n"; -} - diff --git a/gi/pipeline/valhalla.config b/gi/pipeline/valhalla.config deleted file 
mode 100644 index e00a8485..00000000 --- a/gi/pipeline/valhalla.config +++ /dev/null @@ -1,9 +0,0 @@ -# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED -# name path aligned-corpus LM dev dev-refs test1 testt-eval.sh ... -/home/chris/ws10smt/data -btec /home/chris/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh -fbis /home/chris/ws10smt/data/chinese-english.fbis corpus.zh-en.al -zhen /home/chris/ws10smt/data/chinese-english corpus.zh-en.al -aren /home/chris/ws10smt/data/arabic-english corpus.ar-en.al -uren /home/chris/ws10smt/data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh -nlfr /home/chris/ws10smt/data/dutch-french corpus.nl-fr.al diff --git a/gi/posterior-regularisation/Corpus.java b/gi/posterior-regularisation/Corpus.java deleted file mode 100644 index 07b27387..00000000 --- a/gi/posterior-regularisation/Corpus.java +++ /dev/null @@ -1,167 +0,0 @@ -import gnu.trove.TIntArrayList; - -import java.io.*; -import java.util.*; -import java.util.regex.Pattern; - -public class Corpus -{ -	private Lexicon<String> tokenLexicon = new Lexicon<String>(); -	private Lexicon<TIntArrayList> phraseLexicon = new Lexicon<TIntArrayList>(); -	private Lexicon<TIntArrayList> contextLexicon = new Lexicon<TIntArrayList>(); -	private List<Edge> edges = new ArrayList<Edge>(); -	private List<List<Edge>> phraseToContext = new ArrayList<List<Edge>>(); -	private List<List<Edge>> contextToPhrase = new ArrayList<List<Edge>>(); -	 -	public class Edge -	{ -		Edge(int phraseId, int contextId, int count) -		{ -			this.phraseId = phraseId; -			this.contextId = contextId; -			this.count = count; -		} -		public int getPhraseId() -		{ -			return phraseId; -		} -		public TIntArrayList getPhrase() -		{ -			return phraseLexicon.lookup(phraseId); -		} -		public String getPhraseString() -		{ -			StringBuffer b = new StringBuffer(); -			for (int tid: getPhrase().toNativeArray()) -			{ -				if (b.length() > 0) -					b.append(" "); -				b.append(tokenLexicon.lookup(tid)); -			} -			return b.toString(); -		}		 -		public int getContextId() -		{ -			return contextId; -		} -		public TIntArrayList getContext() -		{ -			return contextLexicon.lookup(contextId); -		} -		public String getContextString() -		{ -			StringBuffer b = new StringBuffer(); -			for (int tid: getContext().toNativeArray()) -			{ -				if (b.length() > 0) -					b.append(" "); -				b.append(tokenLexicon.lookup(tid)); -			} -			return b.toString(); -		} -		public int getCount() -		{ -			return count; -		} -		private int phraseId; -		private int contextId; -		private int count; -	} - -	List<Edge> getEdges() -	{ -		return edges; -	} -	 -	int getNumEdges() -	{ -		return edges.size(); -	} - -	int getNumPhrases() -	{ -		return phraseLexicon.size(); -	} -	 -	List<Edge> getEdgesForPhrase(int phraseId) -	{ -		return phraseToContext.get(phraseId); -	} -	 -	int getNumContexts() -	{ -		return contextLexicon.size(); -	} -	 -	List<Edge> getEdgesForContext(int contextId) -	{ -		return contextToPhrase.get(contextId); -	} -	 -	int getNumTokens() -	{ -		return tokenLexicon.size(); -	} -	 -	static Corpus readFromFile(Reader in) throws IOException -	{ -		Corpus c = new Corpus(); -		 -		// read in line-by-line -		BufferedReader bin = new BufferedReader(in); -		String line; -		Pattern separator = Pattern.compile(" \\|\\|\\| "); - -		while ((line = bin.readLine()) != null) -		{ -			// split into phrase and contexts -			StringTokenizer st = new 
StringTokenizer(line, "\t"); -			assert (st.hasMoreTokens()); -			String phraseToks = st.nextToken(); -			assert (st.hasMoreTokens()); -			String rest = st.nextToken(); -			assert (!st.hasMoreTokens()); - -			// process phrase	 -			st = new StringTokenizer(phraseToks, " "); -			TIntArrayList ptoks = new TIntArrayList(); -			while (st.hasMoreTokens()) -				ptoks.add(c.tokenLexicon.insert(st.nextToken())); -			int phraseId = c.phraseLexicon.insert(ptoks); -			if (phraseId == c.phraseToContext.size()) -				c.phraseToContext.add(new ArrayList<Edge>()); -			 -			// process contexts -			String[] parts = separator.split(rest); -			assert (parts.length % 2 == 0); -			for (int i = 0; i < parts.length; i += 2) -			{ -				// process pairs of strings - context and count -				TIntArrayList ctx = new TIntArrayList(); -				String ctxString = parts[i]; -				String countString = parts[i + 1]; -				StringTokenizer ctxStrtok = new StringTokenizer(ctxString, " "); -				while (ctxStrtok.hasMoreTokens()) -				{ -					String token = ctxStrtok.nextToken(); -					if (!token.equals("<PHRASE>")) -						ctx.add(c.tokenLexicon.insert(token)); -				} -				int contextId = c.contextLexicon.insert(ctx); -				if (contextId == c.contextToPhrase.size()) -					c.contextToPhrase.add(new ArrayList<Edge>()); - -				assert (countString.startsWith("C=")); -				Edge e = c.new Edge(phraseId, contextId,  -						Integer.parseInt(countString.substring(2).trim())); -				c.edges.add(e); -				 -				// index the edge for fast phrase, context lookup -				c.phraseToContext.get(phraseId).add(e); -				c.contextToPhrase.get(contextId).add(e); -			} -		} -		 -		return c; -	}	 -} diff --git a/gi/posterior-regularisation/Lexicon.java b/gi/posterior-regularisation/Lexicon.java deleted file mode 100644 index 9f0245ee..00000000 --- a/gi/posterior-regularisation/Lexicon.java +++ /dev/null @@ -1,32 +0,0 @@ -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -public class Lexicon<T> -{ -	public int insert(T word) -	{ -		Integer i = wordToIndex.get(word); -		if (i == null) -		{ -			i = indexToWord.size(); -			wordToIndex.put(word, i); -			indexToWord.add(word); -		} -		return i; -	} - -	public T lookup(int index) -	{ -		return indexToWord.get(index); -	} - -	public int size() -	{ -		return indexToWord.size(); -	} - -	private Map<T, Integer> wordToIndex = new HashMap<T, Integer>(); -	private List<T> indexToWord = new ArrayList<T>(); -}
\ No newline at end of file diff --git a/gi/posterior-regularisation/PhraseContextModel.java b/gi/posterior-regularisation/PhraseContextModel.java deleted file mode 100644 index 85bcfb89..00000000 --- a/gi/posterior-regularisation/PhraseContextModel.java +++ /dev/null @@ -1,466 +0,0 @@ -// Input of the form: -// " the phantom of the opera "    tickets for <PHRASE> tonight ? ||| C=1 ||| seats for <PHRASE> ? </s> ||| C=1 ||| i see <PHRASE> ? </s> ||| C=1 -//                      phrase TAB [context]+ -// where    context =   phrase ||| C=...        which are separated by ||| - -// Model parameterised as follows: -// - each phrase, p, is allocated a latent state, t -// - this is used to generate the contexts, c -// - each context is generated using 4 independent multinomials, one for each position LL, L, R, RR - -// Training with EM: -// - e-step is estimating q(t) = P(t|p,c) for all x,c -// - m-step is estimating model parameters P(c,t|p) = P(t) P(c|t) -// - PR uses alternate e-step, which first optimizes lambda  -//      min_q KL(q||p) + delta sum_pt max_c E_q[phi_ptc] -//   where -//      q(t|p,c) propto p(t,c|p) exp( -phi_ptc ) -//   Then q is used to obtain expectations for vanilla M-step. - -// Sexing it up: -// - learn p-specific conditionals P(t|p) -// - or generate phrase internals, e.g., generate edge words from -//   different distribution to central words -// - agreement between phrase->context model and context->phrase model - -import java.io.*; -import optimization.gradientBasedMethods.*; -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.gradientBasedMethods.stats.ProjectedOptimizerStats; -import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc; -import optimization.linesearch.GenericPickFirstStep; -import optimization.linesearch.InterpolationPickFirstStep; -import optimization.linesearch.LineSearchMethod; -import optimization.linesearch.WolfRuleLineSearch; -import optimization.projections.SimplexProjection; -import optimization.stopCriteria.CompositeStopingCriteria; -import optimization.stopCriteria.NormalizedProjectedGradientL2Norm; -import optimization.stopCriteria.NormalizedValueDifference; -import optimization.stopCriteria.ProjectedGradientL2Norm; -import optimization.stopCriteria.StopingCriteria; -import optimization.stopCriteria.ValueDifference; -import optimization.util.MathUtils; -import java.util.*; -import java.util.regex.*; -import gnu.trove.TDoubleArrayList; -import gnu.trove.TIntArrayList; -import static java.lang.Math.*; - -class PhraseContextModel -{ -	// model/optimisation configuration parameters -	int numTags; -	boolean posteriorRegularisation = true; -	double constraintScale = 3; // FIXME: make configurable -	 -	// copied from L1LMax in depparsing code -	final double c1= 0.0001, c2=0.9, stoppingPrecision = 1e-5, maxStep = 10; -	final int maxZoomEvals = 10, maxExtrapolationIters = 200; -	int maxProjectionIterations = 200; -	int minOccurrencesForProjection = 0; - -	// book keeping -	int numPositions; -	Random rng = new Random(); - -	// training set -	Corpus training; - -	// model parameters (learnt) -	double emissions[][][]; // position in 0 .. 
3 x tag x word Pr(word | tag, position) -	double prior[][]; // phrase x tag Pr(tag | phrase) -	double lambda[]; // edge = (phrase, context) x tag flattened lagrange multipliers - -	PhraseContextModel(Corpus training, int tags) -	{ -		this.training = training; -		this.numTags = tags; -		assert (!training.getEdges().isEmpty()); -		assert (numTags > 1); - -		// now initialise emissions -		numPositions = training.getEdges().get(0).getContext().size(); -		assert (numPositions > 0); - -		emissions = new double[numPositions][numTags][training.getNumTokens()]; -		prior = new double[training.getNumEdges()][numTags]; -		if (posteriorRegularisation) -			lambda = new double[training.getNumEdges() * numTags]; - -		for (double[][] emissionTW : emissions) -		{ -			for (double[] emissionW : emissionTW) -			{ -				randomise(emissionW); -//				for (int i = 0; i < emissionW.length; ++i) -//					emissionW[i] = i+1; -//				normalise(emissionW); -			} -		} -					 -		for (double[] priorTag : prior) -		{ -			randomise(priorTag); -//			for (int i = 0; i < priorTag.length; ++i) -//				priorTag[i] = i+1; -//			normalise(priorTag); -		} -	} - -	void expectationMaximisation(int numIterations) -	{ -		double lastLlh = Double.NEGATIVE_INFINITY; - -		for (int iteration = 0; iteration < numIterations; ++iteration) -		{ -			double emissionsCounts[][][] = new double[numPositions][numTags][training.getNumTokens()]; -			double priorCounts[][] = new double[training.getNumPhrases()][numTags]; -			 -			// E-step -			double llh = 0; -			if (posteriorRegularisation) -			{ -				EStepDualObjective objective = new EStepDualObjective(); -				 -				// copied from x2y2withconstraints -//				LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1));				 -//				OptimizerStats stats = new OptimizerStats(); -//				ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls); -//				CompositeStopingCriteria compositeStop = new CompositeStopingCriteria(); -//				compositeStop.add(new ProjectedGradientL2Norm(0.001)); -//				compositeStop.add(new ValueDifference(0.001)); -//				optimizer.setMaxIterations(50); -//				boolean succeed = optimizer.optimize(objective,stats,compositeStop); -				 -				// copied from depparser l1lmaxobjective -				ProjectedOptimizerStats stats = new ProjectedOptimizerStats(); -				GenericPickFirstStep pickFirstStep = new GenericPickFirstStep(1); -				LineSearchMethod linesearch = new WolfRuleLineSearch(pickFirstStep, c1, c2); -				ProjectedGradientDescent optimizer = new ProjectedGradientDescent(linesearch); -				optimizer.setMaxIterations(maxProjectionIterations); -		        CompositeStopingCriteria stop = new CompositeStopingCriteria(); -		        stop.add(new NormalizedProjectedGradientL2Norm(stoppingPrecision)); -		        stop.add(new NormalizedValueDifference(stoppingPrecision)); -		        boolean succeed = optimizer.optimize(objective, stats, stop); - -				System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1)); -				//System.out.println("Solution: " + objective.parameters); -				if (!succeed) -					System.out.println("Failed to optimize"); -				//System.out.println("Ended optimization in " + optimizer.getCurrentIteration());				 - -				//lambda = objective.getParameters(); -				llh = objective.primal(); -				 -				for (int i = 0; i < training.getNumPhrases(); ++i) -				{ -					List<Corpus.Edge> edges = training.getEdgesForPhrase(i); -					for (int j = 0; j < edges.size(); ++j) -					{ -						Corpus.Edge e = 
edges.get(j);
-						for (int t = 0; t < numTags; t++)
-						{
-							double p = objective.q.get(i).get(j).get(t);
-							priorCounts[i][t] += e.getCount() * p;
-							TIntArrayList tokens = e.getContext();
-							for (int k = 0; k < tokens.size(); ++k)
-								emissionsCounts[k][t][tokens.get(k)] += e.getCount() * p;
-						}
-					}
-				}
-			}
-			else
-			{
-				for (int i = 0; i < training.getNumPhrases(); ++i)
-				{
-					List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-					for (int j = 0; j < edges.size(); ++j)
-					{
-						Corpus.Edge e = edges.get(j);
-						double probs[] = posterior(i, e);
-						double z = normalise(probs);
-						llh += log(z) * e.getCount();
-
-						TIntArrayList tokens = e.getContext();
-						for (int t = 0; t < numTags; ++t)
-						{
-							priorCounts[i][t] += e.getCount() * probs[t];
-							for (int k = 0; k < tokens.size(); ++k)
-								emissionsCounts[k][t][tokens.get(k)] += e.getCount() * probs[t];
-						}
-					}
-				}
-			}
-
-			// M-step: normalise
-			for (double[][] emissionTW : emissionsCounts)
-				for (double[] emissionW : emissionTW)
-					normalise(emissionW);
-
-			for (double[] priorTag : priorCounts)
-				normalise(priorTag);
-
-			emissions = emissionsCounts;
-			prior = priorCounts;
-
-			System.out.println("Iteration " + iteration + " llh " + llh);
-
-//			if (llh - lastLlh < 1e-4)
-//				break;
-//			else
-//				lastLlh = llh;
-		}
-	}
-
-	static double normalise(double probs[])
-	{
-		double z = 0;
-		for (double p : probs)
-			z += p;
-		for (int i = 0; i < probs.length; ++i)
-			probs[i] /= z;
-		return z;
-	}
-
-	void randomise(double probs[])
-	{
-		double z = 0;
-		for (int i = 0; i < probs.length; ++i)
-		{
-			probs[i] = 10 + rng.nextDouble();
-			z += probs[i];
-		}
-
-		for (int i = 0; i < probs.length; ++i)
-			probs[i] /= z;
-	}
-
-	static int argmax(double probs[])
-	{
-		double m = Double.NEGATIVE_INFINITY;
-		int mi = -1;
-		for (int i = 0; i < probs.length; ++i)
-		{
-			if (probs[i] > m)
-			{
-				m = probs[i];
-				mi = i;
-			}
-		}
-		return mi;
-	}
-
-	double[] posterior(int phraseId, Corpus.Edge e) // unnormalised
-	{
-		double probs[] = new double[numTags];
-		TIntArrayList tokens = e.getContext();
-		for (int t = 0; t < numTags; ++t)
-		{
-			probs[t] = prior[phraseId][t];
-			for (int k = 0; k < tokens.size(); ++k)
-				probs[t] *= emissions[k][t][tokens.get(k)];
-		}
-		return probs;
-	}
-
-	void displayPosterior()
-	{
-		for (int i = 0; i < training.getNumPhrases(); ++i)
-		{
-			List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-			for (Corpus.Edge e: edges)
-			{
-				double probs[] = posterior(i, e);
-				normalise(probs);
-
-				// emit phrase
-				System.out.print(e.getPhraseString());
-				System.out.print("\t");
-				System.out.print(e.getContextString());
-				System.out.print("||| C=" + e.getCount() + " |||");
-
-				int t = argmax(probs);
-				System.out.print(" " + t + " ||| " + probs[t]);
-				// for (int t = 0; t < numTags; ++t)
-				// System.out.print(" " + probs[t]);
-				System.out.println();
-			}
-		}
-	}
-
-	public static void main(String[] args)
-	{
-		assert (args.length >= 3);
-		try
-		{
-			Corpus corpus = Corpus.readFromFile(new FileReader(new File(args[0])));
-			PhraseContextModel model = new PhraseContextModel(corpus, Integer.parseInt(args[1]));
-			model.expectationMaximisation(Integer.parseInt(args[2]));
-			model.displayPosterior();
-		}
-		catch (IOException e)
-		{
-			System.out.println("Failed to read input file: " + args[0]);
-	
		e.printStackTrace(); -		} -	} - -	class EStepDualObjective extends ProjectedObjective -	{ -		List<List<TDoubleArrayList>> conditionals; // phrase id x context # x tag - precomputed -		List<List<TDoubleArrayList>> q; // ditto, but including exp(-lambda) terms -		double objective = 0; // log(z) -		// Objective.gradient = d log(z) / d lambda = E_q[phi] -		double llh = 0; - -		public EStepDualObjective() -		{ -			super(); -			// compute conditionals p(context, tag | phrase) for all training instances -			conditionals = new ArrayList<List<TDoubleArrayList>>(training.getNumPhrases()); -			q = new ArrayList<List<TDoubleArrayList>>(training.getNumPhrases()); -			for (int i = 0; i < training.getNumPhrases(); ++i) -			{ -				List<Corpus.Edge> edges = training.getEdgesForPhrase(i); - -				conditionals.add(new ArrayList<TDoubleArrayList>(edges.size())); -				q.add(new ArrayList<TDoubleArrayList>(edges.size())); - -				for (int j = 0; j < edges.size(); ++j) -				{ -					Corpus.Edge e = edges.get(j); -					double probs[] = posterior(i, e); -					double z = normalise(probs); -					llh += log(z) * e.getCount(); -					conditionals.get(i).add(new TDoubleArrayList(probs)); -					q.get(i).add(new TDoubleArrayList(probs)); -				} -			} -			 -			gradient = new double[training.getNumEdges()*numTags]; -			setInitialParameters(lambda); -			computeObjectiveAndGradient(); -		} - -		@Override -		public double[] projectPoint(double[] point) -		{ -			SimplexProjection p = new SimplexProjection(constraintScale); - -			double[] newPoint = point.clone(); -			int edgeIndex = 0; -			for (int i = 0; i < training.getNumPhrases(); ++i) -			{ -				List<Corpus.Edge> edges = training.getEdgesForPhrase(i); - -				for (int t = 0; t < numTags; t++) -				{ -					double[] subPoint = new double[edges.size()]; -					for (int j = 0; j < edges.size(); ++j) -						subPoint[j] = point[edgeIndex+j*numTags+t]; -				 -					p.project(subPoint); -					for (int j = 0; j < edges.size(); ++j) -						newPoint[edgeIndex+j*numTags+t] = subPoint[j]; -				} -				 -				edgeIndex += edges.size() * numTags; -			} -//			System.out.println("Proj from: " + Arrays.toString(point));  -//			System.out.println("Proj to:   " + Arrays.toString(newPoint));  -			return newPoint; -		} - -		@Override -		public void setParameters(double[] params) -		{ -			super.setParameters(params); -			computeObjectiveAndGradient(); -		} - -		@Override -		public double[] getGradient() -		{ -			gradientCalls += 1; -			return gradient; -		} - -		@Override -		public double getValue() -		{ -			functionCalls += 1; -			return objective; -		} - -		public void computeObjectiveAndGradient() -		{ -			int edgeIndex = 0; -			objective = 0; -			Arrays.fill(gradient, 0); -			for (int i = 0; i < training.getNumPhrases(); ++i) -			{ -				List<Corpus.Edge> edges = training.getEdgesForPhrase(i); - -				for (int j = 0; j < edges.size(); ++j) -				{ -					Corpus.Edge e = edges.get(j); -					 -					double z = 0; -					for (int t = 0; t < numTags; t++) -					{ -						double v = conditionals.get(i).get(j).get(t) * exp(-parameters[edgeIndex+t]); -						q.get(i).get(j).set(t, v); -						z += v; -					} -					objective += log(z) * e.getCount(); - -					for (int t = 0; t < numTags; t++) -					{ -						double v = q.get(i).get(j).get(t) / z;  -						q.get(i).get(j).set(t, v); -						gradient[edgeIndex+t] -= e.getCount() * v; -					} -					 -					edgeIndex += numTags; -				} -			}			 -//			System.out.println("computeObjectiveAndGradient logz=" + objective); -//			System.out.println("lambda=  " + 
Arrays.toString(parameters)); -//			System.out.println("gradient=" + Arrays.toString(gradient)); -		} - -		public String toString() -		{ -			StringBuilder sb = new StringBuilder(); -			sb.append(getClass().getCanonicalName()).append(" with "); -			sb.append(parameters.length).append(" parameters and "); -			sb.append(training.getNumPhrases() * numTags).append(" constraints"); -			return sb.toString(); -		} -				 -		double primal() -		{ -			// primal = llh + KL(q||p) + scale * sum_pt max_c E_q[phi_pct] -			// kl = sum_Y q(Y) log q(Y) / p(Y|X) -			//    = sum_Y q(Y) { -lambda . phi(Y) - log Z } -			//    = -log Z - lambda . E_q[phi] -			//    = -objective + lambda . gradient -			 -			double kl = -objective + MathUtils.dotProduct(parameters, gradient); -			double l1lmax = 0; -			for (int i = 0; i < training.getNumPhrases(); ++i) -			{ -				List<Corpus.Edge> edges = training.getEdgesForPhrase(i); -				for (int t = 0; t < numTags; t++) -				{ -					double lmax = Double.NEGATIVE_INFINITY; -					for (int j = 0; j < edges.size(); ++j) -						lmax = max(lmax, q.get(i).get(j).get(t)); -					l1lmax += lmax; -				} -			} - -			return llh + kl + constraintScale * l1lmax; -		} -	} -} diff --git a/gi/posterior-regularisation/README b/gi/posterior-regularisation/README deleted file mode 100644 index a3d54ffc..00000000 --- a/gi/posterior-regularisation/README +++ /dev/null @@ -1,3 +0,0 @@ -  557  ./cdec_extools/extractor -i btec/split.zh-en.al -c 500000 -L 12 -C  | sort -t $'\t' -k 1 | ./cdec_extools/mr_stripe_rule_reduce > btec.concordance -  559  wc -l btec.concordance  -  588  cat btec.concordance  | sed  's/.*	//' | awk '{ for (i=1; i < NF; i++) { x=substr($i, 1, 2); if (x == "C=") printf "\n"; else if (x != "||") printf "%s ", $i; }; printf "\n"; }' | sort | uniq | wc -l diff --git a/gi/posterior-regularisation/alphabet.hh b/gi/posterior-regularisation/alphabet.hh deleted file mode 100644 index 1db928da..00000000 --- a/gi/posterior-regularisation/alphabet.hh +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _alphabet_hh -#define _alphabet_hh - -#include <cassert> -#include <iosfwd> -#include <map> -#include <string> -#include <vector> - -// Alphabet: indexes a set of types  -template <typename T> -class Alphabet: protected std::map<T, int> -{ -public: -    Alphabet() {}; - -    bool empty() const { return std::map<T,int>::empty(); } -    int size() const { return std::map<T,int>::size(); } - -    int operator[](const T &k) const -    { -        typename std::map<T,int>::const_iterator cit = find(k); -        if (cit != std::map<T,int>::end()) -            return cit->second; -        else -            return -1; -    } - -    int lookup(const T &k) const { return (*this)[k]; } - -    int insert(const T &k)  -    { -        int sz = size(); -        assert((unsigned) sz == _items.size()); - -        std::pair<typename std::map<T,int>::iterator, bool> -            ins = std::map<T,int>::insert(make_pair(k, sz)); - -        if (ins.second)  -            _items.push_back(k); - -        return ins.first->second; -    } - -    const T &type(int i) const -    { -        assert(i >= 0); -        assert(i < size()); -        return _items[i]; -    } - -    std::ostream &display(std::ostream &out, int i) const -    { -        return out << type(i); -    } - -private: -    std::vector<T> _items; -}; - -#endif diff --git a/gi/posterior-regularisation/canned.concordance b/gi/posterior-regularisation/canned.concordance deleted file mode 100644 index 710973ff..00000000 --- a/gi/posterior-regularisation/canned.concordance +++ 
/dev/null @@ -1,4 +0,0 @@ -a	0 0 <PHRASE> 0 0 ||| C=1 ||| 1 1 <PHRASE> 1 1 ||| C=1 ||| 2 2 <PHRASE> 2 2 ||| C=1 -b	0 0 <PHRASE> 0 0 ||| C=1 ||| 1 1 <PHRASE> 1 1 ||| C=1  -c	2 2 <PHRASE> 2 2 ||| C=1 ||| 4 4 <PHRASE> 4 4 ||| C=1 ||| 5 5 <PHRASE> 5 5 ||| C=1 -d	4 4 <PHRASE> 4 4 ||| C=1 ||| 5 5 <PHRASE> 5 5 ||| C=1 diff --git a/gi/posterior-regularisation/em.cc b/gi/posterior-regularisation/em.cc deleted file mode 100644 index f6c9fd68..00000000 --- a/gi/posterior-regularisation/em.cc +++ /dev/null @@ -1,830 +0,0 @@ -// Input of the form: -// " the phantom of the opera "    tickets for <PHRASE> tonight ? ||| C=1 ||| seats for <PHRASE> ? </s> ||| C=1 ||| i see <PHRASE> ? </s> ||| C=1 -//                      phrase TAB [context]+ -// where    context =   phrase ||| C=...        which are separated by ||| - -// Model parameterised as follows: -// - each phrase, p, is allocated a latent state, t -// - this is used to generate the contexts, c -// - each context is generated using 4 independent multinomials, one for each position LL, L, R, RR - -// Training with EM: -// - e-step is estimating P(t|p,c) for all x,c -// - m-step is estimating model parameters P(p,c,t) = P(t) P(p|t) P(c|t) - -// Sexing it up: -// - constrain the posteriors P(t|c) and P(t|p) to have few high-magnitude entries -// - improve the generation of phrase internals, e.g., generate edge words from -//   different distribution to central words - -#include "alphabet.hh" -#include "log_add.hh" -#include <algorithm> -#include <fstream> -#include <iostream> -#include <iterator> -#include <map> -#include <sstream> -#include <stdexcept> -#include <vector> -#include <tr1/random> -#include <tr1/tuple> -#include <nlopt.h> - -using namespace std; -using namespace std::tr1; - -const int numTags = 5; -const int numIterations = 100; -const bool posterior_regularisation = true; -const double PHRASE_VIOLATION_WEIGHT = 10; -const double CONTEXT_VIOLATION_WEIGHT = 0; -const bool includePhraseProb = false; - -// Data structures: -Alphabet<string> lexicon; -typedef vector<int> Phrase; -typedef tuple<int, int, int, int> Context; -Alphabet<Phrase> phrases; -Alphabet<Context> contexts; - -typedef map<int, int> ContextCounts; -typedef map<int, int> PhraseCounts; -typedef map<int, ContextCounts> PhraseToContextCounts; -typedef map<int, PhraseCounts> ContextToPhraseCounts; - -PhraseToContextCounts concordancePhraseToContexts; -ContextToPhraseCounts concordanceContextToPhrases; - -typedef vector<double> Dist; -typedef vector<Dist> ConditionalDist; -Dist prior; // class -> P(class) -vector<ConditionalDist> probCtx; // word -> class -> P(word | class), for each position of context word -ConditionalDist probPhrase; // class -> P(word | class) -Dist probPhraseLength; // class -> P(length | class) expressed as geometric distribution parameter - -mt19937 randomGenerator((size_t) time(NULL)); -uniform_real<double> uniDist(0.0, 1e-1); -variate_generator< mt19937, uniform_real<double> > rng(randomGenerator, uniDist); - -void addRandomNoise(Dist &d); -void normalise(Dist &d); -void addTo(Dist &d, const Dist &e); -int argmax(const Dist &d); - -map<Phrase, map<Context, int> > lambda_indices; - -Dist conditional_probs(const Phrase &phrase, const Context &context, double *normalisation = 0); -template <typename T> -Dist -penalised_conditionals(const Phrase &phrase, const Context &context,  -                       const T &lambda, double *normalisation); -//Dist penalised_conditionals(const Phrase &phrase, const Context &context, const double *lambda, double 
*normalisation = 0); -double penalised_log_likelihood(int n, const double *lambda, double *gradient, void *data); -void optimise_lambda(double delta, double gamma, vector<double> &lambda); -double expected_violation_phrases(const double *lambda); -double expected_violation_contexts(const double *lambda); -double primal_kl_divergence(const double *lambda); -double dual(const double *lambda); -void print_primal_dual(const double *lambda, double delta, double gamma); - -ostream &operator<<(ostream &, const Phrase &); -ostream &operator<<(ostream &, const Context &); -ostream &operator<<(ostream &, const Dist &); -ostream &operator<<(ostream &, const ConditionalDist &); - -int -main(int argc, char *argv[]) -{ -    randomGenerator.seed(time(NULL)); - -    int edges = 0; -    istream &input = cin; -    while (input.good()) -    { -        // read the phrase -        string phraseString; -        Phrase phrase; -        getline(input, phraseString, '\t'); -        istringstream pinput(phraseString); -        string token; -        while (pinput >> token) -            phrase.push_back(lexicon.insert(token)); -        int phraseId = phrases.insert(phrase); - -        // read the rest, storing each context -        string remainder; -        getline(input, remainder, '\n'); -        istringstream rinput(remainder); -        Context context(-1, -1, -1, -1); -        int index = 0; -        while (rinput >> token) -        { -            if (token != "|||" && token != "<PHRASE>") -            { -                if (index < 4) -                { -                    // eugh! damn templates -                    switch (index) -                    { -                        case 0: get<0>(context) = lexicon.insert(token); break; -                        case 1: get<1>(context) = lexicon.insert(token); break; -                        case 2: get<2>(context) = lexicon.insert(token); break; -                        case 3: get<3>(context) = lexicon.insert(token); break; -                        default: assert(false); -                    } -                    index += 1; -                } -                else if (token.find("C=") == 0) -                { -                    int contextId = contexts.insert(context); -                    int count = atoi(token.substr(strlen("C=")).c_str()); -                    concordancePhraseToContexts[phraseId][contextId] += count; -                    concordanceContextToPhrases[contextId][phraseId] += count; -                    index = 0; -                    context = Context(-1, -1, -1, -1); -                    edges += 1; -                } -            } -        } - -        // trigger EOF -        input >> ws; -    } - -    cout << "Read in " << phrases.size() << " phrases" -         << " and " << contexts.size() << " contexts" -         << " and " << edges << " edges" -         << " and " << lexicon.size() << " word types\n"; - -    // FIXME: filter out low count phrases and low count contexts (based on individual words?) 
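-    // n.b. each multinomial below starts from a uniform base count of 1.0
-    // plus a small uniform(0, 0.1) perturbation before normalisation; EM
-    // needs this asymmetry, otherwise all tags would remain identical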
-    // now populate model parameters with uniform + random noise
-    prior.resize(numTags, 1.0);
-    addRandomNoise(prior);
-    normalise(prior);
-
-    probCtx.resize(4, ConditionalDist(numTags, Dist(lexicon.size(), 1.0)));
-    if (includePhraseProb)
-        probPhrase.resize(numTags, Dist(lexicon.size(), 1.0));
-    for (int t = 0; t < numTags; ++t)
-    {
-        for (int j = 0; j < 4; ++j)
-        {
-            addRandomNoise(probCtx[j][t]);
-            normalise(probCtx[j][t]);
-        }
-        if (includePhraseProb)
-        {
-            addRandomNoise(probPhrase[t]);
-            normalise(probPhrase[t]);
-        }
-    }
-    if (includePhraseProb)
-    {
-        probPhraseLength.resize(numTags, 0.5); // geometric distribution p=0.5
-        addRandomNoise(probPhraseLength);
-    }
-
-    cout << "\tprior:     " << prior << "\n";
-    //cout << "\tcontext:   " << probCtx << "\n";
-    //cout << "\tphrase:    " << probPhrase << "\n";
-    //cout << "\tphraseLen: " << probPhraseLength << endl;
-
-    vector<double> lambda;
-
-    // now do EM training
-    for (int iteration = 0; iteration < numIterations; ++iteration)
-    {
-        cout << "EM iteration " << iteration << endl;
-
-        if (posterior_regularisation)
-            optimise_lambda(PHRASE_VIOLATION_WEIGHT, CONTEXT_VIOLATION_WEIGHT, lambda);
-        //cout << "\tlambda " << lambda << endl;
-
-        Dist countsPrior(numTags, 0.0);
-        vector<ConditionalDist> countsCtx(4, ConditionalDist(numTags, Dist(lexicon.size(), 1e-10)));
-        ConditionalDist countsPhrase(numTags, Dist(lexicon.size(), 1e-10));
-        Dist countsPhraseLength(numTags, 0.0);
-        Dist nPhrases(numTags, 0.0);
-
-        double llh = 0;
-        for (PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.begin();
-             pcit != concordancePhraseToContexts.end(); ++pcit)
-        {
-            const Phrase &phrase = phrases.type(pcit->first);
-
-            // e-step: estimate latent class probs; compile (class,word) stats for m-step
-            for (ContextCounts::iterator ccit = pcit->second.begin();
-                 ccit != pcit->second.end(); ++ccit)
-            {
-                const Context &context = contexts.type(ccit->first);
-
-                double z = 0;
-                Dist tagCounts;
-                if (!posterior_regularisation)
-                    tagCounts = conditional_probs(phrase, context, &z);
-                else
-                    tagCounts = penalised_conditionals(phrase, context, lambda, &z);
-
-                llh += log(z) * ccit->second;
-                for (int t = 0; t < numTags; ++t)
-                    countsPrior[t] += tagCounts[t] * ccit->second;
-
-                for (int t = 0; t < numTags; ++t)
-                {
-                    for (int j = 0; j < 4; ++j)
-                    {
-                        // n.b. tuple elements need a compile-time index, hence the switch
-                        int w = -1;
-                        switch (j)
-                        {
-                            case 0: w = get<0>(context); break;
-                            case 1: w = get<1>(context); break;
-                            case 2: w = get<2>(context); break;
-                            case 3: w = get<3>(context); break;
-                        }
-                        countsCtx[j][t][w] += tagCounts[t] * ccit->second;
-                    }
-
-                    if (includePhraseProb)
-                    {
-                        for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit)
-                            countsPhrase[t][*pit] += tagCounts[t] * ccit->second;
-                        countsPhraseLength[t] += phrase.size() * tagCounts[t] * ccit->second;
-                        nPhrases[t] += tagCounts[t] * ccit->second;
-                    }
-                }
-            }
-        }
-
-        cout << "M-step\n";
-
-        // m-step: normalise prior and (class,word) stats and assign to model parameters
-        normalise(countsPrior);
-        
prior = countsPrior; -        for (int t = 0; t < numTags; ++t) -        { -            //cout << "\t\tt " << t << " prior " << countsPrior[t] << "\n"; -            for (int j = 0; j < 4; ++j) -                normalise(countsCtx[j][t]); -            if (includePhraseProb) -            { -                normalise(countsPhrase[t]); -                countsPhraseLength[t] = nPhrases[t] / countsPhraseLength[t]; -            } -        } -        probCtx = countsCtx; -        if (includePhraseProb) -        { -            probPhrase = countsPhrase; -            probPhraseLength = countsPhraseLength; -        } - -        double *larray = new double[lambda.size()]; -        copy(lambda.begin(), lambda.end(), larray); -        print_primal_dual(larray, PHRASE_VIOLATION_WEIGHT, CONTEXT_VIOLATION_WEIGHT); -        delete [] larray; - -        //cout << "\tllh " << llh << endl; -        //cout << "\tprior:     " << prior << "\n"; -        //cout << "\tcontext:   " << probCtx << "\n"; -        //cout << "\tphrase:    " << probPhrase << "\n"; -        //cout << "\tphraseLen: " << probPhraseLength << "\n"; -    } - -    // output class membership -    for (PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.begin(); -         pcit != concordancePhraseToContexts.end(); ++pcit) -    { -        const Phrase &phrase = phrases.type(pcit->first); -        for (ContextCounts::iterator ccit = pcit->second.begin(); -             ccit != pcit->second.end(); ++ccit) -        { -            const Context &context = contexts.type(ccit->first); -            Dist tagCounts = conditional_probs(phrase, context, 0); -            cout << phrase << " ||| " << context << " ||| " << argmax(tagCounts) << "\n"; -        } -    } - -    return 0; -} - -void addRandomNoise(Dist &d) -{ -    for (Dist::iterator dit = d.begin(); dit != d.end(); ++dit) -        *dit += rng(); -} - -void normalise(Dist &d) -{ -    double z = 0; -    for (Dist::iterator dit = d.begin(); dit != d.end(); ++dit) -        z += *dit; -    for (Dist::iterator dit = d.begin(); dit != d.end(); ++dit) -        *dit /= z; -} - -void addTo(Dist &d, const Dist &e) -{ -    assert(d.size() == e.size()); -    for (int i = 0; i < (int) d.size(); ++i) -        d[i] += e[i]; -} - -int argmax(const Dist &d) -{ -    double best = d[0]; -    int index = 0; -    for (int i = 1; i < (int) d.size(); ++i) -    { -        if (d[i] > best) -        { -            best = d[i]; -            index = i; -        } -    } -    return index; -} - -ostream &operator<<(ostream &out, const Phrase &phrase) -{ -    for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit) -        lexicon.display(((pit == phrase.begin()) ? out : out << " "), *pit); -    return out; -} - -ostream &operator<<(ostream &out, const Context &context) -{ -    lexicon.display(out, get<0>(context)); -    lexicon.display(out << " ", get<1>(context)); -    lexicon.display(out << " <PHRASE> ", get<2>(context)); -    lexicon.display(out << " ", get<3>(context)); -    return out; -} - -ostream &operator<<(ostream &out, const Dist &dist) -{ -    for (Dist::const_iterator dit = dist.begin(); dit != dist.end(); ++dit) -        out << ((dit == dist.begin()) ? "" : " ") << *dit; -    return out; -} - -ostream &operator<<(ostream &out, const ConditionalDist &dist) -{ -    for (ConditionalDist::const_iterator dit = dist.begin(); dit != dist.end(); ++dit) -        out << ((dit == dist.begin()) ? 
"" : "; ") << *dit; -    return out; -} - -// FIXME: slow - just use the phrase index, context index to do the mapping -// (n.b. it's a sparse setup, not just equal to 3d array index) -int -lambda_index(const Phrase &phrase, const Context &context, int tag) -{ -    return lambda_indices[phrase][context] + tag; -} - -template <typename T> -Dist -penalised_conditionals(const Phrase &phrase, const Context &context,  -                       const T &lambda, double *normalisation) -{ -    Dist d = conditional_probs(phrase, context, 0); - -    double z = 0; -    for (int t = 0; t < numTags; ++t) -    { -        d[t] *= exp(-lambda[lambda_index(phrase, context, t)]); -        z += d[t]; -    } - -    if (normalisation) -        *normalisation = z; - -    for (int t = 0; t < numTags; ++t) -        d[t] /= z; - -    return d; -} - -Dist  -conditional_probs(const Phrase &phrase, const Context &context, double *normalisation) -{ -    Dist tagCounts(numTags, 0.0); -    double z = 0; -    for (int t = 0; t < numTags; ++t) -    { -        double prob = prior[t]; -        prob *= (probCtx[0][t][get<0>(context)] * probCtx[1][t][get<1>(context)] * -                 probCtx[2][t][get<2>(context)] * probCtx[3][t][get<3>(context)]); - -        if (includePhraseProb) -        { -            prob *= pow(1 - probPhraseLength[t], phrase.size() - 1) * probPhraseLength[t]; -            for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit) -                prob *= probPhrase[t][*pit]; -        } - -        tagCounts[t] = prob; -        z += prob; -    } -    if (normalisation) -        *normalisation = z; - -    for (int t = 0; t < numTags; ++t) -        tagCounts[t] /= z; - -    return tagCounts; -} - -double  -penalised_log_likelihood(int n, const double *lambda, double *grad, void *) -{ -    // return log Z(lambda, theta) over the corpus -    // where theta are the global parameters (prior, probCtx*, probPhrase*)  -    // and lambda are lagrange multipliers for the posterior sparsity constraints -    // -    // this is formulated as:  -    // f = log Z(lambda) = sum_i log ( sum_i p_theta(t_i|p_i,c_i) exp [-lambda_{t_i,p_i,c_i}] ) -    // where i indexes the training examples - specifying the (p, c) pair (which may occur with count > 1) -    // -    // with derivative: -    // f'_{tpc} = frac { - count(t,p,c) p_theta(t|p,c) exp (-lambda_{t,p,c}) } -    //                 { sum_t' p_theta(t'|p,c) exp (-lambda_{t',p,c}) } - -    //cout << "penalised_log_likelihood with lambda "; -    //copy(lambda, lambda+n, ostream_iterator<double>(cout, " ")); -    //cout << "\n"; - -    double f = 0; -    if (grad) -    { -        for (int i = 0; i < n; ++i) -            grad[i] = 0.0; -    } - -    for (int p = 0; p < phrases.size(); ++p) -    { -        const Phrase &phrase = phrases.type(p); -        PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); -        for (ContextCounts::const_iterator ccit = pcit->second.begin(); -             ccit != pcit->second.end(); ++ccit) -        { -            const Context &context = contexts.type(ccit->first); -            double z = 0; -            Dist scores = penalised_conditionals(phrase, context, lambda, &z); - -            f += ccit->second * log(z); -            //cout << "\tphrase: " << phrase << " context: " << context << " count: " << ccit->second << " z " << z << endl; -            //cout << "\t\tscores: " << scores << "\n"; - -            if (grad) -            { -                for (int t = 0; t < numTags; ++t) -          
      { -                    int i = lambda_index(phrase, context, t); // FIXME: redundant lookups -                    assert(grad[i] == 0.0); -                    grad[i] = - ccit->second * scores[t]; -                } -            } -        } -    } - -    //cout << "penalised_log_likelihood returning " << f; -    //if (grad) -    //{ -        //cout << "\ngradient: "; -        //copy(grad, grad+n, ostream_iterator<double>(cout, " ")); -    //} -    //cout << "\n"; - -    return f; -} - -typedef struct  -{ -    // one of p or c should be set to -1, in which case it will be marginalised out  -    // i.e. sum_p' lambda_{p'ct} <= threshold -    //   or sum_c' lambda_{pc't} <= threshold -    int p, c, t, threshold; -} constraint_data; - -double  -constraint_and_gradient(int n, const double *lambda, double *grad, void *data) -{ -    constraint_data *d = (constraint_data *) data; -    assert(d->t >= 0); -    assert(d->threshold >= 0); - -    //cout << "constraint_and_gradient: t " << d->t << " p " << d->p << " c " << d->c << " tau " << d->threshold << endl; -    //cout << "\tlambda "; -    //copy(lambda, lambda+n, ostream_iterator<double>(cout, " ")); -    //cout << "\n"; - -    // FIXME: it's crazy to use a dense gradient here => will only have a handful of non-zero entries -    if (grad) -    { -        for (int i = 0; i < n; ++i) -            grad[i] = 0.0; -    } - -    //cout << "constraint_and_gradient: " << d->p << "; " << d->c << "; " << d->t << "; " << d->threshold << endl; - -    if (d->p >= 0) -    { -        assert(d->c < 0); -        //    sum_c lambda_pct          <= delta [a.k.a. threshold] -        // => sum_c lambda_pct - delta  <= 0 -        // derivative_pct = { 1, if p and t match; 0, otherwise } - -        double val = -d->threshold; - -        const Phrase &phrase = phrases.type(d->p); -        PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(d->p); -        assert(pcit != concordancePhraseToContexts.end()); -        for (ContextCounts::const_iterator ccit = pcit->second.begin(); -             ccit != pcit->second.end(); ++ccit) -        { -            const Context &context = contexts.type(ccit->first); -            int i = lambda_index(phrase, context, d->t); -            val += lambda[i]; -            if (grad) grad[i] = 1; -        } -        //cout << "\treturning " << val << endl; - -        return val; -    } -    else -    { -        assert(d->c >= 0); -        assert(d->p < 0); -        //    sum_p lambda_pct          <= gamma [a.k.a. 
threshold] -        // => sum_p lambda_pct - gamma  <= 0 -        // derivative_pct = { 1, if c and t match; 0, otherwise } - -        double val = -d->threshold; - -        const Context &context = contexts.type(d->c); -        ContextToPhraseCounts::iterator cpit = concordanceContextToPhrases.find(d->c); -        assert(cpit != concordanceContextToPhrases.end()); -        for (PhraseCounts::iterator pcit = cpit->second.begin(); -             pcit != cpit->second.end(); ++pcit) -        { -            const Phrase &phrase = phrases.type(pcit->first); -            int i = lambda_index(phrase, context, d->t); -            val += lambda[i]; -            if (grad) grad[i] = 1; -        } -        //cout << "\treturning " << val << endl; - -        return val; -    } -} - -void -optimise_lambda(double delta, double gamma, vector<double> &lambdav) -{ -    int num_lambdas = lambdav.size(); -    if (lambda_indices.empty() || lambdav.empty()) -    { -        lambda_indices.clear(); -        lambdav.clear(); - -        int i = 0; -        for (int p = 0; p < phrases.size(); ++p) -        { -            const Phrase &phrase = phrases.type(p); -            PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.find(p); -            for (ContextCounts::iterator ccit = pcit->second.begin(); -                 ccit != pcit->second.end(); ++ccit) -            { -                const Context &context = contexts.type(ccit->first); -                lambda_indices[phrase][context] = i; -                i += numTags; -            } -        } -        num_lambdas = i; -        lambdav.resize(num_lambdas); -    } -    //cout << "optimise_lambda: #langrange multipliers " << num_lambdas << endl; - -    // FIXME: better to work with an implicit representation to save memory usage -    int num_constraints = (((delta > 0) ? phrases.size() : 0) + ((gamma > 0) ? 
contexts.size() : 0)) * numTags; -    //cout << "optimise_lambda: #constraints " << num_constraints << endl; -    constraint_data *data = new constraint_data[num_constraints]; -    int i = 0; -    if (delta > 0) -    { -        for (int p = 0; p < phrases.size(); ++p) -        { -            for (int t = 0; t < numTags; ++t, ++i) -            { -                constraint_data &d = data[i]; -                d.p = p; -                d.c = -1; -                d.t = t; -                d.threshold = delta; -            } -        } -    } - -    if (gamma > 0) -    { -        for (int c = 0; c < contexts.size(); ++c) -        { -            for (int t = 0; t < numTags; ++t, ++i) -            { -                constraint_data &d = data[i]; -                d.p = -1; -                d.c = c; -                d.t = t; -                d.threshold = gamma; -            } -        } -    } -    assert(i == num_constraints); - -    double lambda[num_lambdas]; -    double lb[num_lambdas], ub[num_lambdas]; -    for (i = 0; i < num_lambdas; ++i) -    { -        lambda[i] = lambdav[i]; // starting value -        lb[i] = 0;              // lower bound -        if (delta <= 0)         // upper bound -            ub[i] = gamma;       -        else if (gamma <= 0) -            ub[i] = delta; -        else -            assert(false); -    } - -    //print_primal_dual(lambda, delta, gamma); -    -    double minf; -    int error_code = nlopt_minimize_constrained(NLOPT_LN_COBYLA, num_lambdas, penalised_log_likelihood, NULL, -                                                num_constraints, constraint_and_gradient, data, sizeof(constraint_data), -                                                lb, ub, lambda, &minf, -HUGE_VAL, 0.0, 0.0, 1e-4, NULL, 0, 0.0); -    //cout << "optimise error code " << error_code << endl; - -    //print_primal_dual(lambda, delta, gamma); - -    delete [] data; - -    if (error_code < 0) -        cout << "WARNING: optimisation failed with error code: " << error_code << endl; -    //else -    //{ -        //cout << "success; minf " << minf << endl; -        //print_primal_dual(lambda, delta, gamma); -    //} - -    lambdav = vector<double>(&lambda[0], &lambda[0] + num_lambdas); -} - -// FIXME: inefficient - cache the scores -double -expected_violation_phrases(const double *lambda) -{ -    // sum_pt max_c E_q[phi_pct] -    double violation = 0; - -    for (int p = 0; p < phrases.size(); ++p) -    { -        const Phrase &phrase = phrases.type(p); -        PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); - -        for (int t = 0; t < numTags; ++t) -        { -            double best = 0; -            for (ContextCounts::const_iterator ccit = pcit->second.begin(); -                 ccit != pcit->second.end(); ++ccit) -            { -                const Context &context = contexts.type(ccit->first); -                Dist scores = penalised_conditionals(phrase, context, lambda, 0); -                best = max(best, scores[t]); -            } -            violation += best; -        } -    } - -    return violation; -} - -// FIXME: inefficient - cache the scores -double -expected_violation_contexts(const double *lambda) -{ -    // sum_ct max_p E_q[phi_pct] -    double violation = 0; - -    for (int c = 0; c < contexts.size(); ++c) -    { -        const Context &context = contexts.type(c); -        ContextToPhraseCounts::iterator cpit = concordanceContextToPhrases.find(c); - -        for (int t = 0; t < numTags; ++t) -        { -            double best 
= 0; -            for (PhraseCounts::iterator pit = cpit->second.begin(); -                 pit != cpit->second.end(); ++pit) -            { -                const Phrase &phrase = phrases.type(pit->first); -                Dist scores = penalised_conditionals(phrase, context, lambda, 0); -                best = max(best, scores[t]); -            } -            violation += best; -        } -    } - -    return violation; -} - -// FIXME: possibly inefficient -double  -primal_likelihood() // FIXME: primal evaluation needs to use lambda and calculate l1linf terms -{ -    double llh = 0; -    for (int p = 0; p < phrases.size(); ++p) -    { -        const Phrase &phrase = phrases.type(p); -        PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); -        for (ContextCounts::const_iterator ccit = pcit->second.begin(); -             ccit != pcit->second.end(); ++ccit) -        { -            const Context &context = contexts.type(ccit->first); -            double z = 0; -            Dist scores = conditional_probs(phrase, context, &z); -            llh += ccit->second * log(z); -        } -    } -    return llh; -} - -// FIXME: inefficient - cache the scores -double  -primal_kl_divergence(const double *lambda) -{ -    // return KL(q || p) = sum_y q(y) { log q(y) - log p(y | x) } -    //                   = sum_y q(y) { log p(y | x) - lambda . phi(x, y) - log Z - log p(y | x) } -    //                   = sum_y q(y) { - lambda . phi(x, y) } - log Z -    // and q(y) factors with each edge, ditto for Z -     -    double feature_sum = 0, log_z = 0; -    for (int p = 0; p < phrases.size(); ++p) -    { -        const Phrase &phrase = phrases.type(p); -        PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); -        for (ContextCounts::const_iterator ccit = pcit->second.begin(); -             ccit != pcit->second.end(); ++ccit) -        { -            const Context &context = contexts.type(ccit->first); - -            double local_z = 0; -            double local_f = 0; -            Dist d = conditional_probs(phrase, context, 0); -            for (int t = 0; t < numTags; ++t) -            { -                int i = lambda_index(phrase, context, t); -                double s = d[t] * exp(-lambda[i]); -                local_f += lambda[i] * s; -                local_z += s; -            } - -            log_z += ccit->second * log(local_z); -            feature_sum += ccit->second * (local_f / local_z); -        } -    } - -    return -feature_sum - log_z; -} - -// FIXME: inefficient - cache the scores -double  -dual(const double *lambda) -{ -    // return log(Z) = - log { sum_y p(y | x) exp( - lambda . phi(x, y) } -    // n.b. 
have flipped the sign as we're minimising -     -    double z = 0; -    for (int p = 0; p < phrases.size(); ++p) -    { -        const Phrase &phrase = phrases.type(p); -        PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); -        for (ContextCounts::const_iterator ccit = pcit->second.begin(); -             ccit != pcit->second.end(); ++ccit) -        { -            const Context &context = contexts.type(ccit->first); -            double lz = 0; -            Dist scores = penalised_conditionals(phrase, context, lambda, &z); -            z += lz * ccit->second; -        } -    } -    return log(z); -} - -void -print_primal_dual(const double *lambda, double delta, double gamma) -{ -    double likelihood = primal_likelihood(); -    double kl = primal_kl_divergence(lambda); -    double sum_pt = expected_violation_phrases(lambda); -    double sum_ct = expected_violation_contexts(lambda); -    //double d = dual(lambda); - -    cout << "\tllh=" << likelihood -         << " kl=" << kl -         << " violations phrases=" << sum_pt -         << " contexts=" << sum_ct -         //<< " primal=" << (kl + delta * sum_pt + gamma * sum_ct)  -         //<< " dual=" << d -         << " objective=" << (likelihood - kl + delta * sum_pt + gamma * sum_ct)  -         << endl; -} diff --git a/gi/posterior-regularisation/invert.hh b/gi/posterior-regularisation/invert.hh deleted file mode 100644 index d06356e9..00000000 --- a/gi/posterior-regularisation/invert.hh +++ /dev/null @@ -1,45 +0,0 @@ -// The following code inverts the matrix input using LU-decomposition with -// backsubstitution of unit vectors. Reference: Numerical Recipies in C, 2nd -// ed., by Press, Teukolsky, Vetterling & Flannery.  -// Code written by Fredrik Orderud. -// http://www.crystalclearsoftware.com/cgi-bin/boost_wiki/wiki.pl?LU_Matrix_Inversion - -#ifndef INVERT_MATRIX_HPP -#define INVERT_MATRIX_HPP - -// REMEMBER to update "lu.hpp" header includes from boost-CVS -#include <boost/numeric/ublas/vector.hpp> -#include <boost/numeric/ublas/vector_proxy.hpp> -#include <boost/numeric/ublas/matrix.hpp> -#include <boost/numeric/ublas/triangular.hpp> -#include <boost/numeric/ublas/lu.hpp> -#include <boost/numeric/ublas/io.hpp> - -namespace ublas = boost::numeric::ublas; - -/* Matrix inversion routine. 
-   Uses lu_factorize and lu_substitute in uBLAS to invert a matrix */ -template<class T> -bool invert_matrix(const ublas::matrix<T>& input, ublas::matrix<T>& inverse)  -{ -    using namespace boost::numeric::ublas; -    typedef permutation_matrix<std::size_t> pmatrix; -    // create a working copy of the input -    matrix<T> A(input); -    // create a permutation matrix for the LU-factorization -    pmatrix pm(A.size1()); - -    // perform LU-factorization -    int res = lu_factorize(A,pm); -    if( res != 0 ) return false; - -    // create identity matrix of "inverse" -    inverse.assign(ublas::identity_matrix<T>(A.size1())); - -    // backsubstitute to get the inverse -    lu_substitute(A, pm, inverse); -     -    return true; -} - -#endif //INVERT_MATRIX_HPP diff --git a/gi/posterior-regularisation/linesearch.py b/gi/posterior-regularisation/linesearch.py deleted file mode 100644 index 5a3f2e9c..00000000 --- a/gi/posterior-regularisation/linesearch.py +++ /dev/null @@ -1,58 +0,0 @@ -## Automatically adapted for scipy Oct 07, 2005 by convertcode.py - -from scipy.optimize import minpack2 -import numpy - -import __builtin__ -pymin = __builtin__.min - -def line_search(f, myfprime, xk, pk, gfk, old_fval, old_old_fval, -                args=(), c1=1e-4, c2=0.9, amax=50): - -    fc = 0 -    gc = 0 -    phi0 = old_fval -    derphi0 = numpy.dot(gfk,pk) -    alpha1 = pymin(1.0,1.01*2*(phi0-old_old_fval)/derphi0) -    # trevor: added this test -    alpha1 = pymin(alpha1,amax) - -    if isinstance(myfprime,type(())): -        eps = myfprime[1] -        fprime = myfprime[0] -        newargs = (f,eps) + args -        gradient = False -    else: -        fprime = myfprime -        newargs = args -        gradient = True - -    xtol = 1e-14 -    amin = 1e-8 -    isave = numpy.zeros((2,), numpy.intc) -    dsave = numpy.zeros((13,), float) -    task = 'START' -    fval = old_fval -    gval = gfk - -    while 1: -        stp,fval,derphi,task = minpack2.dcsrch(alpha1, phi0, derphi0, c1, c2, -                                               xtol, task, amin, amax,isave,dsave) -        #print 'minpack2.dcsrch', alpha1, phi0, derphi0, c1, c2, xtol, task, amin, amax,isave,dsave -        #print 'returns', stp,fval,derphi,task - -        if task[:2] == 'FG': -            alpha1 = stp -            fval = f(xk+stp*pk,*args) -            fc += 1 -            gval = fprime(xk+stp*pk,*newargs) -            if gradient: gc += 1 -            else: fc += len(xk) + 1 -            phi0 = fval -            derphi0 = numpy.dot(gval,pk) -        else: -            break - -    if task[:5] == 'ERROR' or task[1:4] == 'WARN': -        stp = None  # failed -    return stp, fc, gc, fval, old_fval, gval diff --git a/gi/posterior-regularisation/log_add.hh b/gi/posterior-regularisation/log_add.hh deleted file mode 100644 index e0620c5a..00000000 --- a/gi/posterior-regularisation/log_add.hh +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef log_add_hh -#define log_add_hh - -#include <limits> -#include <iostream> -#include <cassert> -#include <cmath> - -template <typename T> -struct Log -{ -    static T zero() { return -std::numeric_limits<T>::infinity(); }  - -    static T add(T l1, T l2) -    { -        if (l1 == zero()) return l2; -        if (l1 > l2)  -            return l1 + std::log(1 + exp(l2 - l1)); -        else -            return l2 + std::log(1 + exp(l1 - l2)); -    } - -    static T subtract(T l1, T l2) -    { -        //std::assert(l1 >= l2); -        return l1 + log(1 - exp(l2 - l1)); -    } -}; - -#endif diff --git 
a/gi/posterior-regularisation/prjava.jar b/gi/posterior-regularisation/prjava.jar deleted file mode 120000 index da8bf761..00000000 --- a/gi/posterior-regularisation/prjava.jar +++ /dev/null @@ -1 +0,0 @@ -prjava/prjava-20100708.jar
\ No newline at end of file diff --git a/gi/posterior-regularisation/prjava/Makefile b/gi/posterior-regularisation/prjava/Makefile deleted file mode 100755 index bd3bfca0..00000000 --- a/gi/posterior-regularisation/prjava/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -all: -	ant dist - -check: -	echo no tests - -clean: -	ant clean diff --git a/gi/posterior-regularisation/prjava/build.xml b/gi/posterior-regularisation/prjava/build.xml deleted file mode 100644 index 7222b3c8..00000000 --- a/gi/posterior-regularisation/prjava/build.xml +++ /dev/null @@ -1,38 +0,0 @@ -<project name="prjava" default="dist" basedir="."> -  <!-- set global properties for this build --> -  <property name="src" location="src"/> -  <property name="build" location="build"/> -  <property name="dist" location="lib"/> -  <path id="classpath"> -      <pathelement location="lib/trove-2.0.2.jar"/> -      <pathelement location="lib/optimization.jar"/> -      <pathelement location="lib/jopt-simple-3.2.jar"/> -      <pathelement location="lib/commons-math-2.1.jar"/> -  </path> - -  <target name="init"> -    <!-- Create the time stamp --> -    <tstamp/> -    <!-- Create the build directory structure used by compile --> -    <mkdir dir="${build}"/> -  </target> - -  <target name="compile" depends="init" -        description="compile the source " > -    <!-- Compile the java code from ${src} into ${build} --> -    <javac srcdir="${src}" destdir="${build}" includeantruntime="false"> -            <classpath refid="classpath"/> -    </javac> -  </target> - -  <target name="dist" depends="compile" -        description="generate the distribution" > -    <jar jarfile="${dist}/prjava-${DSTAMP}.jar" basedir="${build}"/> -    <symlink link="./prjava.jar" resource="${dist}/prjava-${DSTAMP}.jar" overwrite="true"/> -  </target> - -  <target name="clean" -        description="clean up" > -    <delete dir="${build}"/> -  </target> -</project> diff --git a/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar b/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar Binary files differdeleted file mode 100644 index 43b4b369..00000000 --- a/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar +++ /dev/null diff --git a/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar b/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar Binary files differdeleted file mode 100644 index 56373621..00000000 --- a/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar +++ /dev/null diff --git a/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar b/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar Binary files differdeleted file mode 100644 index 3e59fbf3..00000000 --- a/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar +++ /dev/null diff --git a/gi/posterior-regularisation/prjava/src/arr/F.java b/gi/posterior-regularisation/prjava/src/arr/F.java deleted file mode 100644 index be0a6ed6..00000000 --- a/gi/posterior-regularisation/prjava/src/arr/F.java +++ /dev/null @@ -1,99 +0,0 @@ -package arr;
 -
 -import java.util.Arrays;
 -import java.util.Random;
 -
 -public class F {
 -	public static Random rng = new Random();
 -
 -	public static void randomise(double probs[])
 -	{
 -		randomise(probs, true);
 -	}
 -
 -	public static void randomise(double probs[], boolean normalise)
 -	{
 -		double z = 0;
 -		for (int i = 0; i < probs.length; ++i)
 -		{
 -			probs[i] = 10 + rng.nextDouble();
 -			if (normalise)
 -				z += probs[i];
 -		}
 -
 -		if (normalise)
 -			for (int i = 0; i < probs.length; ++i)
 -				probs[i] /= z;
 -	}
 -	
 -	public static void uniform(double probs[])
 -	{
 -		for (int i = 0; i < probs.length; ++i)
 -			probs[i] = 1.0 / probs.length;
 -	}
 -	
 -	public static void l1normalize(double [] a){
 -		double sum=0;
 -		for(int i=0;i<a.length;i++){
 -			sum+=a[i];
 -		}
 -		if(sum==0)
 -			Arrays.fill(a, 1.0/a.length);
 -		else
 -		{
 -			for(int i=0;i<a.length;i++){
 -				a[i]/=sum;
 -			}
 -		}
 -	}
 -	
 -	public  static void l1normalize(double [][] a){
 -		double sum=0;
 -		for(int i=0;i<a.length;i++){
 -			for(int j=0;j<a[i].length;j++){
 -				sum+=a[i][j];
 -			}
 -		}
 -		if(sum==0){
 -			return;
 -		}
 -		for(int i=0;i<a.length;i++){
 -			for(int j=0;j<a[i].length;j++){
 -				a[i][j]/=sum;
 -			}
 -		}
 -	}
 -	
 -	public static double l1norm(double a[]){
-		// FIXME: this isn't the l1 norm when a has negative entries (no absolute value is taken)
 -		double norm=0;
 -		for(int i=0;i<a.length;i++){
 -			norm += a[i];
 -		}
 -		return norm;
 -	}
 -	
 -	public static double l2norm(double a[]){
 -		double norm=0;
 -		for(int i=0;i<a.length;i++){
 -			norm += a[i]*a[i];
 -		}
 -		return Math.sqrt(norm);
 -	}
 -	
 -	public static int argmax(double probs[])
 -	{
 -		double m = Double.NEGATIVE_INFINITY;
 -		int mi = -1;
 -		for (int i = 0; i < probs.length; ++i)
 -		{
 -			if (probs[i] > m)
 -			{
 -				m = probs[i];
 -				mi = i;
 -			}
 -		}
 -		return mi;
 -	}
 -	
 -}
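The FIXME in l1norm above is accurate: the method sums raw entries, which equals the l1 norm only for non-negative vectors (its callers pass probability vectors, so the shortcut is harmless there). A minimal corrected sketch for the general case, hypothetical and not part of the original file:

	public static double l1normGeneral(double[] a) {
		double norm = 0;
		for (int i = 0; i < a.length; i++)
			norm += Math.abs(a[i]); // sum of absolute values, correct for negative entries
		return norm;
	}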
 diff --git a/gi/posterior-regularisation/prjava/src/data/Corpus.java b/gi/posterior-regularisation/prjava/src/data/Corpus.java deleted file mode 100644 index 425ede11..00000000 --- a/gi/posterior-regularisation/prjava/src/data/Corpus.java +++ /dev/null @@ -1,233 +0,0 @@ -package data;
 -
 -import java.util.ArrayList;
 -import java.util.Arrays;
 -import java.util.HashMap;
 -import java.util.Scanner;
 -
 -public class Corpus {
 -
 -	public static final String alphaFilename="../posdata/corpus.alphabet";
 -	public static final String tagalphaFilename="../posdata/corpus.tag.alphabet";
 -	
 -//	public static final String START_SYM="<s>";
 -	public static final String END_SYM="<e>";
 -	public static final String NUM_TOK="<NUM>";
 -	
 -	public static final String UNK_TOK="<unk>";
 -	
 -	private ArrayList<String[]>sent;
 -	private ArrayList<int[]>data;
 -	
 -	public ArrayList<String[]>tag;
 -	public  ArrayList<int[]>tagData;
 -	
 -	public static boolean convertNumTok=true;
 -	
 -	private HashMap<String,Integer>freq;
 -	public HashMap<String,Integer>vocab;
 -	
 -	public HashMap<String,Integer>tagVocab;
 -	private int tagV;
 -	
 -	private int V;
 -	
 -	public static void main(String[] args) {
 -		Corpus c=new Corpus("../posdata/en_test.conll");
 -		System.out.println(
 -			Arrays.toString(c.get(0))	
 -		);
 -		System.out.println(
 -				Arrays.toString(c.getInt(0))	
 -			);
 -		
 -		System.out.println(
 -				Arrays.toString(c.get(1))	
 -			);
 -			System.out.println(
 -					Arrays.toString(c.getInt(1))	
 -				);
 -	}
 -
 -	public Corpus(String filename,HashMap<String,Integer>dict){
 -		V=0;
 -		tagV=0;
 -		freq=new HashMap<String,Integer>();
 -		tagVocab=new HashMap<String,Integer>();
 -		vocab=dict;
 -		
 -		sent=new ArrayList<String[]>();
 -		tag=new ArrayList<String[]>();
 -		
 -		Scanner sc=io.FileUtil.openInFile(filename);
 -		ArrayList<String>s=new ArrayList<String>();
 -	//	s.add(START_SYM);
 -		while(sc.hasNextLine()){
 -			String line=sc.nextLine();
 -			String toks[]=line.split("\t");
 -			if(toks.length<2){
 -				s.add(END_SYM);
 -				sent.add(s.toArray(new String[0]));
 -				s=new ArrayList<String>();
 -		//		s.add(START_SYM);
 -				continue;
 -			}
 -			String tok=toks[1].toLowerCase();
 -			s.add(tok);
 -		}
 -		sc.close();
 -
 -		buildData();
 -	}
 -	
 -	public Corpus(String filename){
 -		V=0;
 -		freq=new HashMap<String,Integer>();
 -		vocab=new HashMap<String,Integer>();
 -		tagVocab=new HashMap<String,Integer>();
 -		
 -		sent=new ArrayList<String[]>();
 -		tag=new ArrayList<String[]>();
 -		
-		System.out.println("Reading: "+filename);
 -		
 -		Scanner sc=io.FileUtil.openInFile(filename);
 -		ArrayList<String>s=new ArrayList<String>();
 -		ArrayList<String>tags=new ArrayList<String>();
 -		//s.add(START_SYM);
 -		while(sc.hasNextLine()){
 -			String line=sc.nextLine();
 -			String toks[]=line.split("\t");
 -			if(toks.length<2){
 -				s.add(END_SYM);
 -				tags.add(END_SYM);
 -				if(s.size()>2){
 -					sent.add(s.toArray(new String[0]));
 -					tag.add(tags.toArray(new String [0]));
 -				}
 -				s=new ArrayList<String>();
 -				tags=new ArrayList<String>();
 -			//	s.add(START_SYM);
 -				continue;
 -			}
 -			
 -			String tok=toks[1].toLowerCase();
 -			if(convertNumTok && tok.matches(".*\\d.*")){
 -				tok=NUM_TOK;
 -			}
 -			s.add(tok);
 -			
 -			if(toks.length>3){
 -				tok=toks[3].toLowerCase();
 -			}else{
 -				tok="_";
 -			}
 -			tags.add(tok);
 -			
 -		}
 -		sc.close();
 -		
 -		for(int i=0;i<sent.size();i++){
 -			String[]toks=sent.get(i);
 -			for(int j=0;j<toks.length;j++){
 -				addVocab(toks[j]);
 -				addTag(tag.get(i)[j]);
 -			}
 -		}
 -		
 -		buildVocab();
 -		buildData();
-		System.out.println(data.size()+" sentences, "+vocab.keySet().size()+" word types");
 -	}
 -
 -	public String[] get(int idx){
 -		return sent.get(idx);
 -	}
 -	
 -	private void addVocab(String s){
 -		Integer integer=freq.get(s);
 -		if(integer==null){
 -			integer=0;
 -		}
 -		freq.put(s, integer+1);
 -	}
 -	
 -	public int tokIdx(String tok){
 -		Integer integer=vocab.get(tok);
 -		if(integer==null){
 -			return V;
 -		}
 -		return integer;
 -	}
 -	
 -	public int tagIdx(String tok){
 -		Integer integer=tagVocab.get(tok);
 -		if(integer==null){
 -			return tagV;
 -		}
 -		return integer;
 -	}
 -	
 -	private void buildData(){
 -		data=new ArrayList<int[]>();
 -		for(int i=0;i<sent.size();i++){
 -			String s[]=sent.get(i);
 -			data.add(new int [s.length]);
 -			for(int j=0;j<s.length;j++){
 -				data.get(i)[j]=tokIdx(s[j]);
 -			}
 -		}
 -		
 -		tagData=new ArrayList<int[]>();
 -		for(int i=0;i<tag.size();i++){
 -			String s[]=tag.get(i);
 -			tagData.add(new int [s.length]);
 -			for(int j=0;j<s.length;j++){
 -				tagData.get(i)[j]=tagIdx(s[j]);
 -			}
 -		}
 -		sent=null;
 -		tag=null;
 -		System.gc();
 -	}
 -	
 -	public int [] getInt(int idx){
 -		return data.get(idx);
 -	}
 -	
 -	/**
 -	 * 
 -	 * @return size of vocabulary 
 -	 */
 -	public int getVocabSize(){
 -		return V;
 -	}
 -	
 -	public int [][]getAllData(){
 -		return data.toArray(new int [0][]);
 -	}
 -	
 -	public int [][]getTagData(){
 -		return tagData.toArray(new int [0][]);
 -	}
 -	
 -	private void buildVocab(){
 -		for (String key:freq.keySet()){
 -			if(freq.get(key)>2){
 -				vocab.put(key, V);
 -				V++;
 -			}
 -		}
 -		io.SerializedObjects.writeSerializedObject(vocab, alphaFilename);
 -		io.SerializedObjects.writeSerializedObject(tagVocab,tagalphaFilename);
 -	}
 -
 -	private void addTag(String tag){
 -		Integer i=tagVocab.get(tag);
 -		if(i==null){
 -			tagVocab.put(tag, tagV);
 -			tagV++;
 -		}
 -	}
 -	
 -}
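buildVocab above keeps only words seen more than twice, giving them indices 0..V-1, and tokIdx maps every other token to index V, a shared unknown-word slot; this is why the HMMs below are sized with getVocabSize()+1 emission symbols. A small sketch of that convention ("zyzzyva" is a hypothetical token assumed absent from training):

	Corpus c = new Corpus("../posdata/small_train.txt"); // training path as in POS.java
	int V = c.getVocabSize();      // in-vocabulary words occupy indices 0..V-1
	int unk = c.tokIdx("zyzzyva"); // any unseen or rare token maps to V
	assert unk == V;               // the extra emission column is the UNK slot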
 diff --git a/gi/posterior-regularisation/prjava/src/hmm/HMM.java b/gi/posterior-regularisation/prjava/src/hmm/HMM.java deleted file mode 100644 index 17a4679f..00000000 --- a/gi/posterior-regularisation/prjava/src/hmm/HMM.java +++ /dev/null @@ -1,579 +0,0 @@ -package hmm;
 -
 -import java.io.File;
 -import java.io.FileNotFoundException;
 -import java.io.IOException;
 -import java.io.PrintStream;
 -import java.util.ArrayList;
 -import java.util.Scanner;
 -
 -public class HMM {
 -
 -	
 -	//trans[i][j]=prob of going FROM i to j
 -	double [][]trans;
 -	double [][]emit;
 -	double []pi;
 -	int  [][]data;
 -	int [][]tagdata;
 -	
 -	double logtrans[][];
 -	
 -	public HMMObjective o;
 -	
 -	public static void main(String[] args) {
 -	
 -	}
 -	
 -	public HMM(int n_state,int n_emit,int [][]data){
 -		trans=new double [n_state][n_state];
 -		emit=new double[n_state][n_emit];
 -		pi=new double [n_state];
-		System.out.println("Random initial parameters");
 -		fillRand(trans);
 -		fillRand(emit);
 -		fillRand(pi);
 -
 -		this.data=data;
 -		
 -	}
 -	
 -	private void fillRand(double [][] a){
 -		for(int i=0;i<a.length;i++){
 -			for(int j=0;j<a[i].length;j++){
 -				a[i][j]=Math.random();
 -			}
 -			l1normalize(a[i]);
 -		}
 -	}
 -	private void fillRand(double []a){
 -		for(int i=0;i<a.length;i++){
 -				a[i]=Math.random();
 -		}
 -		l1normalize(a);
 -	}
 -	
 -	private double loglikely=0;
 -	
 -	public void EM(){
 -		double trans_exp_cnt[][]=new double [trans.length][trans.length];
 -		double emit_exp_cnt[][]=new double[trans.length][emit[0].length];
 -		double start_exp_cnt[]=new double[trans.length];
 -		loglikely=0;
 -		
 -		//E
 -		for(int i=0;i<data.length;i++){
 -			
 -			double [][][] post=forwardBackward(data[i]);
 -			incrementExpCnt(post, data[i], 
 -					trans_exp_cnt,
 -					emit_exp_cnt,
 -					start_exp_cnt);
 -			
 -			
 -			if(i%100==0){
 -				System.out.print(".");
 -			}
 -			if(i%1000==0){
 -				System.out.println(i);
 -			}
 -			
 -		}
 -		System.out.println("Log likelihood: "+loglikely);
 -		
 -		//M
 -		addOneSmooth(emit_exp_cnt);
 -		for(int i=0;i<trans.length;i++){
 -		
 -			//transition probs
 -			double sum=0;
 -			for(int j=0;j<trans.length;j++){
 -				sum+=trans_exp_cnt[i][j];
 -			}
-			//avoid NaN
 -			if(sum==0){
 -				sum=1;
 -			}
 -			for(int j=0;j<trans[i].length;j++){
 -				trans[i][j]=trans_exp_cnt[i][j]/sum;
 -			}
 -			
 -			//emission probs
 -			
 -			sum=0;
 -			for(int j=0;j<emit[i].length;j++){
 -				sum+=emit_exp_cnt[i][j];
 -			}
-			//avoid NaN
 -			if(sum==0){
 -				sum=1;
 -			}
 -			for(int j=0;j<emit[i].length;j++){
 -				emit[i][j]=emit_exp_cnt[i][j]/sum;
 -			}
 -			
 -			
 -			//initial probs
 -			for(int j=0;j<pi.length;j++){
 -				pi[j]=start_exp_cnt[j];
 -			}
 -			l1normalize(pi);
 -		}
 -	}
 -	
 -	private double [][][]forwardBackward(int [] seq){
 -		double a[][]=new double [seq.length][trans.length];
 -		double b[][]=new double [seq.length][trans.length];
 -		
 -		int len=seq.length;
 -		//initialize the first step
 -		for(int i=0;i<trans.length;i++){
 -			a[0][i]=emit[i][seq[0]]*pi[i];
 -			b[len-1][i]=1;
 -		}
 -		
-		//log of denominator for the likelihood
 -		double c=Math.log(l1norm(a[0]));
 -		
 -		l1normalize(a[0]);
 -		l1normalize(b[len-1]);
 -		
 -		
 -		
 -		//forward
 -		for(int n=1;n<len;n++){
 -			for(int i=0;i<trans.length;i++){
 -				for(int j=0;j<trans.length;j++){
 -					a[n][i]+=trans[j][i]*a[n-1][j];
 -				}
 -				a[n][i]*=emit[i][seq[n]];
 -			}
 -			c+=Math.log(l1norm(a[n]));
 -			l1normalize(a[n]);
 -		}
 -		
 -		loglikely+=c;
 -		
 -		//backward
 -		for(int n=len-2;n>=0;n--){
 -			for(int i=0;i<trans.length;i++){
 -				for(int j=0;j<trans.length;j++){
 -					b[n][i]+=trans[i][j]*b[n+1][j]*emit[j][seq[n+1]];
 -				}
 -			}
 -			l1normalize(b[n]);
 -		}
 -		
 -		
 -		//expected transition 
 -		double p[][][]=new double [seq.length][trans.length][trans.length];
 -		for(int n=0;n<len-1;n++){
 -			for(int i=0;i<trans.length;i++){
 -				for(int j=0;j<trans.length;j++){
 -					p[n][i][j]=a[n][i]*trans[i][j]*emit[j][seq[n+1]]*b[n+1][j];
 -					
 -				}
 -			}
 -
 -			l1normalize(p[n]);
 -		}
 -		return p;
 -	}
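forwardBackward above uses the standard scaling trick to avoid floating-point underflow: each forward column a[n] is renormalized to sum to one, and the log of each normalizer is accumulated into the sentence log-likelihood. The identity the code relies on, in the usual notation:

	\log p(x_{1:T}) = \sum_{n=1}^{T} \log c_n, \qquad c_n = \sum_i \tilde{\alpha}_n(i)

where \tilde{\alpha}_n is the unnormalized forward column computed from the already-normalized column at position n-1.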
 -	
 -	private void incrementExpCnt(
 -			double post[][][],int [] seq, 
 -			double trans_exp_cnt[][],
 -			double emit_exp_cnt[][],
 -			double start_exp_cnt[])
 -	{
 -		
 -		for(int n=0;n<post.length;n++){
 -			for(int i=0;i<trans.length;i++){
 -				double py=0;
 -				for(int j=0;j<trans.length;j++){
 -					py+=post[n][i][j];
 -					trans_exp_cnt[i][j]+=post[n][i][j];
 -				}
 -
 -				emit_exp_cnt[i][seq[n]]+=py;				
 -				
 -			}
 -		}
 -		
 -		//the first state
 -		for(int i=0;i<trans.length;i++){
 -			double py=0;
 -			for(int j=0;j<trans.length;j++){
 -				py+=post[0][i][j];
 -			}
 -			start_exp_cnt[i]+=py;	
 -		}
 -		
 -		
 -		//the last state
 -		int len=post.length;
 -		for(int i=0;i<trans.length;i++){
 -			double py=0;
 -			for(int j=0;j<trans.length;j++){
 -				py+=post[len-2][j][i];
 -			}
 -			emit_exp_cnt[i][seq[len-1]]+=py;	
 -		}
 -	}
 -	
 -	public void l1normalize(double [] a){
 -		double sum=0;
 -		for(int i=0;i<a.length;i++){
 -			sum+=a[i];
 -		}
 -		if(sum==0){
 -			return ;
 -		}
 -		for(int i=0;i<a.length;i++){
 -			a[i]/=sum;
 -		}
 -	}
 -	
 -	public  void l1normalize(double [][] a){
 -		double sum=0;
 -		for(int i=0;i<a.length;i++){
 -			for(int j=0;j<a[i].length;j++){
 -				sum+=a[i][j];
 -			}
 -		}
 -		if(sum==0){
 -			return;
 -		}
 -		for(int i=0;i<a.length;i++){
 -			for(int j=0;j<a[i].length;j++){
 -				a[i][j]/=sum;
 -			}
 -		}
 -	}
 -	
 -	public void writeModel(String modelFilename) throws FileNotFoundException, IOException{
 -		PrintStream ps=io.FileUtil.printstream(new File(modelFilename));
 -		ps.println(trans.length);
 -		ps.println("Initial Probabilities:");
 -		for(int i=0;i<pi.length;i++){
 -			ps.print(pi[i]+"\t");
 -		}
 -		ps.println();
 -		ps.println("Transition Probabilities:");
 -		for(int i=0;i<trans.length;i++){
 -			for(int j=0;j<trans[i].length;j++){
 -				ps.print(trans[i][j]+"\t");
 -			}
 -			ps.println();
 -		}
 -		ps.println("Emission Probabilities:");
 -		ps.println(emit[0].length);
 -		for(int i=0;i<trans.length;i++){
 -			for(int j=0;j<emit[i].length;j++){
 -				ps.println(emit[i][j]);
 -			}
 -			ps.println();
 -		}
 -		ps.close();
 -	}
 -	
 -	public HMM(){
 -	
 -	}
 -	
 -	public void readModel(String modelFilename){
 -		Scanner sc=io.FileUtil.openInFile(modelFilename);
 -	
 -		int n_state=sc.nextInt();
 -		sc.nextLine();
 -		sc.nextLine();
 -		pi=new double [n_state];
 -		for(int i=0;i<n_state;i++){
 -			pi[i]=sc.nextDouble();
 -		}
 -		sc.nextLine();
 -		sc.nextLine();
 -		trans=new double[n_state][n_state];
 -		for(int i=0;i<trans.length;i++){
 -			for(int j=0;j<trans[i].length;j++){
 -				trans[i][j]=sc.nextDouble();
 -			}
 -		}
 -		sc.nextLine();
 -		sc.nextLine();
 -		
 -		int n_obs=sc.nextInt();
 -		emit=new double[n_state][n_obs];
 -		for(int i=0;i<trans.length;i++){
 -			for(int j=0;j<emit[i].length;j++){
 -				emit[i][j]=sc.nextDouble();
 -			}
 -		}
 -		sc.close();
 -	}
 -	
 -	public int []viterbi(int [] seq){
 -		double [][]p=new double [seq.length][trans.length];
 -		int backp[][]=new int [seq.length][trans.length];
 -		
 -		for(int i=0;i<trans.length;i++){
 -			p[0][i]=Math.log(emit[i][seq[0]]*pi[i]);
 -		}
 -		
 -		double a[][]=logtrans;
 -		if(logtrans==null){
 -			a=new double [trans.length][trans.length];
 -			for(int i=0;i<trans.length;i++){
 -				for(int j=0;j<trans.length;j++){
 -					a[i][j]=Math.log(trans[i][j]);
 -				}
 -			}
 -			logtrans=a;
 -		}
 -		
 -		double maxprob=0;
 -		for(int n=1;n<seq.length;n++){
 -			for(int i=0;i<trans.length;i++){
 -				maxprob=p[n-1][0]+a[0][i];
 -				backp[n][i]=0;
 -				for(int j=1;j<trans.length;j++){
 -					double prob=p[n-1][j]+a[j][i];
 -					if(maxprob<prob){
 -						backp[n][i]=j;
 -						maxprob=prob;
 -					}
 -				}
 -				p[n][i]=maxprob+Math.log(emit[i][seq[n]]);
 -			}
 -		}
 -		
 -		maxprob=p[seq.length-1][0];
 -		int maxIdx=0;
 -		for(int i=1;i<trans.length;i++){
 -			if(p[seq.length-1][i]>maxprob){
 -				maxprob=p[seq.length-1][i];
 -				maxIdx=i;
 -			}
 -		}
 -		int ans[]=new int [seq.length];
 -		ans[seq.length-1]=maxIdx;
 -		for(int i=seq.length-2;i>=0;i--){
 -			ans[i]=backp[i+1][ans[i+1]];
 -		}
 -		return ans;
 -	}
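viterbi above runs the max-product recursion in log space, so long products of small probabilities never underflow (a zero probability becomes -Infinity and simply loses every max). With delta as the p[][] array, psi as the backp[][] backpointers, e the emission matrix and t the transition matrix (t_{ji} = trans[j][i], from j to i):

	\delta_n(i) = \log e_i(x_n) + \max_j [\, \delta_{n-1}(j) + \log t_{ji} \,], \qquad \psi_n(i) = \arg\max_j [\, \delta_{n-1}(j) + \log t_{ji} \,]

The best final state is then traced back through psi to recover the full tag sequence.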
 -	
 -	public double l1norm(double a[]){
 -		double norm=0;
 -		for(int i=0;i<a.length;i++){
 -			norm += a[i];
 -		}
 -		return norm;
 -	}
 -	
 -	public double [][]getEmitProb(){
 -		return emit;
 -	}
 -	
 -	public int [] sample(int terminalSym){
 -		ArrayList<Integer > s=new ArrayList<Integer>();
 -		int state=sample(pi);
 -		int sym=sample(emit[state]);
 -		while(sym!=terminalSym){
 -			s.add(sym);
 -			state=sample(trans[state]);
 -			sym=sample(emit[state]);
 -		}
 -		
 -		int ans[]=new int [s.size()];
 -		for(int i=0;i<ans.length;i++){
 -			ans[i]=s.get(i);
 -		}
 -		return ans;
 -	}
 -	
 -	public int sample(double p[]){
 -		double r=Math.random();
 -		double sum=0;
 -		for(int i=0;i<p.length;i++){
 -			sum+=p[i];
 -			if(sum>=r){
 -				return i;
 -			}
 -		}
 -		return p.length-1;
 -	}
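sample(double p[]) above is inverse-CDF sampling: draw r uniformly from [0,1) and return the first index whose cumulative mass reaches r; the final return statement guards against the entries summing to slightly less than one through rounding. A hypothetical usage sketch, assuming a previously saved model and that endId is the index Corpus assigned to END_SYM:

	HMM hmm = new HMM();
	hmm.readModel("../posdata/posModel.out"); // model path as in POS.java
	int endId = 0;                            // hypothetical: index of Corpus.END_SYM
	int[] tokens = hmm.sample(endId);         // synthetic sentence; END_SYM itself is excluded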
 -	
 -	public void train(int tagdata[][]){
 -		double trans_exp_cnt[][]=new double [trans.length][trans.length];
 -		double emit_exp_cnt[][]=new double[trans.length][emit[0].length];
 -		double start_exp_cnt[]=new double[trans.length];
 -		
 -		for(int i=0;i<tagdata.length;i++){
 -			start_exp_cnt[tagdata[i][0]]++;
 -			
 -			for(int j=0;j<tagdata[i].length;j++){
 -				if(j+1<tagdata[i].length){
 -					trans_exp_cnt[ tagdata[i][j] ] [ tagdata[i][j+1] ]++;
 -				}
 -				emit_exp_cnt[tagdata[i][j]][data[i][j]]++;
 -			}
 -			
 -		}
 -		
 -		//M
 -		addOneSmooth(emit_exp_cnt);
 -		for(int i=0;i<trans.length;i++){
 -		
 -			//transition probs
 -			double sum=0;
 -			for(int j=0;j<trans.length;j++){
 -				sum+=trans_exp_cnt[i][j];
 -			}
 -			if(sum==0){
 -				sum=1;
 -			}
 -			for(int j=0;j<trans[i].length;j++){
 -				trans[i][j]=trans_exp_cnt[i][j]/sum;
 -			}
 -			
 -			//emission probs
 -
 -			sum=0;
 -			for(int j=0;j<emit[i].length;j++){
 -				sum+=emit_exp_cnt[i][j];
 -			}
 -			if(sum==0){
 -				sum=1;
 -			}
 -			for(int j=0;j<emit[i].length;j++){
 -				emit[i][j]=emit_exp_cnt[i][j]/sum;
 -			}
 -
 -			
 -			//initial probs
 -			for(int j=0;j<pi.length;j++){
 -				pi[j]=start_exp_cnt[j];
 -			}
 -			l1normalize(pi);
 -		}
 -	}
 -	
 -	private void addOneSmooth(double a[][]){
 -		for(int i=0;i<a.length;i++){
 -			for(int j=0;j<a[i].length;j++){
 -				a[i][j]+=0.01;
 -			}
 -			//l1normalize(a[i]);
 -		}
 -	}
 -	
 -	public void PREM(){
 -		
 -		o.optimizeWithProjectedGradientDescent();
 -		
 -		double trans_exp_cnt[][]=new double [trans.length][trans.length];
 -		double emit_exp_cnt[][]=new double[trans.length][emit[0].length];
 -		double start_exp_cnt[]=new double[trans.length];
 -		
 -		o.loglikelihood=0;
 -		//E
 -		for(int sentNum=0;sentNum<data.length;sentNum++){
 -			
 -			double [][][] post=o.forwardBackward(sentNum);
 -			incrementExpCnt(post, data[sentNum], 
 -					trans_exp_cnt,
 -					emit_exp_cnt,
 -					start_exp_cnt);
 -			
 -			
 -			if(sentNum%100==0){
 -				System.out.print(".");
 -			}
 -			if(sentNum%1000==0){
 -				System.out.println(sentNum);
 -			}
 -			
 -		}
 -		
 -		System.out.println("Log likelihood: "+o.getValue());
 -		
 -		//M
 -		addOneSmooth(emit_exp_cnt);
 -		for(int i=0;i<trans.length;i++){
 -		
 -			//transition probs
 -			double sum=0;
 -			for(int j=0;j<trans.length;j++){
 -				sum+=trans_exp_cnt[i][j];
 -			}
-			//avoid NaN
 -			if(sum==0){
 -				sum=1;
 -			}
 -			for(int j=0;j<trans[i].length;j++){
 -				trans[i][j]=trans_exp_cnt[i][j]/sum;
 -			}
 -			
 -			//emission probs
 -			
 -			sum=0;
 -			for(int j=0;j<emit[i].length;j++){
 -				sum+=emit_exp_cnt[i][j];
 -			}
-			//avoid NaN
 -			if(sum==0){
 -				sum=1;
 -			}
 -			for(int j=0;j<emit[i].length;j++){
 -				emit[i][j]=emit_exp_cnt[i][j]/sum;
 -			}
 -			
 -			
 -			//initial probs
 -			for(int j=0;j<pi.length;j++){
 -				pi[j]=start_exp_cnt[j];
 -			}
 -			l1normalize(pi);
 -		}
 -		
 -	}
 -	
 -	public void computeMaxwt(double[][]maxwt, int[][] d){
 -
 -		for(int sentNum=0;sentNum<d.length;sentNum++){
 -			double post[][][]=forwardBackward(d[sentNum]);
 -			
 -			for(int n=0;n<post.length;n++){
 -				for(int i=0;i<trans.length;i++){
 -					double py=0;
 -					for(int j=0;j<trans.length;j++){
 -						py+=post[n][i][j];
 -					}
 -
 -					if(py>maxwt[i][d[sentNum][n]]){
 -						maxwt[i][d[sentNum][n]]=py;
 -					}
 -					
 -				}
 -			}
 -			
 -			//the last state
 -			int len=post.length;
 -			for(int i=0;i<trans.length;i++){
 -				double py=0;
 -				for(int j=0;j<trans.length;j++){
 -					py+=post[len-2][j][i];
 -				}
 -				
 -				if(py>maxwt[i][d[sentNum][len-1]]){
 -					maxwt[i][d[sentNum][len-1]]=py;
 -				}
 -				
 -			}
 -			
 -		}
 -	
 -	}
 -	
 -}//end of class
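computeMaxwt above fills maxwt[t][w] with the largest posterior probability that tag t receives on any occurrence of word w; summing the filled table gives the L1/Linf penalty term that the posterior-regularisation code in this directory constrains, roughly

	\sum_{w} \sum_{t} \max_{n : x_n = w} q(y_n = t)

an L-infinity norm over the occurrences of each word combined with an L1 norm over (word, tag) pairs, which is small exactly when each word concentrates its posterior mass on few tags.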
 diff --git a/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java b/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java deleted file mode 100644 index 70b6c966..00000000 --- a/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java +++ /dev/null @@ -1,351 +0,0 @@ -package hmm;
 -
 -import gnu.trove.TIntArrayList;
 -import optimization.gradientBasedMethods.ProjectedGradientDescent;
 -import optimization.gradientBasedMethods.ProjectedObjective;
 -import optimization.gradientBasedMethods.stats.OptimizerStats;
 -import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
 -import optimization.linesearch.InterpolationPickFirstStep;
 -import optimization.linesearch.LineSearchMethod;
 -import optimization.projections.SimplexProjection;
 -import optimization.stopCriteria.CompositeStopingCriteria;
 -import optimization.stopCriteria.ProjectedGradientL2Norm;
 -import optimization.stopCriteria.StopingCriteria;
 -import optimization.stopCriteria.ValueDifference;
 -
 -public class HMMObjective extends ProjectedObjective{
 -
 -	
 -	private static final double GRAD_DIFF = 3;
 -	public static double INIT_STEP_SIZE=10;
 -	public static double VAL_DIFF=1000;
 -	
 -	private HMM hmm;
 -	double[] newPoint  ;
 -	
 -	//posterior[sent num][tok num][tag]=index into lambda
 -	private int posteriorMap[][][];
-	//projection[word][tag].get(occurrence)=index into lambda
 -	private TIntArrayList projectionMap[][];
 -
 -	//Size of the simplex
 -	public double scale=10;
 -	private SimplexProjection projection;
 -	
 -	private int wordFreq[];
 -	private static int MIN_FREQ=10;
 -	private int numWordsToProject=0;
 -	
 -	private int n_param;
 -	
 -	public  double loglikelihood;
 -	
 -	public HMMObjective(HMM h){
 -		hmm=h;
 -		
 -		countWords();
 -		buildMap();
 -
 -		gradient=new double [n_param];
 -		projection = new SimplexProjection(scale);
 -		newPoint  = new double[n_param];
 -		setInitialParameters(new double[n_param]);
 -		
 -	}
 -	
 -	/**@brief counts word frequency in the corpus
 -	 * 
 -	 */
 -	private void countWords(){
 -		wordFreq=new int [hmm.emit[0].length];
 -		for(int i=0;i<hmm.data.length;i++){
 -			for(int j=0;j<hmm.data[i].length;j++){
 -				wordFreq[hmm.data[i][j]]++;
 -			}
 -		}
 -	}
 -	
 -	/**@brief build posterior and projection indices
 -	 * 
 -	 */
 -	private void buildMap(){
-		//numbers of sentences, hidden states and words
 -		int n_states=hmm.trans.length;
 -		int n_words=hmm.emit[0].length;
 -		int n_sents=hmm.data.length;
 -		
 -		n_param=0;
 -		posteriorMap=new int[n_sents][][];
 -		projectionMap=new TIntArrayList[n_words][];
 -		for(int sentNum=0;sentNum<n_sents;sentNum++){
 -			int [] data=hmm.data[sentNum];
 -			posteriorMap[sentNum]=new int[data.length][n_states];
 -			numWordsToProject=0;
 -			for(int i=0;i<data.length;i++){
 -				int word=data[i];
 -				for(int state=0;state<n_states;state++){
 -					if(wordFreq[word]>MIN_FREQ){
 -						if(projectionMap[word]==null){
 -							projectionMap[word]=new TIntArrayList[n_states];
 -						}
 -			//			if(posteriorMap[sentNum][i]==null){
 -			//				posteriorMap[sentNum][i]=new int[n_states];
 -			//			}
 -						
 -						posteriorMap[sentNum][i][state]=n_param;
 -						if(projectionMap[word][state]==null){
 -							projectionMap[word][state]=new TIntArrayList();
 -							numWordsToProject++;
 -						}
 -						projectionMap[word][state].add(n_param);
 -						n_param++;
 -					}
 -					else{
 -						posteriorMap[sentNum][i][state]=-1;
 -					}
 -				}
 -			}
 -		}
 -	}
 -	
 -	@Override
 -	public double[] projectPoint(double[] point) {
-		// project each constrained (word,tag) block of lambdas back onto the scaled simplex
 -		for(int i=0;i<projectionMap.length;i++){
 -			
 -			if(projectionMap[i]==null){
 -				//this word is not constrained
 -				continue;
 -			}
 -			
 -			for(int j=0;j<projectionMap[i].length;j++){
 -				TIntArrayList instances=projectionMap[i][j];
 -				double[] toProject = new double[instances.size()];
 -				
 -				for (int k = 0; k < toProject.length; k++) {
 -					//	System.out.print(instances.get(k) + " ");
 -						toProject[k] = point[instances.get(k)];
 -				}
 -				
 -				projection.project(toProject);
 -				for (int k = 0; k < toProject.length; k++) {
 -					newPoint[instances.get(k)]=toProject[k];
 -				}
 -			}
 -		}
 -		return newPoint;
 -	}
 -
 -	@Override
 -	public double[] getGradient() {
-		// gradient entries are filled in by updateFunction(); here we only count the call
 -		gradientCalls++;
 -		return gradient;
 -	}
 -
 -	@Override
 -	public double getValue() {
-		// the log-likelihood is accumulated by updateFunction(); here we only count the call
 -		functionCalls++;
 -		return loglikelihood;
 -	}
 -	
 -
 -	@Override
 -	public String toString() {
-		// print the current parameters, one hundred per row
 -		StringBuffer sb = new StringBuffer();
 -		for (int i = 0; i < parameters.length; i++) {
 -			sb.append(parameters[i]+" ");
 -			if(i%100==0){
 -				sb.append("\n");
 -			}
 -		}
 -		sb.append("\n");
 -		/*
 -		for (int i = 0; i < gradient.length; i++) {
 -			sb.append(gradient[i]+" ");
 -			if(i%100==0){
 -				sb.append("\n");
 -			}
 -		}
 -		sb.append("\n");
 -		*/
 -		return sb.toString();
 -	}
 -
 -	
 -	/**
-	 * @param sentNum index of the sentence in hmm.data
 -	 * @return posterior probability of each transition
 -	 */
 -	public double [][][]forwardBackward(int sentNum){
 -		int [] seq=hmm.data[sentNum];
 -		int n_states=hmm.trans.length;
 -		double a[][]=new double [seq.length][n_states];
 -		double b[][]=new double [seq.length][n_states];
 -		
 -		int len=seq.length;
 -		
 -		boolean  constrained=
 -			(projectionMap[seq[0]]!=null);
 -
 -		//initialize the first step
 -		for(int i=0;i<n_states;i++){
 -			a[0][i]=hmm.emit[i][seq[0]]*hmm.pi[i];
 -			if(constrained){
 -				a[0][i]*=
 -					Math.exp(- parameters[ posteriorMap[sentNum][0][i] ] );
 -			}
 -			b[len-1][i]=1;
 -		}
 -		
 -		loglikelihood+=Math.log(hmm.l1norm(a[0]));		
 -		hmm.l1normalize(a[0]);
 -		hmm.l1normalize(b[len-1]);
 -		
 -		//forward
 -		for(int n=1;n<len;n++){
 -			
 -			constrained=
 -				(projectionMap[seq[n]]!=null);
 -			
 -			for(int i=0;i<n_states;i++){
 -				for(int j=0;j<n_states;j++){
 -					a[n][i]+=hmm.trans[j][i]*a[n-1][j];
 -				}
 -				a[n][i]*=hmm.emit[i][seq[n]];
 -				
 -				if(constrained){
 -					a[n][i]*=
 -						Math.exp(- parameters[ posteriorMap[sentNum][n][i] ] );
 -				}
 -				
 -			}
 -			loglikelihood+=Math.log(hmm.l1norm(a[n]));
 -			hmm.l1normalize(a[n]);
 -		}
 -		
 -		//temp variable for e^{-\lambda}
 -		double factor=1;
 -		//backward
 -		for(int n=len-2;n>=0;n--){
 -			
 -			constrained=
 -				(projectionMap[seq[n+1]]!=null);
 -			
 -			for(int i=0;i<n_states;i++){
 -				for(int j=0;j<n_states;j++){
 -					
 -					if(constrained){
 -						factor=
 -							Math.exp(- parameters[ posteriorMap[sentNum][n+1][j] ] );
 -					}else{
 -						factor=1;
 -					}
 -					
 -					b[n][i]+=hmm.trans[i][j]*b[n+1][j]*hmm.emit[j][seq[n+1]]*factor;
 -					
 -				}
 -			}
 -			hmm.l1normalize(b[n]);
 -		}
 -		
 -		//expected transition 
 -		double p[][][]=new double [seq.length][n_states][n_states];
 -		for(int n=0;n<len-1;n++){
 -			
 -			constrained=
 -				(projectionMap[seq[n+1]]!=null);
 -			
 -			for(int i=0;i<n_states;i++){
 -				for(int j=0;j<n_states;j++){
 -					
 -					if(constrained){
 -						factor=
 -							Math.exp(- parameters[ posteriorMap[sentNum][n+1][j] ] );
 -					}else{
 -						factor=1;
 -					}
 -					
 -					p[n][i][j]=a[n][i]*hmm.trans[i][j]*
 -						hmm.emit[j][seq[n+1]]*b[n+1][j]*factor;
 -					
 -				}
 -			}
 -
 -			hmm.l1normalize(p[n]);
 -		}
 -		return p;
 -	}
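The exp(-parameters[...]) factors above implement the projected E-step of posterior regularisation: the distribution being computed is not p(y|x) but

	q(y) \propto p(y \mid x) \exp\{ -\lambda \cdot \phi(x, y) \}

and because each feature \phi_{w,t} fires once per position where word w receives tag t, the penalty factorizes and exp(-lambda) simply multiplies the corresponding emission score in both the forward and the backward pass.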
 -
 -	public void optimizeWithProjectedGradientDescent(){
 -		LineSearchMethod ls =
 -			new ArmijoLineSearchMinimizationAlongProjectionArc
 -				(new InterpolationPickFirstStep(INIT_STEP_SIZE));
 -		
 -		OptimizerStats stats = new OptimizerStats();
 -		
 -		
 -		ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
 -		StopingCriteria stopGrad = new ProjectedGradientL2Norm(GRAD_DIFF);
 -		StopingCriteria stopValue = new ValueDifference(VAL_DIFF);
 -		CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
 -		compositeStop.add(stopGrad);
 -		compositeStop.add(stopValue);
 -		
 -		optimizer.setMaxIterations(10);
 -		updateFunction();
 -		boolean succed = optimizer.optimize(this,stats,compositeStop);
-		System.out.println("Ended optimization Projected Gradient Descent\n" + stats.prettyPrint(1));
 -		if(succed){
 -			System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
 -		}else{
 -			System.out.println("Failed to optimize");
 -		}
 -	}
 -	
 -	@Override
 -	public void setParameters(double[] params) {
 -		super.setParameters(params);
 -		updateFunction();
 -	}
 -	
 -	private void updateFunction(){
 -		
 -		updateCalls++;
 -		loglikelihood=0;
 -	
 -		for(int sentNum=0;sentNum<hmm.data.length;sentNum++){
 -			double [][][]p=forwardBackward(sentNum);
 -			
 -			for(int n=0;n<p.length-1;n++){
 -				for(int i=0;i<p[n].length;i++){
 -					if(projectionMap[hmm.data[sentNum][n]]!=null){
 -						double posterior=0;
 -						for(int j=0;j<p[n][i].length;j++){
 -							posterior+=p[n][i][j];
 -						}
 -						gradient[posteriorMap[sentNum][n][i]]=-posterior;
 -					}
 -				}
 -			}
 -			
 -			//the last state
 -			int n=p.length-2;
 -			for(int i=0;i<p[n].length;i++){
 -				if(projectionMap[hmm.data[sentNum][n+1]]!=null){
 -					
 -					double posterior=0;
 -					for(int j=0;j<p[n].length;j++){
 -						posterior+=p[n][j][i];
 -					}
 -					gradient[posteriorMap[sentNum][n+1][i]]=-posterior;
 -				
 -				}
 -			}	
 -		}
 -		
 -	}
 -	
 -}
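SimplexProjection(scale) above is what keeps the L1/Linf constraint active: after every gradient step, the lambdas belonging to one (word, tag) pair, one entry per occurrence, are projected back onto a non-negative set whose coordinates sum to at most scale. A sketch of the standard sort-based Euclidean projection that such a class presumably implements, hypothetical code assuming the constraint set {v >= 0, sum(v) <= s}:

	static void projectOntoScaledSimplex(double[] v, double s) {
		double clippedSum = 0;
		for (double x : v) clippedSum += Math.max(x, 0);
		if (clippedSum <= s) {                // feasible once negatives are clipped
			for (int i = 0; i < v.length; i++) v[i] = Math.max(v[i], 0);
			return;
		}
		double[] u = v.clone();
		java.util.Arrays.sort(u);             // ascending; scan from the largest entry down
		double cum = 0, theta = 0;
		for (int i = u.length - 1, k = 1; i >= 0; i--, k++) {
			cum += u[i];
			double t = (cum - s) / k;         // candidate uniform shift for the top k entries
			if (t >= u[i]) break;             // u[i] would be shifted below zero: stop here
			theta = t;
		}
		for (int i = 0; i < v.length; i++) v[i] = Math.max(v[i] - theta, 0);
	}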
 diff --git a/gi/posterior-regularisation/prjava/src/hmm/POS.java b/gi/posterior-regularisation/prjava/src/hmm/POS.java deleted file mode 100644 index bdcbc683..00000000 --- a/gi/posterior-regularisation/prjava/src/hmm/POS.java +++ /dev/null @@ -1,120 +0,0 @@ -package hmm;
 -
 -import java.io.File;
 -import java.io.FileNotFoundException;
 -import java.io.IOException;
 -import java.io.PrintStream;
 -import java.util.HashMap;
 -
 -import data.Corpus;
 -
 -public class POS {
 -
 -	//public String trainFilename="../posdata/en_train.conll";
 -	public static String trainFilename="../posdata/small_train.txt";
 -//	public static String trainFilename="../posdata/en_test.conll";
 -//	public static String trainFilename="../posdata/trial1.txt";
 -	
 -	public static String testFilename="../posdata/en_test.conll";
 -	//public static String testFilename="../posdata/trial1.txt";
 -	
 -	public static String predFilename="../posdata/en_test.predict.conll";
 -	public static String modelFilename="../posdata/posModel.out";
 -	public static final int ITER=20;
 -	public static final int N_STATE=30;
 -	
 -	public static void main(String[] args) {
 -		//POS p=new POS();
 -		//POS p=new POS(true);
 -		try {
 -			PRPOS();
 -		} catch (FileNotFoundException e) {
 -			e.printStackTrace();
 -		} catch (IOException e) {
 -			e.printStackTrace();
 -		}
 -	}
 -
 -	
 -	public POS() throws FileNotFoundException, IOException{
 -		Corpus c= new Corpus(trainFilename);
 -		//size of vocabulary +1 for unknown tokens
 -		HMM hmm =new HMM(N_STATE, c.getVocabSize()+1,c.getAllData());
 -		for(int i=0;i<ITER;i++){
-			System.out.println("Iter: "+i);
 -			hmm.EM();
 -			if((i+1)%10==0){
 -				hmm.writeModel(modelFilename+i);
 -			}
 -		}
 -
 -		hmm.writeModel(modelFilename);
 -		
 -		Corpus test=new Corpus(testFilename,c.vocab);
 -		
 -		PrintStream ps= io.FileUtil.printstream(new File(predFilename));
 -		
 -		int [][]data=test.getAllData();
 -		for(int i=0;i<data.length;i++){
 -			int []tag=hmm.viterbi(data[i]);
 -			String sent[]=test.get(i);
 -			for(int j=0;j<data[i].length;j++){
 -				ps.println(sent[j]+"\t"+tag[j]);
 -			}
 -			ps.println();
 -		}
 -		ps.close();
 -	}
 -	
 -	//POS induction with L1/Linf constraints
 -	public static void PRPOS() throws FileNotFoundException, IOException{
 -		Corpus c= new Corpus(trainFilename);
 -		//size of vocabulary +1 for unknown tokens
 -		HMM hmm =new HMM(N_STATE, c.getVocabSize()+1,c.getAllData());
 -		hmm.o=new HMMObjective(hmm);
 -		for(int i=0;i<ITER;i++){
 -			System.out.println("Iter: "+i);
 -			hmm.PREM();
 -			if((i+1)%10==0){
 -				hmm.writeModel(modelFilename+i);
 -			}
 -		}
 -
 -		hmm.writeModel(modelFilename);
 -	}
 -	
 -	
 -	public POS(boolean supervised) throws FileNotFoundException, IOException{
 -		Corpus c= new Corpus(trainFilename);
 -		//size of vocabulary +1 for unknown tokens
 -		HMM hmm =new HMM(c.tagVocab.size() , c.getVocabSize()+1,c.getAllData());
 -		hmm.train(c.getTagData());
 -
 -		hmm.writeModel(modelFilename);
 -		
 -		Corpus test=new Corpus(testFilename,c.vocab);
 -		
 -		HashMap<String, Integer>tagVocab=
 -			(HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(Corpus.tagalphaFilename);
 -		String [] tagdict=new String [tagVocab.size()+1];
 -		for(String key:tagVocab.keySet()){
 -			tagdict[tagVocab.get(key)]=key;
 -		}
 -		tagdict[tagdict.length-1]=Corpus.UNK_TOK;
 -		
 -		System.out.println(c.vocab.get("<e>"));
 -		
 -		PrintStream ps= io.FileUtil.printstream(new File(predFilename));
 -		
 -		int [][]data=test.getAllData();
 -		for(int i=0;i<data.length;i++){
 -			int []tag=hmm.viterbi(data[i]);
 -			String sent[]=test.get(i);
 -			for(int j=0;j<data[i].length;j++){
 -				ps.println(sent[j]+"\t"+tagdict[tag[j]]);
 -			}
 -			ps.println();
 -		}
 -		ps.close();
 -	}
 -}
 diff --git a/gi/posterior-regularisation/prjava/src/io/FileUtil.java b/gi/posterior-regularisation/prjava/src/io/FileUtil.java deleted file mode 100644 index 6720d087..00000000 --- a/gi/posterior-regularisation/prjava/src/io/FileUtil.java +++ /dev/null @@ -1,48 +0,0 @@ -package io;
 -import java.util.*;
 -import java.util.zip.GZIPInputStream;
 -import java.util.zip.GZIPOutputStream;
 -import java.io.*;
 -public class FileUtil 
 -{
 -	public static BufferedReader reader(File file) throws FileNotFoundException, IOException
 -	{
 -		if (file.getName().endsWith(".gz"))
 -			return new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(file)), "UTF8"));
 -		else
 -			return new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF8"));
 -	}
 -	
 -	public static PrintStream printstream(File file) throws FileNotFoundException, IOException
 -	{
 -		if (file.getName().endsWith(".gz"))
 -			return new PrintStream(new GZIPOutputStream(new FileOutputStream(file)), true, "UTF8");
 -		else
 -			return new PrintStream(new FileOutputStream(file), true, "UTF8");
 -	}
 -
 -	public static Scanner openInFile(String filename)
 -	{
 -		Scanner localsc=null;
 -		try
 -		{
 -			localsc=new Scanner(new FileInputStream(filename), "UTF8");
 -
 -		}catch(IOException ioe){
 -			System.out.println(ioe.getMessage());
 -		}
 -		return localsc;
 -	}
 -	
 -	public static FileInputStream openInputStream(String infilename)
 -	{
 -		FileInputStream fis=null;
 -		try {
 -			fis = new FileInputStream(infilename);
 -			
 -		} catch (IOException ioe) {
 -			System.out.println(ioe.getMessage());
 -		}
 -		return fis;
 -	}
 -}
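openInFile above swallows the IOException and returns null, so a missing file only surfaces later as a NullPointerException at the call site (for example inside Corpus). A more fail-fast sketch with identical behavior on success, a hypothetical replacement rather than the original code:

	public static Scanner openInFile(String filename) {
		try {
			return new Scanner(new FileInputStream(filename), "UTF8");
		} catch (IOException ioe) {
			throw new RuntimeException("cannot open " + filename, ioe);
		}
	}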
 diff --git a/gi/posterior-regularisation/prjava/src/io/SerializedObjects.java b/gi/posterior-regularisation/prjava/src/io/SerializedObjects.java deleted file mode 100644 index d1631b51..00000000 --- a/gi/posterior-regularisation/prjava/src/io/SerializedObjects.java +++ /dev/null @@ -1,83 +0,0 @@ -package io; - - - -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.ObjectInput; -import java.io.ObjectInputStream; -import java.io.ObjectOutput; -import java.io.ObjectOutputStream; -import java.io.OutputStream; - -public class SerializedObjects -{ -	public static void writeSerializedObject(Object object, String outFile) -	{ -		ObjectOutput output = null; -	    try{ -	      //use buffering -	      OutputStream file = new FileOutputStream(outFile); -	      OutputStream buffer = new BufferedOutputStream( file ); -	      output = new ObjectOutputStream( buffer ); -	      output.writeObject(object); -	      buffer.close(); -	      file.close(); -	    } -	    catch(IOException ex){ -	    	ex.printStackTrace(); -	    } -	    finally{ -	      try { -	        if (output != null) { -	          //flush and close "output" and its underlying streams -	          output.close(); -	        } -	      } -	      catch (IOException ex ){ -	    	  ex.printStackTrace(); -	      } -	    } -	} -	 -	public static Object readSerializedObject(String inputFile) -	{ -		ObjectInput input = null; -	    Object recoveredObject=null; -		try{ -	      //use buffering -	      InputStream file = new FileInputStream(inputFile); -	      InputStream buffer = new BufferedInputStream(file); -	      input = new ObjectInputStream(buffer); -	      //deserialize the List -	      recoveredObject = input.readObject(); -	    } -	    catch(IOException ex){ -	    	ex.printStackTrace(); -	    } -	    catch (ClassNotFoundException ex){ -	    	ex.printStackTrace(); -	    } -	    catch(Exception ex) -	    { -	    	ex.printStackTrace(); -	    } -	    finally{ -	      try { -	        if ( input != null ) { -	          //close "input" and its underlying streams -	          input.close(); -	        } -	      } -	      catch (IOException ex){ -	    	  ex.printStackTrace(); -	      } -	    } -	    return recoveredObject; -	 } -	 -}
\ No newline at end of file diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java b/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java deleted file mode 100644 index 25fa7f09..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java +++ /dev/null @@ -1,110 +0,0 @@ -package optimization.examples; - - -import optimization.gradientBasedMethods.ConjugateGradient; -import optimization.gradientBasedMethods.GradientDescent; -import optimization.gradientBasedMethods.LBFGS; -import optimization.gradientBasedMethods.Objective; -import optimization.gradientBasedMethods.Optimizer; -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.linesearch.ArmijoLineSearchMinimization; -import optimization.linesearch.LineSearchMethod; -import optimization.stopCriteria.GradientL2Norm; -import optimization.stopCriteria.StopingCriteria; -import optimization.util.MathUtils; - -/** - *  - * @author javg - * f(x) = \sum_{i=1}^{N-1} \left[ (1-x_i)^2+ 100 (x_{i+1} - x_i^2 )^2 \right] \quad \forall x\in\mathbb{R}^N. - */ -public class GeneralizedRosenbrock extends Objective{ - -	 -	 -	public GeneralizedRosenbrock(int dimensions){ -		parameters = new double[dimensions];		 -		java.util.Arrays.fill(parameters, 0); -		gradient = new double[dimensions]; -		 -	} -	 -	public GeneralizedRosenbrock(int dimensions, double[] params){ -		parameters = params;	 -		gradient = new double[dimensions]; -	} -	 -	 -	public double getValue() { -		functionCalls++; -		double value = 0; -		for(int i = 0; i < parameters.length-1; i++){ -			value += MathUtils.square(1-parameters[i]) + 100*MathUtils.square(parameters[i+1] - MathUtils.square(parameters[i])); -		} -		 -		return value; -	} - -	/** -	 * gx = -2(1-x) -2x200(y-x^2) -	 * gy = 200(y-x^2) -	 */ -	public double[] getGradient() { -		gradientCalls++; -		java.util.Arrays.fill(gradient,0); -		for(int i = 0; i < parameters.length-1; i++){ -			gradient[i]+=-2*(1-parameters[i]) - 400*parameters[i]*(parameters[i+1] - MathUtils.square(parameters[i])); -			gradient[i+1]+=200*(parameters[i+1] - MathUtils.square(parameters[i])); -		}	 -		return gradient; -	} - -	 - -	 - -	 -	 -	public String toString(){ -		String  res =""; -		for(int i = 0; i < parameters.length; i++){ -			res += "P" + i+ " " + parameters[i]; -		} -		res += " Value " + getValue(); -		return res; -	} -	 -	public static void main(String[] args) { -		 -		GeneralizedRosenbrock o = new GeneralizedRosenbrock(2); -		System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); -		; - -		System.out.println("Doing Gradient descent"); -		//LineSearchMethod wolfe = new WolfRuleLineSearch(new InterpolationPickFirstStep(1),100,0.001,0.1); -		StopingCriteria stop = new GradientL2Norm(0.001);		 -		LineSearchMethod ls = new ArmijoLineSearchMinimization(); -		Optimizer optimizer = new GradientDescent(ls);		 -		OptimizerStats stats = new OptimizerStats(); -		optimizer.setMaxIterations(1000); -		boolean succed = optimizer.optimize(o,stats, stop); -		System.out.println("Suceess " + succed + "/n"+stats.prettyPrint(1)); -		System.out.println("Doing Conjugate Gradient descent"); -		o = new GeneralizedRosenbrock(2); -	//	wolfe = new WolfRuleLineSearch(new InterpolationPickFirstStep(1),100,0.001,0.1); -		optimizer = new ConjugateGradient(ls); -		stats = new OptimizerStats(); -		optimizer.setMaxIterations(1000); -		succed = optimizer.optimize(o,stats,stop); -		
System.out.println("Suceess " + succed + "/n"+stats.prettyPrint(1)); -		System.out.println("Doing Quasi newton descent"); -		o = new GeneralizedRosenbrock(2); -		optimizer = new LBFGS(ls,10); -		stats = new OptimizerStats(); -		optimizer.setMaxIterations(1000); -		succed = optimizer.optimize(o,stats,stop); -		System.out.println("Suceess " + succed + "/n"+stats.prettyPrint(1)); - -	} -	 -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java b/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java deleted file mode 100644 index f087681e..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java +++ /dev/null @@ -1,128 +0,0 @@ -package optimization.examples; - - -import optimization.gradientBasedMethods.ConjugateGradient; - -import optimization.gradientBasedMethods.GradientDescent; -import optimization.gradientBasedMethods.LBFGS; -import optimization.gradientBasedMethods.Objective; -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.linesearch.GenericPickFirstStep; -import optimization.linesearch.LineSearchMethod; -import optimization.linesearch.WolfRuleLineSearch; -import optimization.stopCriteria.GradientL2Norm; -import optimization.stopCriteria.StopingCriteria; - - -/** - * @author javg - * - */ -public class x2y2 extends Objective{ - -	 -	//Implements function ax2+ by2  -	double a, b; -	public x2y2(double a, double b){ -		this.a = a; -		this.b = b; -		parameters = new double[2]; -		parameters[0] = 4; -		parameters[1] = 4; -		gradient = new double[2]; -	} -	 -	public double getValue() { -		functionCalls++; -		return a*parameters[0]*parameters[0]+b*parameters[1]*parameters[1]; -	} - -	public double[] getGradient() { -		gradientCalls++; -		gradient[0]=2*a*parameters[0]; -		gradient[1]=2*b*parameters[1]; -		return gradient; -//		if(debugLevel >=2){ -//			double[] numericalGradient = DebugHelpers.getNumericalGradient(this, parameters, 0.000001); -//			for(int i = 0; i < parameters.length; i++){ -//				double diff = Math.abs(gradient[i]-numericalGradient[i]); -//				if(diff > 0.00001){ -//					System.out.println("Numerical Gradient does not match"); -//					System.exit(1); -//				} -//			} -//		} -	} - -	 -	 -	public void optimizeWithGradientDescent(LineSearchMethod ls, OptimizerStats stats, x2y2 o){ -		GradientDescent optimizer = new GradientDescent(ls); -		StopingCriteria stop = new GradientL2Norm(0.001); -//		optimizer.setGradientConvergenceValue(0.001); -		optimizer.setMaxIterations(100); -		boolean succed = optimizer.optimize(o,stats,stop); -		System.out.println("Ended optimzation Gradient Descent\n" + stats.prettyPrint(1)); -		System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); -		if(succed){ -			System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); -		}else{ -			System.out.println("Failed to optimize"); -		} -	} -	 -	public void optimizeWithConjugateGradient(LineSearchMethod ls, OptimizerStats stats, x2y2 o){ -		ConjugateGradient optimizer = new ConjugateGradient(ls); -		StopingCriteria stop = new GradientL2Norm(0.001); - -		optimizer.setMaxIterations(10); -		boolean succed = optimizer.optimize(o,stats,stop); -		System.out.println("Ended optimzation Conjugate Gradient\n" + stats.prettyPrint(1)); -		System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); -		if(succed){ -			System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); -		}else{ -			System.out.println("Failed 
to optimize"); -		} -	} -	 -	public void optimizeWithLBFGS(LineSearchMethod ls, OptimizerStats stats, x2y2 o){ -		LBFGS optimizer = new LBFGS(ls,10); -		StopingCriteria stop = new GradientL2Norm(0.001); -		optimizer.setMaxIterations(10); -		boolean succed = optimizer.optimize(o,stats,stop); -		System.out.println("Ended optimzation LBFGS\n" + stats.prettyPrint(1)); -		System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); -		if(succed){ -			System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); -		}else{ -			System.out.println("Failed to optimize"); -		} -	} -	 -	public static void main(String[] args) { -		x2y2 o = new x2y2(1,10); -		System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); -		o.setDebugLevel(4); -		LineSearchMethod wolfe = new WolfRuleLineSearch(new GenericPickFirstStep(1),0.001,0.9);; -//		LineSearchMethod ls = new ArmijoLineSearchMinimization(); -		OptimizerStats stats = new OptimizerStats(); -		o.optimizeWithGradientDescent(wolfe, stats, o); -		o = new x2y2(1,10); -		System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); -//		ls = new ArmijoLineSearchMinimization(); -		stats = new OptimizerStats(); -		o.optimizeWithConjugateGradient(wolfe, stats, o); -		o = new x2y2(1,10); -		System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); -//		ls = new ArmijoLineSearchMinimization(); -		stats = new OptimizerStats(); -		o.optimizeWithLBFGS(wolfe, stats, o);	 -	} -	 -	public String toString(){ -		return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue(); -	} -	 -	 -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java b/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java deleted file mode 100644 index 391775b7..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java +++ /dev/null @@ -1,127 +0,0 @@ -package optimization.examples; - - -import optimization.gradientBasedMethods.ProjectedGradientDescent; -import optimization.gradientBasedMethods.ProjectedObjective; -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc; -import optimization.linesearch.InterpolationPickFirstStep; -import optimization.linesearch.LineSearchMethod; -import optimization.projections.BoundsProjection; -import optimization.projections.Projection; -import optimization.projections.SimplexProjection; -import optimization.stopCriteria.CompositeStopingCriteria; -import optimization.stopCriteria.GradientL2Norm; -import optimization.stopCriteria.ProjectedGradientL2Norm; -import optimization.stopCriteria.StopingCriteria; -import optimization.stopCriteria.ValueDifference; - - -/** - * @author javg - *  - *  - *ax2+ b(y2 -displacement) - */ -public class x2y2WithConstraints extends ProjectedObjective{ - - -	double a, b; -	double dx; -	double dy; -	Projection projection; -	 -	 -	public x2y2WithConstraints(double a, double b, double[] params, double dx, double dy, Projection proj){ -		//projection = new BoundsProjection(0.2,Double.MAX_VALUE); -		super(); -		projection = proj;	 -		this.a = a; -		this.b = b; -		this.dx = dx; -		this.dy = dy; -		setInitialParameters(params); -		System.out.println("Function " +a+"(x-"+dx+")^2 + "+b+"(y-"+dy+")^2"); -		System.out.println("Gradient " +(2*a)+"(x-"+dx+") ; 
"+(b*2)+"(y-"+dy+")"); -		printParameters(); -		projection.project(parameters); -		printParameters(); -		gradient = new double[2]; -	} -	 -	public double getValue() { -		functionCalls++; -		return a*(parameters[0]-dx)*(parameters[0]-dx)+b*((parameters[1]-dy)*(parameters[1]-dy)); -	} - -	public double[] getGradient() { -		if(gradient == null){ -			gradient = new double[2]; -		} -		gradientCalls++; -		gradient[0]=2*a*(parameters[0]-dx); -		gradient[1]=2*b*(parameters[1]-dy); -		return gradient; -	} -	 -	 -	public double[] projectPoint(double[] point) { -		double[] newPoint = point.clone(); -		projection.project(newPoint); -		return newPoint; -	}	 -	 -	public void optimizeWithProjectedGradientDescent(LineSearchMethod ls, OptimizerStats stats, x2y2WithConstraints o){ -		ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls); -		StopingCriteria stopGrad = new ProjectedGradientL2Norm(0.001); -		StopingCriteria stopValue = new ValueDifference(0.001); -		CompositeStopingCriteria compositeStop = new CompositeStopingCriteria(); -		compositeStop.add(stopGrad); -		compositeStop.add(stopValue); -		 -		optimizer.setMaxIterations(5); -		boolean succed = optimizer.optimize(o,stats,compositeStop); -		System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1)); -		System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]); -		if(succed){ -			System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); -		}else{ -			System.out.println("Failed to optimize"); -		} -	} -	 -	 -	 -	public String toString(){ -		 -		return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue() + " grad (" + getGradient()[0] + ":" + getGradient()[1]+")"; -	} -	 -	public static void main(String[] args) { -		double a = 1; -		double b=1; -		double x0 = 0; -		double y0  =1; -		double dx = 0.5; -		double dy = 0.5	; -		double [] parameters = new double[2]; -		parameters[0] = x0; -		parameters[1] = y0; -		x2y2WithConstraints o = new x2y2WithConstraints(a,b,parameters,dx,dy, new SimplexProjection(0.5)); -		System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1] + " a " + a + " b "+b ); -		o.setDebugLevel(4); -		 -		LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1)); -		 -		OptimizerStats stats = new OptimizerStats(); -		o.optimizeWithProjectedGradientDescent(ls, stats, o); -		 -//		o = new x2y2WithConstraints(a,b,x0,y0,dx,dy); -//		stats = new OptimizerStats(); -//		o.optimizeWithSpectralProjectedGradientDescent(stats, o); -	} -	 -	 -	 -	 -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java deleted file mode 100644 index 2fcb7990..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java +++ /dev/null @@ -1,120 +0,0 @@ -package optimization.gradientBasedMethods; - -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.linesearch.DifferentiableLineSearchObjective; -import optimization.linesearch.LineSearchMethod; -import optimization.stopCriteria.StopingCriteria; -import optimization.util.MathUtils; - -/** - *  - * @author javg - * - */ -public abstract class AbstractGradientBaseMethod implements Optimizer{ -	 -	protected int maxNumberOfIterations=10000; -	 -	 -	 -	
protected int currentProjectionIteration; -	protected double currValue;	 -	protected double previousValue = Double.MAX_VALUE;; -	protected double step; -	protected double[] gradient; -	public double[] direction; -	 -	//Original values -	protected double originalGradientL2Norm; -	 -	protected LineSearchMethod lineSearch; -	DifferentiableLineSearchObjective lso; -	 -	 -	public void reset(){ -		direction = null; -		gradient = null; -		previousValue = Double.MAX_VALUE; -		currentProjectionIteration = 0; -		originalGradientL2Norm = 0; -		step = 0; -		currValue = 0; -	} -	 -	public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){ -		lso =   new DifferentiableLineSearchObjective(o); -	} -	public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){ -	} -	 -	public void updateStructuresAfterStep(Objective o,OptimizerStats stats, StopingCriteria stop){ -	} -	 -	public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){ -		//Initialize structures -			 -		stats.collectInitStats(this, o); -		direction = new double[o.getNumParameters()]; -		initializeStructures(o, stats, stop); -		for (currentProjectionIteration = 1; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){ -			//System.out.println("\tgradient descent iteration " + currentProjectionIteration); -			//System.out.print("\tparameters:" ); -			//o.printParameters(); -			previousValue = currValue; -			currValue = o.getValue(); -			gradient = o.getGradient(); -			if(stop.stopOptimization(o)){ -				stats.collectFinalStats(this, o); -				return true; -			}	 -			 -			getDirection(); -			if(MathUtils.dotProduct(gradient, direction) > 0){ -				System.out.println("Not a descent direction"); -				System.out.println(" current stats " + stats.prettyPrint(1)); -				System.exit(-1); -			} -			updateStructuresBeforeStep(o, stats, stop); -			lso.reset(direction); -			step = lineSearch.getStepSize(lso); -			//System.out.println("\t\tLeave with step: " + step); -			if(step==-1){ -				System.out.println("Failed to find step"); -				stats.collectFinalStats(this, o); -				return false;		 -			} -			updateStructuresAfterStep( o, stats,  stop); -//			previousValue = currValue; -//			currValue = o.getValue(); -//			gradient = o.getGradient(); -			stats.collectIterationStats(this, o); -		} -		stats.collectFinalStats(this, o); -		return false; -	} -	 -	 -	public int getCurrentIteration() { -		return currentProjectionIteration; -	} - -	 -	/** -	 * Method specific -	 */ -	public abstract double[] getDirection(); - -	public double getCurrentStep() { -		return step; -	} - - - -	public void setMaxIterations(int max) { -		maxNumberOfIterations = max; -	} - -	public double getCurrentValue() { -		return currValue; -	} -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java deleted file mode 100644 index 28295729..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java +++ /dev/null @@ -1,92 +0,0 @@ -package optimization.gradientBasedMethods; - -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.linesearch.DifferentiableLineSearchObjective; -import optimization.linesearch.LineSearchMethod; -import optimization.stopCriteria.StopingCriteria; -import optimization.util.MathUtils; - - - -public class ConjugateGradient extends 
AbstractGradientBaseMethod{ -	 -	 -	double[] previousGradient; -	double[] previousDirection; - -	public ConjugateGradient(LineSearchMethod lineSearch) { -		this.lineSearch = lineSearch; -	} -	 -	public void reset(){ -		super.reset(); -		java.util.Arrays.fill(previousDirection, 0); -		java.util.Arrays.fill(previousGradient, 0); -	} -	 -	public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){ -		super.initializeStructures(o, stats, stop); -		previousGradient = new double[o.getNumParameters()]; -		previousDirection = new double[o.getNumParameters()]; -	} -	public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){ -		System.arraycopy(gradient, 0, previousGradient, 0, gradient.length); -		System.arraycopy(direction, 0, previousDirection, 0, direction.length);	 -	} -	 -//	public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){ -//		DifferentiableLineSearchObjective lso = new DifferentiableLineSearchObjective(o); -//		stats.collectInitStats(this, o); -//		direction = new double[o.getNumParameters()]; -//		initializeStructures(o, stats, stop); -//		for (currentProjectionIteration = 0; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){ -//			previousValue = currValue; -//			currValue = o.getValue(); -//			gradient =o.getGradient(); -//			if(stop.stopOptimization(gradient)){ -//				stats.collectFinalStats(this, o); -//				return true; -//			} -//			getDirection(); -//			updateStructures(o, stats, stop); -//			lso.reset(direction); -//			step = lineSearch.getStepSize(lso);	 -//			if(step==-1){ -//				System.out.println("Failed to find a step size"); -//				System.out.println("Failed to find step"); -//				stats.collectFinalStats(this, o); -//				return false;	 -//			} -//			 -//			stats.collectIterationStats(this, o); -//		} -//		stats.collectFinalStats(this, o); -//		return false; -//	} -	 -	public double[] getDirection(){ -		direction = MathUtils.negation(gradient); -		if(currentProjectionIteration != 1){ -			//Using Polak-Ribiere method (book equation 5.45) -			double b = MathUtils.dotProduct(gradient, MathUtils.arrayMinus(gradient, previousGradient)) -			/MathUtils.dotProduct(previousGradient, previousGradient); -			if(b<0){ -				System.out.println("Defaulting to gradient descent"); -				b = Math.max(b, 0); -			} -			MathUtils.plusEquals(direction, previousDirection, b); -			//Debug code -			if(MathUtils.dotProduct(direction, gradient) > 0){ -				System.out.println("Not a descent direction, resetting to gradient"); -				direction = MathUtils.negation(gradient); -			} -		} -		return direction; -	} -	 -	 -	 - - - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java deleted file mode 100644 index 6dc4ef6c..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java +++ /dev/null @@ -1,65 +0,0 @@ -package optimization.gradientBasedMethods; - -import java.util.ArrayList; - -import optimization.util.MathUtils; - - - -public class DebugHelpers { -	public static void getLineSearchGraph(Objective o, double[] direction,  -			double[] parameters, double originalObj, -			double originalDot, double c1, double c2){ -		ArrayList<Double> stepS = new ArrayList<Double>(); -		ArrayList<Double> obj = new ArrayList<Double>(); -		ArrayList<Double> norm = new ArrayList<Double>(); -		double[] gradient = 
new double[o.getNumParameters()]; -		double[] newParameters = parameters.clone(); -		MathUtils.plusEquals(newParameters,direction,0); -		o.setParameters(newParameters); -		double minValue = o.getValue(); -		int valuesBiggerThanMax = 0; -		for(double step = 0; step < 2; step +=0.01 ){ -			newParameters = parameters.clone(); -			MathUtils.plusEquals(newParameters,direction,step); -			o.setParameters(newParameters); -			double newValue = o.getValue(); -			gradient = o.getGradient(); -			double newgradDirectionDot = MathUtils.dotProduct(gradient,direction); -			stepS.add(step); -			obj.add(newValue); -			norm.add(newgradDirectionDot); -			if(newValue <= minValue){ -				minValue = newValue; -			}else{ -				valuesBiggerThanMax++; -			} -			 -			if(valuesBiggerThanMax > 10){ -				break; -			} -			 -		} -		System.out.println("step\torigObj\tobj\tsuffdec\tnorm\tcurvature1"); -		for(int i = 0; i < stepS.size(); i++){ -			double cnorm= norm.get(i);  -			System.out.println(stepS.get(i)+"\t"+originalObj +"\t"+obj.get(i) + "\t" +  -					(originalObj + originalDot*((Double)stepS.get(i))*c1) +"\t"+Math.abs(cnorm) +"\t"+c2*Math.abs(originalDot)); -		} -	} -	 -	public static double[] getNumericalGradient(Objective o, double[] parameters, double epsilon){ -		int nrParameters = o.getNumParameters(); -		double[] gradient = new double[nrParameters]; -		double[] newParameters; -		double originalValue = o.getValue(); -		for(int parameter = 0; parameter < nrParameters; parameter++){ -			newParameters = parameters.clone(); -			newParameters[parameter]+=epsilon; -			o.setParameters(newParameters); -			double newValue = o.getValue(); -			gradient[parameter]=(newValue-originalValue)/epsilon; -		}	 -		return gradient; -	} -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java deleted file mode 100644 index 9a53cef4..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java +++ /dev/null @@ -1,19 +0,0 @@ -package optimization.gradientBasedMethods; - -import optimization.linesearch.LineSearchMethod; - - - -public class GradientDescent extends AbstractGradientBaseMethod{ -	 -	public GradientDescent(LineSearchMethod lineSearch) { -		this.lineSearch = lineSearch; -	} -		 -	public double[] getDirection(){ -		for(int i = 0; i< gradient.length; i++){ -			direction[i] = -gradient[i]; -		} -		return direction; -	} -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java deleted file mode 100644 index dedbc942..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java +++ /dev/null @@ -1,234 +0,0 @@ -package optimization.gradientBasedMethods; - - -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.linesearch.DifferentiableLineSearchObjective; -import optimization.linesearch.LineSearchMethod; -import optimization.stopCriteria.StopingCriteria; -import optimization.util.MathUtils; - -public class LBFGS extends AbstractGradientBaseMethod{ - -	//How many previous values are being saved -	int history; -	double[][] skList; -	double[][] ykList; -	double initialHessianParameters; -	double[] previousGradient; -	double[] previousParameters; -	 -	//auxiliar structures -	double q[]; -	double[] roi; -	double[] alphai; -	 -	public 
LBFGS(LineSearchMethod ls, int history) { -		lineSearch = ls; -		this.history = history; -		skList = new double[history][]; -		ykList = new double[history][]; - -	} -	 -	public void reset(){ -		super.reset(); -		initialHessianParameters = 0; -		previousParameters = null; -		previousGradient = null; -		skList = new double[history][]; -		ykList = new double[history][]; -		q = null; -		roi = null; -		alphai = null; -	} -	 -	public double[] LBFGSTwoLoopRecursion(double hessianConst){ -		//Only create array once -		if(q == null){ -			 q = new double[gradient.length]; -		} -		System.arraycopy(gradient, 0, q, 0, gradient.length); -		//Only create array once -		if(roi == null){ -			roi = new double[history];  -		} -		//Only create array once -		if(alphai == null){ -			alphai = new double[history]; -		} -		 -		for(int i = history-1; i >=0 && skList[i]!= null && ykList[i]!=null; i-- ){			 -		//	System.out.println("New to Old proj " + currentProjectionIteration + " history "+history + " index " + i); -			double[] si =  skList[i]; -			double[] yi = ykList[i]; -			roi[i]= 1.0/MathUtils.dotProduct(yi,si); -			alphai[i] = MathUtils.dotProduct(si, q)*roi[i]; -			MathUtils.plusEquals(q, yi, -alphai[i]); -		} -		//Initial Hessian is just a constant -		MathUtils.scalarMultiplication(q, hessianConst); -		for(int i = 0; i <history && skList[i]!= null && ykList[i]!=null; i++ ){ -		//	System.out.println("Old to New proj " + currentProjectionIteration + " history "+history + " index " + i); -			double beta = MathUtils.dotProduct(ykList[i], q)*roi[i]; -			MathUtils.plusEquals(q, skList[i], (alphai[i]-beta)); -		} -		return q; -	} -	 -	 -	 -	 -	@Override -	public double[] getDirection() { -		 -		calculateInitialHessianParameter(); -//		System.out.println("Initial hessian " + initialHessianParameters); -		return direction = MathUtils.negation(LBFGSTwoLoopRecursion(initialHessianParameters));		 -	} -	 -	public void calculateInitialHessianParameter(){ -		if(currentProjectionIteration == 1){ -			//Use gradient -			initialHessianParameters = 1; -		}else if(currentProjectionIteration <= history){ -			double[] sk = skList[currentProjectionIteration-2]; -			double[] yk = ykList[currentProjectionIteration-2]; -			initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk); -		}else{ -			//get the last one -			double[] sk = skList[history-1]; -			double[] yk = ykList[history-1]; -			initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk); -		} -	} -	 -	//TODO if structures exist just reset them to zero -	public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){ -		super.initializeStructures(o, stats, stop); -		previousParameters = new double[o.getNumParameters()]; -		previousGradient = new double[o.getNumParameters()]; -	} -	public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){	 -		super.updateStructuresBeforeStep(o, stats, stop); -		System.arraycopy(o.getParameters(), 0, previousParameters, 0, previousParameters.length); -		System.arraycopy(gradient, 0, previousGradient, 0, gradient.length); -	} - -	public void updateStructuresAfterStep(Objective o,OptimizerStats stats, StopingCriteria stop){ -		double[] diffX = MathUtils.arrayMinus(o.getParameters(), previousParameters); -		double[] diffGrad = MathUtils.arrayMinus(gradient, previousGradient); -		//Save the new values and discard the oldest ones -		if(currentProjectionIteration > history){ -			for(int i = 0; i < history-1;i++){ -				skList[i]=skList[i+1]; -				
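//shift ykList in lockstep with skList so the (s_k, y_k) curvature pairs stay aligned (comment added for clarity) -				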
ykList[i]=ykList[i+1]; -			} -			skList[history-1]=diffX; -			ykList[history-1]=diffGrad; -		}else{ -			skList[currentProjectionIteration-1]=diffX; -			ykList[currentProjectionIteration-1]=diffGrad; -		}	 -	} -	 -//	public boolean optimize(Objective o, OptimizerStats stats, StopingCriteria stop) {		 -//		DifferentiableLineSearchObjective lso = new DifferentiableLineSearchObjective(o);		 -//		gradient = o.getGradient(); -//		direction = new double[o.getNumParameters()]; -//		previousGradient = new double[o.getNumParameters()]; -//		 -//		previousParameters = new double[o.getNumParameters()]; -//	 -//		stats.collectInitStats(this, o); -//		previousValue = Double.MAX_VALUE; -//		currValue= o.getValue(); -//		//Used for stopping criteria -//		double[] originalGradient = o.getGradient(); -//		 -//		originalGradientL2Norm = MathUtils.L2Norm(originalGradient); -//		if(stop.stopOptimization(originalGradient)){ -//			stats.collectFinalStats(this, o); -//			return true; -//		} -//		for (currentProjectionIteration = 1; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){ -//			 -//			 -//			currValue = o.getValue(); -//			gradient  = o.getGradient(); -//			currParameters = o.getParameters(); -//			 -//			 -//			if(currentProjectionIteration == 1){ -//				//Use gradient -//				initialHessianParameters = 1; -//			}else if(currentProjectionIteration <= history){ -//				double[] sk = skList[currentProjectionIteration-2]; -//				double[] yk = ykList[currentProjectionIteration-2]; -//				initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk); -//			}else{ -//				//get the last one -//				double[] sk = skList[history-1]; -//				double[] yk = ykList[history-1]; -//				initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk); -//			} -//			 -//			getDirection(); -//			 -//			//MatrixOutput.printDoubleArray(direction, "direction"); -//			double dot = MathUtils.dotProduct(direction, gradient); -//			if(dot > 0){				 -//				throw new RuntimeException("Not a descent direction"); -//			} if (Double.isNaN(dot)){ -//				throw new RuntimeException("dot is not a number!!"); -//			} -//			System.arraycopy(currParameters, 0, previousParameters, 0, currParameters.length); -//			System.arraycopy(gradient, 0, previousGradient, 0, gradient.length); -//			lso.reset(direction); -//			step = lineSearch.getStepSize(lso); -//			if(step==-1){ -//				System.out.println("Failed to find a step size"); -////				lso.printLineSearchSteps(); -////				System.out.println(stats.prettyPrint(1)); -//				stats.collectFinalStats(this, o); -//				return false;	 -//			} -//			stats.collectIterationStats(this, o); -//			 -//			//We are not updating the alpha since it is done in line search already -//			currParameters = o.getParameters(); -//			gradient = o.getGradient(); -//			 -//			if(stop.stopOptimization(gradient)){ -//				stats.collectFinalStats(this, o); -//				return true; -//			} -//			double[] diffX = MathUtils.arrayMinus(currParameters, previousParameters); -//			double[] diffGrad = MathUtils.arrayMinus(gradient, previousGradient); -//			//Save new values and discard new ones -//			if(currentProjectionIteration > history){ -//				for(int i = 0; i < history-1;i++){ -//					skList[i]=skList[i+1]; -//					ykList[i]=ykList[i+1]; -//				} -//				skList[history-1]=diffX; -//				ykList[history-1]=diffGrad; -//			}else{ -//				skList[currentProjectionIteration-1]=diffX; -//				ykList[currentProjectionIteration-1]=diffGrad; -//			}		 -//			previousValue = 
currValue; -//		} -//		stats.collectFinalStats(this, o); -//		return false;	 -//	} -	 - - -	 - -	 - -	 -	 - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java deleted file mode 100644 index 6be01bf9..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java +++ /dev/null @@ -1,87 +0,0 @@ -package optimization.gradientBasedMethods; - - -/** - * Defines an optimization objective: - *  - *  - * @author javg - * - */ -public abstract  class Objective { - -	protected int functionCalls = 0; -	protected int gradientCalls = 0; -	protected int updateCalls = 0; -	 -	protected double[] parameters; -	 -	//Contains a cache with the gradient -	public double[] gradient; -	int debugLevel = 0; -	 -	public void setDebugLevel(int level){ -		debugLevel = level; -	} -	 -	public int getNumParameters() { -		return parameters.length; -	} - -	public double getParameter(int index) { -		return parameters[index]; -	} - -	public double[] getParameters() { -		return parameters; -	} - -	public abstract double[] getGradient( ); -	 -	public void setParameter(int index, double value) { -		parameters[index]=value; -	} - -	public void setParameters(double[] params) { -		if(parameters == null){ -			parameters = new double[params.length]; -		} -		updateCalls++; -		System.arraycopy(params, 0, parameters, 0, params.length); -	} - -	 -	public int getNumberFunctionCalls() { -		return functionCalls; -	} - -	public int getNumberGradientCalls() { -		return gradientCalls; -	} -	 -	public int getNumberUpdateCalls() { -		return updateCalls; -	} -	 -	public String finalInfoString() { -		return "FE: " + functionCalls + " GE " + gradientCalls + " Params updates" + -		updateCalls; -	} -	public void printParameters() { -		System.out.println(toString()); -	}	 -	 -	public abstract String toString();	 -	public abstract double getValue (); -	 -	/** -	 * Sets the initial objective parameters -	 * For unconstrained models this just sets the objective params = argument no copying -	 * For a constrained objective project the parameters and then set -	 * @param params -	 */ -	public  void setInitialParameters(double[] params){ -		parameters = params; -	} - -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java deleted file mode 100644 index 96fce5b0..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java +++ /dev/null @@ -1,19 +0,0 @@ -package optimization.gradientBasedMethods; - -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.stopCriteria.StopingCriteria; - -public interface Optimizer { -	public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stoping); -	 -	 -	public double[] getDirection(); -	public double getCurrentStep(); -	public double getCurrentValue(); -	public int getCurrentIteration(); -	public void reset(); -	 -	public void setMaxIterations(int max); -	 -		 -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java deleted file mode 100644 index afb29d04..00000000 --- 
a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java +++ /dev/null @@ -1,11 +0,0 @@ -package optimization.gradientBasedMethods; - - -/** - *  - * @author javg - * - */ -public abstract class ProjectedAbstractGradientBaseMethod extends AbstractGradientBaseMethod implements ProjectedOptimizer{ -	 -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java deleted file mode 100644 index 0186e945..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java +++ /dev/null @@ -1,154 +0,0 @@ -package optimization.gradientBasedMethods; - -import java.io.IOException; - -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.linesearch.DifferentiableLineSearchObjective; -import optimization.linesearch.LineSearchMethod; -import optimization.linesearch.ProjectedDifferentiableLineSearchObjective; -import optimization.stopCriteria.StopingCriteria; -import optimization.util.MathUtils; - - -/** - * This class implements projected gradient descent - * as described in Bertsekas, "Nonlinear Programming", - * section 2.3. - *  - * The update is given by: - * x_k+1 = x_k + alpha^k(xbar_k - x_k) - * where xbar_k is: - * xbar_k = [x_k - s_k grad(f(x_k))]+ - * and []+ is the projection onto the feasibility set. - *  - * alpha is the step size. - * s_k is a positive scalar which can be viewed as a step size as well: by  - * setting alpha to 1, x_k+1 = [x_k - s_k grad(f(x_k))]+. - * This is called taking a step along the projection arc (Bertsekas), which - * we use by default. - *  - * Note that the only place where we actually pick a step size is in the line search, - * so this behaves just like normal gradient descent but uses a different  - * Armijo line search in which we project after taking a step. 
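- *  - * A small worked example (ours, not part of the original comment): with the feasible set - * X = {x : x >= 0}, x_k = (1, 1), s_k = 1 and grad(f(x_k)) = (2, -3), we get - * xbar_k = [(1, 1) - (2, -3)]+ = [(-1, 4)]+ = (0, 4), - * so with alpha = 1 the update moves x_k+1 to (0, 4).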
- *  - *  - * @author javg - * - */ -public class ProjectedGradientDescent extends ProjectedAbstractGradientBaseMethod{ -	 - -	 -	 -	public ProjectedGradientDescent(LineSearchMethod lineSearch) { -		this.lineSearch = lineSearch; -	} -	 -	//Use projected differential objective instead -	public void initializeStructures(Objective o, OptimizerStats stats, StopingCriteria stop) { -		lso = new ProjectedDifferentiableLineSearchObjective(o); -	}; -	 -	 -	ProjectedObjective obj; -	public boolean optimize(ProjectedObjective o,OptimizerStats stats, StopingCriteria stop){ -		obj = o; -		return super.optimize(o, stats, stop); -	} -	 -	public double[] getDirection(){ -		for(int i = 0; i< gradient.length; i++){ -			direction[i] = -gradient[i]; -		} -		return direction; -	} -	 -	 - -		 -} - - - - - - - -///OLD CODE - -//Use projected gradient norm -//public boolean stopCriteria(double[] gradient){ -//	if(originalDirenctionL2Norm == 0){ -//		System.out.println("Leaving original direction norm is zero"); -//		return true;	 -//	} -//	if(MathUtils.L2Norm(direction)/originalDirenctionL2Norm < gradientConvergenceValue){ -//		System.out.println("Leaving projected gradient Norm smaller than epsilon"); -//		return true;	 -//	} -//	if((previousValue - currValue)/Math.abs(previousValue) < valueConvergenceValue) { -//		System.out.println("Leaving value change below treshold " + previousValue + " - " + currValue); -//		System.out.println(previousValue/currValue + " - " + currValue/currValue  -//				+ " = " + (previousValue - currValue)/Math.abs(previousValue)); -//		return true; -//	} -//	return false; -//} -// - -//public boolean optimize(ProjectedObjective o,OptimizerStats stats, StopingCriteria stop){ -//		stats.collectInitStats(this, o); -//		obj = o; -//		step = 0; -//		currValue = o.getValue(); -//		previousValue = Double.MAX_VALUE; -//		gradient = o.getGradient(); -//		originalGradientL2Norm = MathUtils.L2Norm(gradient); -//		parameterChange = new double[gradient.length]; -//		getDirection(); -//		ProjectedDifferentiableLineSearchObjective lso = new ProjectedDifferentiableLineSearchObjective(o,direction); -//		 -//		originalDirenctionL2Norm = MathUtils.L2Norm(direction); -//		//MatrixOutput.printDoubleArray(currParameters, "parameters"); -//		for (currentProjectionIteration = 0; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){		 -//		//	System.out.println("Iter " + currentProjectionIteration); -//			//o.printParameters(); -//			 -//			 -//			 -//			if(stop.stopOptimization(gradient)){ -//				stats.collectFinalStats(this, o); -//				lastStepUsed = step; -//				return true; -//			}			 -//			lso.reset(direction); -//			step = lineSearch.getStepSize(lso); -//			if(step==-1){ -//				System.out.println("Failed to find step"); -//				stats.collectFinalStats(this, o); -//				return false;	 -//				 -//			} -//			 -//			//Update the direction for stopping criteria -//			previousValue = currValue; -//			currValue = o.getValue(); -//			gradient = o.getGradient(); -//			direction = getDirection();		 -//			if(MathUtils.dotProduct(gradient, direction) > 0){ -//				System.out.println("Not a descent direction"); -//				System.out.println(" current stats " + stats.prettyPrint(1)); -//				System.exit(-1); -//			} -//			stats.collectIterationStats(this, o); -//		} -//		lastStepUsed = step; -//		stats.collectFinalStats(this, o); -//		return false; -//	} - -//public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){ -//	System.out.println("Objective is not a projected 
objective"); -//	throw new RuntimeException(); -//} - diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java deleted file mode 100644 index c3d21393..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java +++ /dev/null @@ -1,29 +0,0 @@ -package optimization.gradientBasedMethods; - -import optimization.util.MathUtils; - - -/** - * Computes a projected objective - * When we tell it to set some parameters it automatically projects the parameters back into the simplex: - *  - *  - * When we tell it to get the gradient in automatically returns the projected gradient: - * @author javg - * - */ -public abstract class ProjectedObjective extends Objective{ -	 -	public abstract double[] projectPoint (double[] point); -	 -	public double[] auxParameters; -	 -	 -	public  void setInitialParameters(double[] params){ -		setParameters(projectPoint(params)); -	} -	 -	 -	 -	 -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java deleted file mode 100644 index 81d8403e..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java +++ /dev/null @@ -1,10 +0,0 @@ -package optimization.gradientBasedMethods; - - - -public interface ProjectedOptimizer extends Optimizer{ -	 -	 -	 -	 -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java deleted file mode 100644 index 6340ef73..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java +++ /dev/null @@ -1,86 +0,0 @@ -package optimization.gradientBasedMethods.stats; - -import java.util.ArrayList; - -import optimization.gradientBasedMethods.Objective; -import optimization.gradientBasedMethods.Optimizer; -import optimization.util.MathUtils; -import optimization.util.StaticTools; - - -public class OptimizerStats { -	 -	double start = 0; -	double totalTime = 0; -	 -	String objectiveFinalStats; -	 -	ArrayList<Double> gradientNorms = new ArrayList<Double>(); -	ArrayList<Double> steps = new ArrayList<Double>(); -	ArrayList<Double> value = new ArrayList<Double>(); -	ArrayList<Integer> iterations = new ArrayList<Integer>(); -	double prevValue =0; -	 -	public void reset(){ -		start = 0; -		totalTime = 0; -		 -		objectiveFinalStats=""; -		 -		gradientNorms.clear(); -		steps.clear(); -		value.clear(); -		iterations.clear(); -		prevValue =0; -	} -	 -	public void startTime() { -		start = System.currentTimeMillis(); -	} -	public void stopTime() { -		totalTime += System.currentTimeMillis() - start; -	} -	 -	public String prettyPrint(int level){ -		StringBuffer res = new StringBuffer(); -		res.append("Total time " + totalTime/1000 + " seconds \n" + "Iterations " + iterations.size() + "\n"); -		res.append(objectiveFinalStats+"\n"); -		if(level > 0){ -			if(iterations.size() > 0){ -			res.append("\tIteration"+iterations.get(0)+"\tstep: "+StaticTools.prettyPrint(steps.get(0), "0.00E00", 6)+ "\tgradientNorm "+  -					StaticTools.prettyPrint(gradientNorms.get(0), "0.00000E00", 10)+ "\tvalue "+ StaticTools.prettyPrint(value.get(0), "0.000000E00",11)+"\n"); -			} -			
for(int i = 1; i < iterations.size(); i++){ -			res.append("\tIteration:\t"+iterations.get(i)+"\tstep:"+StaticTools.prettyPrint(steps.get(i), "0.00E00", 6)+ "\tgradientNorm "+  -					StaticTools.prettyPrint(gradientNorms.get(i), "0.00000E00", 10)+  -					"\tvalue:\t"+ StaticTools.prettyPrint(value.get(i), "0.000000E00",11)+ -					"\tvalueDiff:\t"+ StaticTools.prettyPrint((value.get(i-1)-value.get(i)), "0.000000E00",11)+ -					"\n"); -			} -		} -		return res.toString(); -	} -	 -	 -	public void collectInitStats(Optimizer optimizer, Objective objective){ -		startTime(); -		iterations.add(-1); -		gradientNorms.add(MathUtils.L2Norm(objective.getGradient())); -		steps.add(0.0); -		value.add(objective.getValue()); -	} -	 -	public void collectIterationStats(Optimizer optimizer, Objective objective){ -		iterations.add(optimizer.getCurrentIteration()); -		gradientNorms.add(MathUtils.L2Norm(objective.getGradient())); -		steps.add(optimizer.getCurrentStep()); -		value.add(optimizer.getCurrentValue()); -	} -	 -	 -	public void collectFinalStats(Optimizer optimizer, Objective objective){ -		stopTime(); -		objectiveFinalStats = objective.finalInfoString(); -	} -	 -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java deleted file mode 100644 index d65a1267..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java +++ /dev/null @@ -1,70 +0,0 @@ -package optimization.gradientBasedMethods.stats; - -import java.util.ArrayList; - -import optimization.gradientBasedMethods.Objective; -import optimization.gradientBasedMethods.Optimizer; -import optimization.gradientBasedMethods.ProjectedObjective; -import optimization.gradientBasedMethods.ProjectedOptimizer; -import optimization.util.MathUtils; -import optimization.util.StaticTools; - - -public class ProjectedOptimizerStats extends OptimizerStats{ -	 -	 -	 -	public void reset(){ -		super.reset(); -		projectedGradientNorms.clear(); -	} -	 -	ArrayList<Double> projectedGradientNorms = new ArrayList<Double>(); - -	public String prettyPrint(int level){ -		StringBuffer res = new StringBuffer(); -		res.append("Total time " + totalTime/1000 + " seconds \n" + "Iterations " + iterations.size() + "\n"); -		res.append(objectiveFinalStats+"\n"); -		if(level > 0){ -			if(iterations.size() > 0){ -			res.append("\tIteration"+iterations.get(0)+"\tstep: "+ -					StaticTools.prettyPrint(steps.get(0), "0.00E00", 6)+ "\tgradientNorm "+  -					StaticTools.prettyPrint(gradientNorms.get(0), "0.00000E00", 10) -					+ "\tdirection"+ -					StaticTools.prettyPrint(projectedGradientNorms.get(0), "0.00000E00", 10)+ -					"\tvalue "+ StaticTools.prettyPrint(value.get(0), "0.000000E00",11)+"\n"); -			} -			for(int i = 1; i < iterations.size(); i++){ -			res.append("\tIteration"+iterations.get(i)+"\tstep: "+StaticTools.prettyPrint(steps.get(i), "0.00E00", 6)+ "\tgradientNorm "+  -					StaticTools.prettyPrint(gradientNorms.get(i), "0.00000E00", 10)+  -					"\t direction "+ -					StaticTools.prettyPrint(projectedGradientNorms.get(i), "0.00000E00", 10)+ -					"\tvalue "+ StaticTools.prettyPrint(value.get(i), "0.000000E00",11)+ -					"\tvalueDiff "+ StaticTools.prettyPrint((value.get(i-1)-value.get(i)), "0.000000E00",11)+ -					"\n"); -			} -		} -		return res.toString(); -	} -	 -	 -	public void collectInitStats(Optimizer optimizer, Objective objective){ -		
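//unlike the base class, this only starts the timer; no iteration -1 row is recorded here (comment added for clarity) -		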
startTime(); -	} -	 -	public void collectIterationStats(Optimizer optimizer, Objective objective){		 -		iterations.add(optimizer.getCurrentIteration()); -		gradientNorms.add(MathUtils.L2Norm(objective.getGradient())); -		projectedGradientNorms.add(MathUtils.L2Norm(optimizer.getDirection())); -		steps.add(optimizer.getCurrentStep()); -		value.add(optimizer.getCurrentValue()); -	} -	 -	 -	 -	public void collectFinalStats(Optimizer optimizer, Objective objective){ -		stopTime(); -		objectiveFinalStats = objective.finalInfoString(); -	} -	 -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java deleted file mode 100644 index c9f9b8df..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java +++ /dev/null @@ -1,102 +0,0 @@ -package optimization.linesearch; - -import optimization.util.Interpolation; - - -/** - * Implements backtracking line search as described on page 37 of Numerical Optimization. - * Also known as the Armijo rule - * @author javg - * - */ -public class ArmijoLineSearchMinimization implements LineSearchMethod{ - -	/** -	 * How much should the step size decrease at each iteration. -	 */ -	double contractionFactor = 0.5; -	double c1 = 0.0001; -	 -	double sigma1 = 0.1; -	double sigma2 = 0.9; - - -	 -	double initialStep; -	int maxIterations = 10; -	 -			 -	public ArmijoLineSearchMinimization(){ -		this.initialStep = 1; -	} -	 -	//Experiment -	double previousStepPicked = -1; -	double previousInitGradientDot = -1; -	double currentInitGradientDot = -1; -	 -	 -	public void reset(){ -		previousStepPicked = -1; -		previousInitGradientDot = -1; -		currentInitGradientDot = -1; -	} -	 -	public void setInitialStep(double initial){ -		initialStep = initial; -	} -	 -	/** -	 *  -	 */ -	 -	public double getStepSize(DifferentiableLineSearchObjective o) {	 -		currentInitGradientDot = o.getInitialGradient(); -		//Should update all in the objective -		o.updateAlpha(initialStep);	 -		int nrIterations = 0; -		//System.out.println("tried alpha" + initialStep + " value " + o.getCurrentValue()); -		while(!WolfeConditions.suficientDecrease(o,c1)){			 -			if(nrIterations >= maxIterations){ -				o.printLineSearchSteps();	 -				return -1; -			} -			double alpha=o.getAlpha(); -			double alphaTemp =  -				Interpolation.quadraticInterpolation(o.getOriginalValue(), o.getInitialGradient(), alpha, o.getCurrentValue()); -			if(alphaTemp >= sigma1 || alphaTemp <= sigma2*o.getAlpha()){ -//				System.out.println("using alpha temp " + alphaTemp); -				alpha = alphaTemp; -			}else{ -//				System.out.println("Discarding alpha temp " + alphaTemp); -				alpha = alpha*contractionFactor; -			} -//			double alpha =o.getAlpha()*contractionFactor; - -			o.updateAlpha(alpha); -			//System.out.println("tried alpha" + alpha+ " value " + o.getCurrentValue()); -			nrIterations++;			 -		} -		 -		//System.out.println("Leaving line search used:"); -		//o.printLineSearchSteps();	 -		 -		previousInitGradientDot = currentInitGradientDot; -		previousStepPicked = o.getAlpha(); -		return o.getAlpha(); -	} - -	public double getInitialGradient() { -		return currentInitGradientDot; -		 -	} - -	public double getPreviousInitialGradient() { -		return previousInitGradientDot; -	} - -	public double getPreviousStepUsed() { -		return previousStepPicked; -	} -		 -} diff --git 
a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java deleted file mode 100644 index e153f2da..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java +++ /dev/null @@ -1,141 +0,0 @@ -package optimization.linesearch; - -import optimization.gradientBasedMethods.ProjectedObjective; -import optimization.util.Interpolation; -import optimization.util.MathUtils; - - - - - -/** - * Implements Armijo rule line search along the projection arc (Nonlinear Programming, page 230). - * To be used with projected gradient methods. - *  - * Recall that Armijo tries successive step sizes alpha until sufficient decrease is satisfied: - * f(x+alpha*direction) < f(x) + alpha*c1*grad(f)*direction - *  - * In this case we are optimizing over a convex set X, so we must guarantee that the new point stays inside the  - * constraints. - * First, the direction has to be feasible (inside the constraints) and is defined as: - * d = (x_k_f - x_k) where x_k_f is a feasible point. - * So the Armijo condition can be rewritten as: - * f(x+alpha(x_k_f - x_k)) < f(x) + c1*grad(f)*(x_k_f - x_k) - * and x_k_f is defined as: - * [x_k-alpha*grad(f)]+ - * where []+ means a projection onto the feasibility set. - * This means that we take a step along the negative gradient (gradient descent) and then project - * that point onto the feasibility set.  - * Note that if the point is already feasible then we are back to the normal Armijo rule. - *  - * @author javg - * - */ -public class ArmijoLineSearchMinimizationAlongProjectionArc implements LineSearchMethod{ - -	/** -	 * How much should the step size decrease at each iteration. 
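-	 * (A note added for clarity: each failed trial multiplies alpha by contractionFactor, unless quadratic interpolation suggests a usable candidate; see getStepSize below.)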
-	 */ -	double contractionFactor = 0.5; -	double c1 = 0.0001; -	 -	 -	double initialStep; -	int maxIterations = 100; -			 -	 -	double sigma1 = 0.1; -	double sigma2 = 0.9; -	 -	//Experiment -	double previousStepPicked = -1;; -	double previousInitGradientDot = -1; -	double currentInitGradientDot = -1; -	 -	GenericPickFirstStep strategy; -	 -	 -	public void reset(){ -		previousStepPicked = -1;; -		previousInitGradientDot = -1; -		currentInitGradientDot = -1; -	} - -	 -	public ArmijoLineSearchMinimizationAlongProjectionArc(){ -		this.initialStep = 1; -	} -	 -	public ArmijoLineSearchMinimizationAlongProjectionArc(GenericPickFirstStep strategy){ -		this.strategy = strategy; -		this.initialStep = strategy.getFirstStep(this); -	} -	 -	 -	public void setInitialStep(double initial){ -		this.initialStep = initial; -	} -	 -	/** -	 *  -	 */ -	 -	public double getStepSize(DifferentiableLineSearchObjective o) {	 - -		 -		//Should update all in the objective -		initialStep = strategy.getFirstStep(this); -		o.updateAlpha(initialStep);	 -		previousInitGradientDot=currentInitGradientDot; -		currentInitGradientDot=o.getCurrentGradient(); -		int nrIterations = 0; -	 -		//Armijo rule, the current value has to be smaller than the original value plus a small step of the gradient -		while(o.getCurrentValue()  > -			o.getOriginalValue() + c1*(o.getCurrentGradient())){			 -//			System.out.println("curr value "+o.getCurrentValue()); -//			System.out.println("original value "+o.getOriginalValue()); -//			System.out.println("GRADIENT decrease" +(MathUtils.dotProduct(o.o.gradient, -//					MathUtils.arrayMinus(o.originalParameters,((ProjectedObjective)o.o).auxParameters)))); -//			System.out.println("GRADIENT SAVED" + o.getCurrentGradient()); -			if(nrIterations >= maxIterations){ -				System.out.println("Could not find a step leaving line search with -1"); -				o.printLineSearchSteps(); -				return -1; -			} -			double alpha=o.getAlpha(); -			double alphaTemp =  -				Interpolation.quadraticInterpolation(o.getOriginalValue(), o.getInitialGradient(), alpha, o.getCurrentValue()); -			if(alphaTemp >= sigma1 || alphaTemp <= sigma2*o.getAlpha()){ -				alpha = alphaTemp; -			}else{ -				alpha = alpha*contractionFactor; -			} -//			double alpha =obj.getAlpha()*contractionFactor; -			o.updateAlpha(alpha); -			nrIterations++;			 -		} -//		System.out.println("curr value "+o.getCurrentValue()); -//		System.out.println("original value "+o.getOriginalValue()); -//		System.out.println("sufficient decrease" +c1*o.getCurrentGradient()); -//		System.out.println("Leavning line search used:"); -//		o.printSmallLineSearchSteps();	 -		 -		previousStepPicked = o.getAlpha(); -		return o.getAlpha(); -	} -	 -	public double getInitialGradient() { -		return currentInitGradientDot; -		 -	} - -	public double getPreviousInitialGradient() { -		return previousInitGradientDot; -	} - -	public double getPreviousStepUsed() { -		return previousStepPicked; -	} -		 -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java deleted file mode 100644 index a5bc958e..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java +++ /dev/null @@ -1,185 +0,0 @@ -package optimization.linesearch; - -import gnu.trove.TDoubleArrayList; -import gnu.trove.TIntArrayList; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; 
-import java.util.Comparator; - -import optimization.gradientBasedMethods.Objective; -import optimization.util.MathUtils; -import optimization.util.StaticTools; - - - -import util.MathUtil; -import util.Printing; - - -/** - * A wrapper class for the actual objective in order to perform  - * line search.  This class does a lot of caching in order to keep  - * the optimization code legible.  For the applications  - * we use it for, caching the entire history of evaluations should be  - * a win.  - *  - * Note: the lastEvaluatedAt value is very important, since we will use - * it to avoid doing an evaluation of the gradient after the line search.   - *  - * The differentiable line search objective defines a search along the ray - * given by a direction of the main objective. - * It defines the following function,  - * where f is the original objective function: - * g(alpha) = f(x_0 + alpha*direction) - * g'(alpha) = f'(x_0 + alpha*direction)*direction - *  - * @author joao - * - */ -public class DifferentiableLineSearchObjective { - -	 -	 -	Objective o; -	int nrIterations; -	TDoubleArrayList steps; -	TDoubleArrayList values; -	TDoubleArrayList gradients; -	 -	//These variables cannot change -	public double[] originalParameters; -	public double[] searchDirection; - -	 -	/** -	 * Defines a line search objective: -	 * Receives: -	 * the objective over which we are performing the line search, used to calculate values and gradients, -	 * and the direction of the ray search; note that the direction does not depend on the  -	 * objective but on the method. -	 * @param o -	 * @param direction -	 */ -	public DifferentiableLineSearchObjective(Objective o) { -		this.o = o; -		originalParameters = new double[o.getNumParameters()]; -		searchDirection = new double[o.getNumParameters()]; -		steps = new TDoubleArrayList(); -		values = new TDoubleArrayList(); -		gradients = new TDoubleArrayList(); -	} -	/** -	 * Called whenever we start a new iteration.  -	 * Receives the ray along which we are searching and resets all values. -	 *  -	 */ -	public void reset(double[] direction){ -		//Copy initial values -		System.arraycopy(o.getParameters(), 0, originalParameters, 0, o.getNumParameters()); -		System.arraycopy(direction, 0, searchDirection, 0, o.getNumParameters()); -		 -		//Initialize variables -		nrIterations = 0; -		steps.clear(); -		values.clear(); -		gradients.clear(); -	 -		values.add(o.getValue()); -		gradients.add(MathUtils.dotProduct(o.getGradient(),direction));	 -		steps.add(0); -	} -	 -	 -	/** -	 * Updates the current value of alpha. -	 * Takes a step of that size along the search direction, then -	 * gets the real objective value and gradient and calculates all required information. 
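-	 * In the notation of the class comment, this records g(alpha) = f(x_0 + alpha*direction) and -	 * g'(alpha) = f'(x_0 + alpha*direction)*direction for the newly tried alpha.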
-	 */ -	public void updateAlpha(double alpha){ -		if(alpha < 0){ -			System.out.println("alpha may not be smaller than zero"); -			throw new RuntimeException(); -		} -		nrIterations++; -		steps.add(alpha); -		//x_t+1 = x_t + alpha*direction -		System.arraycopy(originalParameters,0, o.getParameters(), 0, originalParameters.length); -		MathUtils.plusEquals(o.getParameters(), searchDirection, alpha); -		o.setParameters(o.getParameters()); -//		System.out.println("Took a step of " + alpha + " new value " + o.getValue()); -		values.add(o.getValue()); -		gradients.add(MathUtils.dotProduct(o.getGradient(),searchDirection));		 -	} - -	 -	 -	public int getNrIterations(){ -		return nrIterations; -	} -	 -	/** -	 * return g(alpha) for the current value of alpha -	 * @param iter -	 * @return -	 */ -	public double getValue(int iter){ -		return values.get(iter); -	} -	 -	public double getCurrentValue(){ -		return values.get(nrIterations); -	} -	 -	public double getOriginalValue(){ -		return values.get(0); -	} - -	/** -	 * return g'(alpha) for the current value of alpha -	 * @param iter -	 * @return -	 */ -	public double getGradient(int iter){ -		return gradients.get(iter); -	} -	 -	public double getCurrentGradient(){ -		return gradients.get(nrIterations); -	} -	 -	public double getInitialGradient(){ -		return gradients.get(0); -	} -	 -	 -	 -	 -	public double getAlpha(){ -		return steps.get(nrIterations); -	} -	 -	public void printLineSearchSteps(){ -		System.out.println( -				" Steps size " + steps.size() +  -				" Values size " + values.size() + -				" Gradients size " + gradients.size()); -		for(int i =0; i < steps.size();i++){ -			System.out.println("Iter " + i + " step " + steps.get(i) + -					" value " + values.get(i) + " grad "  + gradients.get(i)); -		} -	} -	 -	public void printSmallLineSearchSteps(){ -		for(int i =0; i < steps.size();i++){ -			System.out.print(StaticTools.prettyPrint(steps.get(i), "0.0000E00",8) + " "); -		} -		System.out.println(); -	} -	 -	public static void main(String[] args) { -		 -	} -	 -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java deleted file mode 100644 index a33eb311..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java +++ /dev/null @@ -1,20 +0,0 @@ -package optimization.linesearch; - - -public class GenericPickFirstStep{ -	double _initValue; -	public GenericPickFirstStep(double initValue) { -		_initValue = initValue; -	} -	 -	public double getFirstStep(LineSearchMethod ls){ -		return _initValue; -	} -	public void collectInitValues(LineSearchMethod ls){ -		 -	} -	 -	public void collectFinalValues(LineSearchMethod ls){ -		 -	} -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java deleted file mode 100644 index 0deebcdb..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java +++ /dev/null @@ -1,25 +0,0 @@ -package optimization.linesearch; - - -public class InterpolationPickFirstStep extends GenericPickFirstStep{ -	public InterpolationPickFirstStep(double initValue) { -		super(initValue); -	} -	 -	public double getFirstStep(LineSearchMethod ls){ -		if(ls.getPreviousStepUsed() != -1 && ls.getPreviousInitialGradient()!=0){ -			double newStep = Math.min(300, 
1.02*ls.getPreviousInitialGradient()*ls.getPreviousStepUsed()/ls.getInitialGradient()); -		//	System.out.println("proposing " + newStep); -			return newStep; -			 -		} -		return _initValue; -	} -	public void collectInitValues(WolfRuleLineSearch ls){ -		 -	} -	 -	public void collectFinalValues(WolfRuleLineSearch ls){ -		 -	} -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java deleted file mode 100644 index 80cd7f39..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java +++ /dev/null @@ -1,14 +0,0 @@ -package optimization.linesearch; - - -public interface LineSearchMethod { -	 -	double getStepSize(DifferentiableLineSearchObjective o); -	 -	public double getInitialGradient(); -	public double getPreviousInitialGradient(); -	public double getPreviousStepUsed(); -	 -	public void setInitialStep(double initial); -	public void reset(); -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java deleted file mode 100644 index 4b354fd9..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java +++ /dev/null @@ -1,33 +0,0 @@ -package optimization.linesearch; - -/** - * Non-Newton since we don't always try 1... - * Not sure if that is even useful for Newton - * @author javg - * - */ -public class NonNewtonInterpolationPickFirstStep extends GenericPickFirstStep{ -	public NonNewtonInterpolationPickFirstStep(double initValue) { -		super(initValue); -	} -	 -	public double getFirstStep(LineSearchMethod ls){ -//		System.out.println("Previous step used " + ls.getPreviousStepUsed()); -//		System.out.println("PreviousGradient " + ls.getPreviousInitialGradient()); -//		System.out.println("CurrentGradient " + ls.getInitialGradient()); -		if(ls.getPreviousStepUsed() != -1 && ls.getPreviousInitialGradient()!=0){ -			double newStep = 1.01*ls.getPreviousInitialGradient()*ls.getPreviousStepUsed()/ls.getInitialGradient(); -			//System.out.println("Suggesting " + newStep); -			return newStep; -			 -		} -		return _initValue; -	} -	public void collectInitValues(WolfRuleLineSearch ls){ -		 -	} -	 -	public void collectFinalValues(WolfRuleLineSearch ls){ -		 -	} -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java deleted file mode 100644 index 29ccbc32..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java +++ /dev/null @@ -1,137 +0,0 @@ -package optimization.linesearch; - -import optimization.gradientBasedMethods.Objective; -import optimization.gradientBasedMethods.ProjectedObjective; -import optimization.util.MathUtils; -import optimization.util.MatrixOutput; - - -/** - * See ArmijoLineSearchMinimizationAlongProjectionArc for description - * @author javg - * - */ -public class ProjectedDifferentiableLineSearchObjective extends DifferentiableLineSearchObjective{ - -	 -	 -	ProjectedObjective obj; -	public ProjectedDifferentiableLineSearchObjective(Objective o) { -		super(o); -		if(!(o instanceof ProjectedObjective)){ -			System.out.println("Must receive a projected 
objective"); -			throw new RuntimeException(); -		} -		obj = (ProjectedObjective) o; -	} - -	 -	 -	public double[] projectPoint (double[] point){ -		return ((ProjectedObjective)o).projectPoint(point); -	} -	public void updateAlpha(double alpha){ -		if(alpha < 0){ -			System.out.println("alpha may not be smaller that zero"); -			throw new RuntimeException(); -		} -		 -		if(obj.auxParameters == null){ -			obj.auxParameters = new double[obj.getParameters().length]; -		} -		 -		nrIterations++; -		 -		steps.add(alpha);		 -		System.arraycopy(originalParameters, 0, obj.auxParameters, 0, obj.auxParameters.length); -		 -		//Take a step into the search direction -		 -//		MatrixOutput.printDoubleArray(obj.getGradient(), "gradient"); -		 -//		alpha=gradients.get(0)*alpha/(gradients.get(gradients.size()-1)); -	 -		//x_t+1 = x_t - alpha*gradient = x_t + alpha*direction -		MathUtils.plusEquals(obj.auxParameters, searchDirection, alpha); -//		MatrixOutput.printDoubleArray(obj.auxParameters, "before projection"); -		obj.auxParameters = projectPoint(obj.auxParameters); -//		MatrixOutput.printDoubleArray(obj.auxParameters, "after projection"); -		o.setParameters(obj.auxParameters); -//		System.out.println("new parameters"); -//		o.printParameters(); -		values.add(o.getValue()); -		//Computes the new gradient x_k-[x_k-alpha*Gradient(x_k)]+  -		MathUtils.minusEqualsInverse(originalParameters,obj.auxParameters,1); -//		MatrixOutput.printDoubleArray(obj.auxParameters, "new gradient"); -		//Dot product between the new direction and the new gradient -		double gradient = MathUtils.dotProduct(obj.auxParameters,searchDirection); -		gradients.add(gradient);	 -		if(gradient > 0){ -			System.out.println("Gradient on line search has to be smaller than zero"); -			System.out.println("Iter: " + nrIterations); -			MatrixOutput.printDoubleArray(obj.auxParameters, "new direction"); -			MatrixOutput.printDoubleArray(searchDirection, "search direction"); -			throw new RuntimeException(); -			 -		} -		 -	} -	 -	/** -	 *  -	 */ -//	public void updateAlpha(double alpha){ -//		 -//		if(alpha < 0){ -//			System.out.println("alpha may not be smaller that zero"); -//			throw new RuntimeException(); -//		} -//		 -//		nrIterations++; -//		steps.add(alpha); -//		//x_t+1 = x_t - alpha*direction -//		System.arraycopy(originalParameters, 0, parametersChange, 0, parametersChange.length); -////		MatrixOutput.printDoubleArray(parametersChange, "parameters before step"); -////		System.out.println("Step" + alpha); -//		MatrixOutput.printDoubleArray(originalGradient, "gradient + " + alpha); -// -//		MathUtils.minusEquals(parametersChange, originalGradient, alpha); -//		 -//		//Project the points into the feasibility set -////		MatrixOutput.printDoubleArray(parametersChange, "before projection"); -//		//x_k(alpha) = [x_k - alpha*grad f(x_k)]+ -//		parametersChange = projectPoint(parametersChange); -////		MatrixOutput.printDoubleArray(parametersChange, "after projection"); -//		o.setParameters(parametersChange); -//		values.add(o.getValue()); -//		//Computes the new direction x_k-[x_k-alpha*Gradient(x_k)]+ -//		 -//		direction=MathUtils.arrayMinus(parametersChange,originalParameters); -////		MatrixOutput.printDoubleArray(direction, "new direction"); -//		 -//		double gradient = MathUtils.dotProduct(originalGradient,direction); -//		gradients.add(gradient);		 -//		if(gradient > 1E-10){ -//			System.out.println("cosine " + gradient/(MathUtils.L2Norm(originalGradient)*MathUtils.L2Norm(direction))); -//			 -//			 -//			System.out.println("not a descent 
-	/**
-	 *
-	 */
-//	public void updateAlpha(double alpha){
-//
-//		if(alpha < 0){
-//			System.out.println("alpha may not be smaller than zero");
-//			throw new RuntimeException();
-//		}
-//
-//		nrIterations++;
-//		steps.add(alpha);
-//		//x_t+1 = x_t - alpha*direction
-//		System.arraycopy(originalParameters, 0, parametersChange, 0, parametersChange.length);
-////		MatrixOutput.printDoubleArray(parametersChange, "parameters before step");
-////		System.out.println("Step" + alpha);
-//		MatrixOutput.printDoubleArray(originalGradient, "gradient + " + alpha);
-//
-//		MathUtils.minusEquals(parametersChange, originalGradient, alpha);
-//
-//		//Project the points into the feasibility set
-////		MatrixOutput.printDoubleArray(parametersChange, "before projection");
-//		//x_k(alpha) = [x_k - alpha*grad f(x_k)]+
-//		parametersChange = projectPoint(parametersChange);
-////		MatrixOutput.printDoubleArray(parametersChange, "after projection");
-//		o.setParameters(parametersChange);
-//		values.add(o.getValue());
-//		//Computes the new direction x_k-[x_k-alpha*Gradient(x_k)]+
-//
-//		direction=MathUtils.arrayMinus(parametersChange,originalParameters);
-////		MatrixOutput.printDoubleArray(direction, "new direction");
-//
-//		double gradient = MathUtils.dotProduct(originalGradient,direction);
-//		gradients.add(gradient);
-//		if(gradient > 1E-10){
-//			System.out.println("cosine " + gradient/(MathUtils.L2Norm(originalGradient)*MathUtils.L2Norm(direction)));
-//
-//			System.out.println("not a descent direction for alpha " + alpha);
-//			System.arraycopy(originalParameters, 0, parametersChange, 0, parametersChange.length);
-//			MathUtils.minusEquals(parametersChange, originalGradient, 1E-20);
-//
-//			parametersChange = projectPoint(parametersChange);
-//			direction=MathUtils.arrayMinus(parametersChange,originalParameters);
-//			gradient = MathUtils.dotProduct(originalGradient,direction);
-//			if(gradient > 0){
-//				System.out.println("Direction is really non-descent even for small alphas:" + gradient);
-//			}
-//			System.out.println("ProjectedLineSearchObjective: Should be a descent direction at " + nrIterations + ": "+ gradient);
-////			System.out.println(Printing.doubleArrayToString(originalGradient, null,"Original gradient"));
-////			System.out.println(Printing.doubleArrayToString(originalParameters, null,"Original parameters"));
-////			System.out.println(Printing.doubleArrayToString(parametersChange, null,"Projected parameters"));
-////			System.out.println(Printing.doubleArrayToString(direction, null,"Direction"));
-//			throw new RuntimeException();
-//		}
-//	}
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java
deleted file mode 100644
index 5489f2d0..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java
+++ /dev/null
@@ -1,300 +0,0 @@
-package optimization.linesearch;
-
-import java.io.PrintStream;
-import java.util.ArrayList;
-
-import optimization.util.Interpolation;
-
-/**
- * @author javg
- */
-public class WolfRuleLineSearch implements LineSearchMethod{
-
-	GenericPickFirstStep pickFirstStep;
-
-	double c1 = 1.0E-4;
-	double c2 = 0.9;
-
-	//Application dependent
-	double maxStep=100;
-
-	int extrapolationIteration;
-	int maxExtrapolationIteration = 1000;
-
-	double minZoomDiffTresh = 10E-10;
-
-	ArrayList<Double> steps;
-	ArrayList<Double> gradientDots;
-	ArrayList<Double> functionVals;
-
-	int debugLevel = 0;
-	boolean foudStep = false;
-
-	public WolfRuleLineSearch(GenericPickFirstStep pickFirstStep){
-		this.pickFirstStep = pickFirstStep;
-	}
-
-	public WolfRuleLineSearch(GenericPickFirstStep pickFirstStep, double c1, double c2){
-		this.pickFirstStep = pickFirstStep;
-		initialStep = pickFirstStep.getFirstStep(this);
-		this.c1 = c1;
-		this.c2 = c2;
-	}
-
-	public void setDebugLevel(int level){
-		debugLevel = level;
-	}
-
-	//Experiment
-	double previousStepPicked = -1;
-	double previousInitGradientDot = -1;
-	double currentInitGradientDot = -1;
-
-	double initialStep;
-
-	public void reset(){
-		previousStepPicked = -1;
-		previousInitGradientDot = -1;
-		currentInitGradientDot = -1;
-		if(steps != null)
-			steps.clear();
-		if(gradientDots != null)
-			gradientDots.clear();
-		if(functionVals != null)
-			functionVals.clear();
-	}
-
-	public void setInitialStep(double initial){
-		//Note: delegates to pickFirstStep rather than using the argument directly.
-		initialStep = pickFirstStep.getFirstStep(this);
-	}
-
-	/**
-	 * Implements the Wolfe line search as described in Nocedal.
-	 * The process consists of two stages. In the first stage we keep increasing
-	 * the step size until one of three things happens: we find a step satisfying
-	 * both Wolfe conditions and return success; the sufficient-decrease condition
-	 * fails, so we cannot increase any further and call zoom with that step as the
-	 * maximum; or we pass the minimum, in which case we call zoom with the
-	 * interval reversed.
-	 */
-	public double getStepSize(DifferentiableLineSearchObjective objective){
-		//System.out.println("entering line search");
-
-		foudStep = false;
-		if(debugLevel >= 1){
-			steps = new ArrayList<Double>();
-			gradientDots = new ArrayList<Double>();
-			functionVals = new ArrayList<Double>();
-		}
-
-		//test
-		currentInitGradientDot = objective.getInitialGradient();
-
-		double previousValue = objective.getCurrentValue();
-		double previousStep = 0;
-		double currentStep = pickFirstStep.getFirstStep(this);
-		for(extrapolationIteration = 0;
-		extrapolationIteration < maxExtrapolationIteration; extrapolationIteration++){
-
-			objective.updateAlpha(currentStep);
-			double currentValue = objective.getCurrentValue();
-			if(debugLevel >= 1){
-				steps.add(currentStep);
-				functionVals.add(currentValue);
-				gradientDots.add(objective.getCurrentGradient());
-			}
-
-			//The current step no longer satisfies the sufficient-decrease condition,
-			//so we cannot go any larger; call zoom with this step as the upper end.
-			if(!WolfeConditions.suficientDecrease(objective,c1)||
-					(extrapolationIteration > 0 && currentValue >= previousValue)){
-				currentStep = zoom(objective,previousStep,currentStep,objective.nrIterations-1,objective.nrIterations);
-				break;
-			}
-
-			//Both conditions satisfied, ready to leave
-			if(WolfeConditions.sufficientCurvature(objective,c1,c2)){
-				//Found step
-				foudStep = true;
-				break;
-			}
-
-			/**
-			 * This means we have already passed the minimum, since the dot product
-			 * that should be negative (descent direction) is now positive, so we
-			 * cannot increase any further. On the other hand, since we know the
-			 * direction is a descent direction, the objective value at the current
-			 * step is certainly smaller than at the previous step, so we swap the order.
-			 */
-			if(objective.getCurrentGradient() >= 0){
-				currentStep = zoom(objective,currentStep,previousStep,objective.nrIterations,objective.nrIterations-1);
-				break;
-			}
-
-			//Ok, so we can still take a bigger step
-			double aux = currentStep;
-			//currentStep = currentStep*2;
-			if(Math.abs(currentStep-maxStep)>1.1e-2){
-				currentStep = (currentStep+maxStep)/2;
-			}else{
-				currentStep = currentStep*2;
-			}
-			previousStep = aux;
-			previousValue = currentValue;
-			//Could be done better
-			if(currentStep >= maxStep){
-				System.out.println("Exceeded max step... calling zoom with maxStep");
-				currentStep = zoom(objective,previousStep,currentStep,objective.nrIterations-1,objective.nrIterations);
-			}
-		}
-		if(!foudStep){
-			System.out.println("Wolfe rule exceeded the maximum number of iterations");
-			if(debugLevel >= 1){
-				printSmallWolfeStats(System.out);
-//				System.out.println("Line search values");
-//				DebugHelpers.getLineSearchGraph(o, direction, originalParameters,origValue, origGradDirectionDot,c1,c2);
-			}
-			return -1;
-		}
-		if(debugLevel >= 1){
-			printSmallWolfeStats(System.out);
-		}
-
-		previousStepPicked = currentStep;
-		previousInitGradientDot = currentInitGradientDot;
-//		objective.printLineSearchSteps();
-		return currentStep;
-	}
-
-	public void printWolfeStats(PrintStream out){
-		for(int i = 0; i < steps.size(); i++){
-			out.println("Step " + steps.get(i) + " value " + functionVals.get(i) + " dot " + gradientDots.get(i));
-		}
-	}
-
-	public void printSmallWolfeStats(PrintStream out){
-		for(int i = 0; i < steps.size(); i++){
-			out.print(steps.get(i) + ":"+functionVals.get(i)+":"+gradientDots.get(i)+" ");
-		}
-		System.out.println();
-	}
-
-	/**
-	 * Picks a step satisfying the strong Wolfe conditions from the interval between
-	 * lowerStep and higherStep found by the routine above.
-	 *
-	 * Both lowerStep and higherStep have already been evaluated, so we only pass the
-	 * iterations at which they were evaluated and save extra evaluations.
-	 *
-	 * We know that lowerStepValue has to be smaller than higherStepValue, and that a
-	 * point satisfying both conditions exists in this interval.
-	 *
-	 * lowerStep always satisfies at least the sufficient-decrease condition.
-	 * @return
-	 */
-	public double zoom(DifferentiableLineSearchObjective o, double lowerStep, double higherStep,
-			int lowerStepIter, int higherStepIter){
-
-		if(debugLevel >=2){
-			System.out.println("Entering zoom with " + lowerStep+"-"+higherStep);
-		}
-
-		double currentStep=-1;
-
-		int zoomIter = 0;
-		while(zoomIter < 1000){
-			if(Math.abs(lowerStep-higherStep) < minZoomDiffTresh){
-				o.updateAlpha(lowerStep);
-				if(debugLevel >= 1){
-					steps.add(lowerStep);
-					functionVals.add(o.getCurrentValue());
-					gradientDots.add(o.getCurrentGradient());
-				}
-				foudStep = true;
-				return lowerStep;
-			}
-
-			//Cubic interpolation
-			currentStep =
-				Interpolation.cubicInterpolation(lowerStep, o.getValue(lowerStepIter), o.getGradient(lowerStepIter),
						higherStep, o.getValue(higherStepIter), o.getGradient(higherStepIter));
-
-			//Safeguard: should not be required; check under which conditions it is required.
-			if(currentStep < 0 ){
-				currentStep = (lowerStep+higherStep)/2;
-			}
-			if(Double.isNaN(currentStep) || Double.isInfinite(currentStep)){
-				currentStep = (lowerStep+higherStep)/2;
-			}
-//			currentStep = (lowerStep+higherStep)/2;
-//			System.out.println("Trying "+currentStep);
-			o.updateAlpha(currentStep);
-			if(debugLevel >=1){
-				steps.add(currentStep);
-				functionVals.add(o.getCurrentValue());
-				gradientDots.add(o.getCurrentGradient());
-			}
-			if(!WolfeConditions.suficientDecrease(o,c1)
-					|| o.getCurrentValue() >= o.getValue(lowerStepIter)){
-				higherStepIter = o.nrIterations;
-				higherStep = currentStep;
-			}
-			//Note: when we get here the new step satisfies the sufficient-decrease
-			//condition and has a function value better than the previous best
-			//(lowerStepIter), so we either leave or move the lower end of the interval.
-			else{
-				if(WolfeConditions.sufficientCurvature(o,c1,c2)){
-					//Satisfies both Wolfe conditions
-					foudStep = true;
-					break;
-				}
-				//If it does not satisfy the curvature condition
-				if(o.getCurrentGradient()*(higherStep-lowerStep) >= 0){
-					higherStep = lowerStep;
-					higherStepIter = lowerStepIter;
-				}
-				lowerStep = currentStep;
-				lowerStepIter = o.nrIterations;
-			}
-			zoomIter++;
-		}
-		return currentStep;
-	}
-
-	public double getInitialGradient() {
-		return currentInitGradientDot;
-	}
-
-	public double getPreviousInitialGradient() {
-		return previousInitGradientDot;
-	}
-
-	public double getPreviousStepUsed() {
-		return previousStepPicked;
-	}
-
-}
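For reference, the two conditions enforced above are the sufficient-decrease (Armijo) condition phi(alpha) <= phi(0) + c1*alpha*phi'(0) and the strong curvature condition |phi'(alpha)| <= -c2*phi'(0). A standalone sketch that checks both on the toy function phi(alpha) = 0.5*(1-alpha)^2 (the function and step sizes are illustrative assumptions, not part of this code; c1 and c2 match the defaults above):

public class WolfeDemo {
	//phi(alpha) = f(x + alpha*d) for f(x) = 0.5*x^2, x = 1, d = -1
	static double phi(double a)      { return 0.5 * (1 - a) * (1 - a); }
	static double phiPrime(double a) { return -(1 - a); }

	public static void main(String[] args) {
		double c1 = 1e-4, c2 = 0.9;
		for (double alpha : new double[]{0.5, 1.0, 2.5}) {
			boolean decrease  = phi(alpha) <= phi(0) + c1 * alpha * phiPrime(0);
			boolean curvature = Math.abs(phiPrime(alpha)) <= -c2 * phiPrime(0);
			//alpha=0.5 and alpha=1.0 satisfy both; alpha=2.5 fails both
			System.out.println("alpha=" + alpha + " decrease=" + decrease + " curvature=" + curvature);
		}
	}
}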
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java
deleted file mode 100644
index dcc704eb..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java
+++ /dev/null
@@ -1,45 +0,0 @@
-package optimization.linesearch;
-
-public class WolfeConditions {
-
-	/**
-	 * Sufficient-decrease constant (c1). Default constant.
-	 */
-
-	/**
-	 * Values for the sufficient-curvature constant (c2):
-	 * 0.9 - for Newton and quasi-Newton methods
-	 * 0.1 - for nonlinear conjugate gradient
-	 */
-
-	int debugLevel = 0;
-	public void setDebugLevel(int level){
-		debugLevel = level;
-	}
-
-	public static boolean suficientDecrease(DifferentiableLineSearchObjective o, double c1){
-		double value = o.getOriginalValue()+c1*o.getAlpha()*o.getInitialGradient();
-//		System.out.println("Sufficient Decrease original "+value+" new "+ o.getCurrentValue());
-		return o.getCurrentValue() <= value;
-	}
-
-	public static boolean sufficientCurvature(DifferentiableLineSearchObjective o, double c1, double c2){
-//		if(debugLevel >= 2){
-//			double current = Math.abs(o.getCurrentGradient());
-//			double orig = -c2*o.getInitialGradient();
-//			if(current <= orig){
-//				return true;
-//			}else{
-//				System.out.println("Not satisfying curvature condition; curvature " + current + " wants " + orig);
-//				return false;
-//			}
-//		}
-		return Math.abs(o.getCurrentGradient()) <= -c2*o.getInitialGradient();
-	}
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java
deleted file mode 100644
index 0429d531..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java
+++ /dev/null
@@ -1,104 +0,0 @@
-package optimization.projections;
-
-import java.util.Random;
-
-import optimization.util.MathUtils;
-import optimization.util.MatrixOutput;
-
-/**
- * Implements a projection onto a box set defined by a and b.
- * If either a or b is infinite then that bound is ignored.
- * @author javg
- */
-public class BoundsProjection extends Projection{
-
-	double a,b;
-	boolean ignoreA = false;
-	boolean ignoreB = false;
-	public BoundsProjection(double lowerBound, double upperBound) {
-		if(Double.isInfinite(lowerBound)){
-			this.ignoreA = true;
-		}else{
-			this.a = lowerBound;
-		}
-		if(Double.isInfinite(upperBound)){
-			this.ignoreB = true;
-		}else{
-			this.b = upperBound;
-		}
-	}
-
-	/**
-	 * Projects onto the box
-	 * a <= x_i <= b
-	 */
-	public void project(double[] original){
-		for (int i = 0; i < original.length; i++) {
-			if(!ignoreA && original[i] < a){
-				original[i] = a;
-			}else if(!ignoreB && original[i]>b){
-				original[i]=b;
-			}
-		}
-	}
-
-	/**
-	 * Generates, for each coordinate, a random number between a and b.
-	 */
-	Random r = new Random();
-
-	public double[] samplePoint(int numParams) {
-		double[] point = new double[numParams];
-		for (int i = 0; i < point.length; i++) {
-			double rand = r.nextDouble();
-			if(ignoreA && ignoreB){
-				//Use a large constant to avoid numbers near overflow
-				point[i] = rand*(1.E100+1.E100)-1.E100;
-			}else if(ignoreA){
-				//sample from [-1.E100, b]
-				point[i] = rand*(b+1.E100)-1.E100;
-			}else if(ignoreB){
-				//sample from [a, 1.E100]
-				point[i] = a + rand*(1.E100-a);
-			}else{
-				//sample from [a, b]
-				point[i] = a + rand*(b-a);
-			}
-		}
-		return point;
-	}
-
-	public static void main(String[] args) {
-		BoundsProjection sp = new BoundsProjection(0,Double.POSITIVE_INFINITY);
-
-		MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 1");
-		MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 2");
-		MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 3");
-
-		double[] d = {-1.1,1.2,1.4};
-		double[] original = d.clone();
-		MatrixOutput.printDoubleArray(d, "before");
-
-		sp.project(d);
-		MatrixOutput.printDoubleArray(d, "after");
-		System.out.println("Test projection: " + sp.testProjection(original, d));
-	}
-
-	double epsilon = 1.E-10;
-	public double[] perturbePoint(double[] point, int parameter){
-		double[] newPoint = point.clone();
-		if(!ignoreA && MathUtils.almost(point[parameter], a)){
-			newPoint[parameter]+=epsilon;
-		}else if(!ignoreB && MathUtils.almost(point[parameter], b)){
-			newPoint[parameter]-=epsilon;
-		}else{
-			newPoint[parameter]-=epsilon;
-		}
-		return newPoint;
-	}
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java
deleted file mode 100644
index b5a9f92f..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java
+++ /dev/null
@@ -1,72 +0,0 @@
-package optimization.projections;
-
-import optimization.util.MathUtils;
-import optimization.util.MatrixOutput;
-import util.ArrayMath;
-import util.Printing;
-
-public abstract class Projection {
-
-	public abstract void project(double[] original);
-
-	/**
-	 * From the projection theorem, "Nonlinear Programming" page 201, Fact 2:
-	 * given some z in R^n and a vector x* in X,
-	 * x* = [z]+ (the projection of z onto X) iff
-	 * (z-x*)'(x-x*) <= 0 for all x in X, with equality when x = x*.
-	 * See Figure 2.16 in the book.
-	 *
-	 * @param original
-	 * @param projected
-	 * @return
-	 */
-	public boolean testProjection(double[] original, double[] projected){
-		double[] original1 = original.clone();
-		//System.out.println(Printing.doubleArrayToString(original1, null, "original"));
-		//System.out.println(Printing.doubleArrayToString(projected, null, "projected"));
-		MathUtils.minusEquals(original1, projected, 1);
-		//System.out.println(Printing.doubleArrayToString(original1, null, "minus1"));
-		for(int i = 0; i < 10; i++){
-			double[] x = samplePoint(original.length);
-		//	System.out.println(Printing.doubleArrayToString(x, null, "sample"));
-			//If x equals the projection this dot product is zero, so we are still fine.
-			MathUtils.minusEquals(x, projected, 1);
-		//	System.out.println(Printing.doubleArrayToString(x, null, "minus2"));
-			double dotProd = MathUtils.dotProduct(original1, x);
-
-		//	System.out.println("dot " + dotProd);
-			if(dotProd > 0) return false;
-		}
-
-		//Perturbs the point a bit in all possible directions
-		for(int i = 0; i < original.length; i++){
-			double[] x = perturbePoint(projected,i);
-		//	System.out.println(Printing.doubleArrayToString(x, null, "perturbed"));
-			//If x equals the projection this dot product is zero, so we are still fine.
-			MathUtils.minusEquals(x, projected, 1);
-		//	System.out.println(Printing.doubleArrayToString(x, null, "minus2"));
-			double dotProd = MathUtils.dotProduct(original1, x);
-
-		//	System.out.println("dot " + dotProd);
-			if(dotProd > 0) return false;
-		}
-
-		return true;
-	}
-
-	//Samples a point from the constrained set
-	public abstract double[] samplePoint(int dimensions);
-
-	//Perturbs a point a bit, still leaving it inside the constraint set
-	public abstract double[] perturbePoint(double[] point, int parameter);
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java
deleted file mode 100644
index f22afcaf..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java
+++ /dev/null
@@ -1,127 +0,0 @@
-package optimization.projections;
-
-import java.util.Random;
-
-import optimization.util.MathUtils;
-import optimization.util.MatrixOutput;
-
-public class SimplexProjection extends Projection{
-
-	double scale;
-	public SimplexProjection(double scale) {
-		this.scale = scale;
-	}
-
-	/**
-	 * Projects the numbers of the array
-	 * onto a simplex of the given scale.
-	 * We follow the description of the paper
-	 * "Efficient Projections onto the l1-Ball
-	 * for Learning in High Dimensions".
-	 */
-	public void project(double[] original){
-		double[] ds = new double[original.length];
-		System.arraycopy(original, 0, ds, 0, ds.length);
-		//Clip negative entries; if the remaining sum is already within the scale we are done
-		for (int i = 0; i < ds.length; i++) ds[i] = ds[i]>0? ds[i]:0;
-		double sum = MathUtils.sum(ds);
-		if (scale - sum >= -1.E-10 ){
-			System.arraycopy(ds, 0, original, 0, ds.length);
-			//System.out.println("Not projecting");
-			return;
-		}
-		//System.out.println("projecting " + sum + " constraint " + scale);
-		util.Array.sortDescending(ds);
-		double currentSum = 0;
-		double previousTheta = 0;
-		double theta = 0;
-		for (int i = 0; i < ds.length; i++) {
-			currentSum+=ds[i];
-			theta = (currentSum-scale)/(i+1);
-			if(ds[i]-theta < -1e-10){
-				break;
-			}
-			previousTheta = theta;
-		}
-		//DEBUG
-		if(previousTheta < 0){
-			System.out.println("SimplexProjection: theta is smaller than zero: " + previousTheta);
-			System.exit(-1);
-		}
-		for (int i = 0; i < original.length; i++) {
-			original[i] = Math.max(original[i]-previousTheta, 0);
-		}
-	}
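To make the theta computation concrete, here is a small usage sketch of the class as it stood before deletion; the values in the comments follow from one pass over the sorted array:

import java.util.Arrays;
import optimization.projections.SimplexProjection;

public class SimplexDemo {
	public static void main(String[] args) {
		SimplexProjection sp = new SimplexProjection(1.0);
		double[] x = {0.6, 0.4, 0.3};  //sums to 1.3 > scale, so projection is needed
		//Sorted descending, the candidate thetas are:
		//  i=0: (0.6-1)/1 = -0.4;  i=1: (1.0-1)/2 = 0.0;  i=2: (1.3-1)/3 = 0.1
		//previousTheta ends at 0.1, so each entry becomes max(x_i - 0.1, 0)
		sp.project(x);
		System.out.println(Arrays.toString(x));  //[0.5, 0.3, 0.2], sums to 1.0
	}
}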
-	/**
-	 * Samples a point from the simplex of the given scale: sample each coordinate
-	 * uniformly from [0, scale], then normalize if the sum exceeds the scale.
-	 * This is probably not sampling uniformly from the simplex, but it is
-	 * enough for our purposes here.
-	 */
-	Random r = new Random();
-	public double[] samplePoint(int dimensions) {
-		double[] newPoint = new double[dimensions];
-		double sum =0;
-		for (int i = 0; i < newPoint.length; i++) {
-			double rand = r.nextDouble()*scale;
-			sum+=rand;
-			newPoint[i]=rand;
-		}
-		//Normalize
-		if(sum > scale){
-			for (int i = 0; i < newPoint.length; i++) {
-				newPoint[i]=scale*newPoint[i]/sum;
-			}
-		}
-		return newPoint;
-	}
-
-	public static void main(String[] args) {
-		SimplexProjection sp = new SimplexProjection(1);
-
-		double[] point = sp.samplePoint(3);
-		MatrixOutput.printDoubleArray(point , "random 1 sum:" + MathUtils.sum(point));
-		point = sp.samplePoint(3);
-		MatrixOutput.printDoubleArray(point , "random 2 sum:" + MathUtils.sum(point));
-		point = sp.samplePoint(3);
-		MatrixOutput.printDoubleArray(point , "random 3 sum:" + MathUtils.sum(point));
-
-		double[] d = {0,1.1,-10};
-		double[] original = d.clone();
-		MatrixOutput.printDoubleArray(d, "before");
-
-		sp.project(d);
-		MatrixOutput.printDoubleArray(d, "after");
-		System.out.println("Test projection: " + sp.testProjection(original, d));
-	}
-
-	double epsilon = 1.E-10;
-	public double[] perturbePoint(double[] point, int parameter){
-		double[] newPoint = point.clone();
-		if(MathUtils.almost(MathUtils.sum(point), scale)){
-			newPoint[parameter]-=epsilon;
-		}
-		else if(point[parameter]==0){
-			newPoint[parameter]+=epsilon;
-		}else if(MathUtils.almost(point[parameter], scale)){
-			newPoint[parameter]-=epsilon;
-		}
-		else{
-			newPoint[parameter]-=epsilon;
-		}
-		return newPoint;
-	}
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java
deleted file mode 100644
index 15760f18..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package optimization.stopCriteria;
-
-import java.util.ArrayList;
-
-import optimization.gradientBasedMethods.Objective;
-
-public class CompositeStopingCriteria implements StopingCriteria {
-
-	ArrayList<StopingCriteria> criterias;
-
-	public CompositeStopingCriteria() {
-		criterias = new ArrayList<StopingCriteria>();
-	}
-
-	public void add(StopingCriteria criteria){
-		criterias.add(criteria);
-	}
-
-	public boolean stopOptimization(Objective obj){
-		for(StopingCriteria criteria: criterias){
-			if(criteria.stopOptimization(obj)){
-				return true;
-			}
-		}
-		return false;
-	}
-
-	public void reset(){
-		for(StopingCriteria criteria: criterias){
-			criteria.reset();
-		}
-	}
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java
deleted file mode 100644
index 534ff833..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java
+++ /dev/null
@@ -1,30 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.util.MathUtils;
-
-public class GradientL2Norm implements StopingCriteria{
-
-	/**
-	 * Stop if the gradient norm is smaller
-	 * than gradientConvergenceValue
-	 */
-	protected double gradientConvergenceValue;
-
-	public GradientL2Norm(double gradientConvergenceValue){
-		this.gradientConvergenceValue = gradientConvergenceValue;
-	}
-
-	public void reset(){}
-
-	public boolean stopOptimization(Objective obj){
-		double norm = MathUtils.L2Norm(obj.gradient);
-		if(norm < gradientConvergenceValue){
-			System.out.println("Gradient norm below threshold");
-			return true;
-		}
-		return false;
-	}
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java
deleted file mode 100644
index 4a489641..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java
+++ /dev/null
@@ -1,48 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.util.MathUtils;
-
-/**
- * Divides the norm by the norm at the beginning of the optimization
- * @author javg
- */
-public class NormalizedGradientL2Norm extends GradientL2Norm{
-
-	/**
-	 * Stop if gradientNorm/originalGradientNorm is smaller
-	 * than gradientConvergenceValue
-	 */
-	protected double originalGradientNorm = -1;
-
-	public void reset(){
-		originalGradientNorm = -1;
-	}
-	public NormalizedGradientL2Norm(double gradientConvergenceValue){
-		super(gradientConvergenceValue);
-	}
-
-	public boolean stopOptimization(Objective obj){
-			double norm = MathUtils.L2Norm(obj.gradient);
-			if(originalGradientNorm == -1){
-				originalGradientNorm = norm;
-			}
-			if(originalGradientNorm < 1E-10){
-				System.out.println("Gradient norm is zero " + originalGradientNorm);
-				return true;
-			}
-			double normalizedNorm = 1.0*norm/originalGradientNorm;
-			if( normalizedNorm < gradientConvergenceValue){
-				System.out.println("Gradient norm below normalized norm threshold: " + norm + " original: " + originalGradientNorm + " normalized norm: " + normalizedNorm);
-				return true;
-			}else{
-//				System.out.println("projected gradient norm: " + norm);
-				return false;
-			}
-	}
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java
deleted file mode 100644
index 5ae554c2..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java
+++ /dev/null
@@ -1,60 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.util.MathUtils;
-
-/**
- * Divides the norm by the norm at the beginning of the optimization
- * @author javg
- */
-public class NormalizedProjectedGradientL2Norm extends ProjectedGradientL2Norm{
-
-	/**
-	 * Stop if gradientNorm/originalGradientNorm is smaller
-	 * than gradientConvergenceValue
-	 */
-	double originalProjectedNorm = -1;
-
-	public NormalizedProjectedGradientL2Norm(double gradientConvergenceValue){
-		super(gradientConvergenceValue);
-	}
-
-	public void reset(){
-		originalProjectedNorm = -1;
-	}
-
-	double[] projectGradient(ProjectedObjective obj){
-
-		if(obj.auxParameters == null){
-			obj.auxParameters = new double[obj.getNumParameters()];
-		}
-		System.arraycopy(obj.getParameters(), 0, obj.auxParameters, 0, obj.getNumParameters());
-		MathUtils.minusEquals(obj.auxParameters, obj.gradient, 1);
-		obj.auxParameters = obj.projectPoint(obj.auxParameters);
-		MathUtils.minusEquals(obj.auxParameters,obj.getParameters(),1);
-		return obj.auxParameters;
-	}
-
-	public boolean stopOptimization(Objective obj){
-		if(obj instanceof ProjectedObjective) {
-			ProjectedObjective o = (ProjectedObjective) obj;
-			double norm = MathUtils.L2Norm(projectGradient(o));
-			if(originalProjectedNorm == -1){
-				originalProjectedNorm = norm;
-			}
-			double normalizedNorm = 1.0*norm/originalProjectedNorm;
-			if( normalizedNorm < gradientConvergenceValue){
-				System.out.println("Gradient norm below normalized norm threshold: " + norm + " original: " + originalProjectedNorm + " normalized norm: " + normalizedNorm);
-				return true;
-			}else{
-//				System.out.println("projected gradient norm: " + norm);
-				return false;
-			}
-		}
-		System.out.println("Not a projected objective");
-		throw new RuntimeException();
-	}
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java
deleted file mode 100644
index 6dbbc50d..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java
+++ /dev/null
@@ -1,54 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.util.MathUtils;
-
-public class NormalizedValueDifference implements StopingCriteria{
-
-	/**
-	 * Stop if the difference between values is smaller than a threshold
-	 */
-	protected double valueConvergenceValue=0.01;
-	protected double previousValue = Double.NaN;
-	protected double currentValue = Double.NaN;
-
-	public NormalizedValueDifference(double valueConvergenceValue){
-		this.valueConvergenceValue = valueConvergenceValue;
-	}
-
-	public void reset(){
-		previousValue = Double.NaN;
-		currentValue = Double.NaN;
-	}
-
-	public boolean stopOptimization(Objective obj){
-		if(Double.isNaN(currentValue)){
-			currentValue = obj.getValue();
-			return false;
-		}else {
-			previousValue = currentValue;
-			currentValue = obj.getValue();
-			if(previousValue != 0){
-				double valueDiff = Math.abs(previousValue - currentValue)/Math.abs(previousValue);
-				if( valueDiff < valueConvergenceValue){
-					System.out.println("Leaving: difference in values is too small: Prev "
-							+ (previousValue/previousValue) + " Curr: " + (currentValue/previousValue)
-							+ " diff: " + valueDiff);
-					return true;
-				}
-			}else{
-				double valueDiff = Math.abs(previousValue - currentValue);
-				if( valueDiff < valueConvergenceValue){
-					System.out.println("Leaving: difference in values is too small: Prev "
-							+ (previousValue) + " Curr: " + (currentValue)
-							+ " diff: " + valueDiff);
-					return true;
-				}
-			}
-
-			return false;
-		}
-	}
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java
deleted file mode 100644
index aadf1fd5..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java
+++ /dev/null
@@ -1,51 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.util.MathUtils;
-
-public class ProjectedGradientL2Norm implements StopingCriteria{
-
-	/**
-	 * Stop if the projected gradient norm is smaller
-	 * than gradientConvergenceValue
-	 */
-	protected double gradientConvergenceValue;
-
-	public ProjectedGradientL2Norm(double gradientConvergenceValue){
-		this.gradientConvergenceValue = gradientConvergenceValue;
-	}
-
-	public void reset(){
-
-	}
-
-	double[] projectGradient(ProjectedObjective obj){
-
-		if(obj.auxParameters == null){
-			obj.auxParameters = new double[obj.getNumParameters()];
-		}
-		System.arraycopy(obj.getParameters(), 0, obj.auxParameters, 0, obj.getNumParameters());
-		MathUtils.minusEquals(obj.auxParameters, obj.gradient, 1);
-		obj.auxParameters = obj.projectPoint(obj.auxParameters);
-		MathUtils.minusEquals(obj.auxParameters,obj.getParameters(),1);
-		return obj.auxParameters;
-	}
-
-	public boolean stopOptimization(Objective obj){
-		if(obj instanceof ProjectedObjective) {
-			ProjectedObjective o = (ProjectedObjective) obj;
-			double norm = MathUtils.L2Norm(projectGradient(o));
-			if(norm < gradientConvergenceValue){
-	//			System.out.println("Gradient norm below threshold: " + norm);
-				return true;
-			}else{
-//				System.out.println("projected gradient norm: " + norm);
-				return false;
-			}
-		}
-		System.out.println("Not a projected objective");
-		throw new RuntimeException();
-	}
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java
deleted file mode 100644
index 10cf0522..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java
+++ /dev/null
@@ -1,8 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-
-public interface StopingCriteria {
-	public boolean stopOptimization(Objective obj);
-	public void reset();
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java
deleted file mode 100644
index e5d07229..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.util.MathUtils;
-
-public class ValueDifference implements StopingCriteria{
-
-	/**
-	 * Stop if the difference between values is smaller than a threshold
-	 */
-	protected double valueConvergenceValue=0.01;
-	protected double previousValue = Double.NaN;
-	protected double currentValue = Double.NaN;
-
-	public ValueDifference(double valueConvergenceValue){
-		this.valueConvergenceValue = valueConvergenceValue;
-	}
-
-	public void reset(){
-		previousValue = Double.NaN;
-		currentValue = Double.NaN;
-	}
-
-	public boolean stopOptimization(Objective obj){
-		if(Double.isNaN(currentValue)){
-			currentValue = obj.getValue();
-			return false;
-		}else {
-			previousValue = currentValue;
-			currentValue = obj.getValue();
-			if(previousValue - currentValue < valueConvergenceValue){
-//				System.out.println("Leaving: difference in values is too small: Prev "
-//						+ previousValue + " Curr: " + currentValue
-//						+ " diff: " + (previousValue - currentValue));
-				return true;
-			}
-			return false;
-		}
-	}
-}
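These criteria are meant to be composed: the composite fires as soon as any member fires. A minimal sketch against the interfaces above (the Objective construction is elided, since it depends on the concrete problem; the threshold values are illustrative assumptions):

import optimization.stopCriteria.CompositeStopingCriteria;
import optimization.stopCriteria.GradientL2Norm;
import optimization.stopCriteria.StopingCriteria;
import optimization.stopCriteria.ValueDifference;

public class StoppingDemo {
	//Combine several criteria: optimization stops as soon as any one of them fires.
	static StopingCriteria makeCriteria() {
		CompositeStopingCriteria stop = new CompositeStopingCriteria();
		stop.add(new GradientL2Norm(1e-6));   //gradient norm below 1e-6
		stop.add(new ValueDifference(1e-8));  //successive values closer than 1e-8
		return stop;
	}
}

In the optimizer's main loop one would then call stop.stopOptimization(obj) once per iteration and stop.reset() before reusing the criteria for another run.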
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java b/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java
deleted file mode 100644
index cdbdefc6..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package optimization.util;
-
-public class Interpolation {
-
-	/**
-	 * Fits a cubic polynomial to a function given two points,
-	 * such that either gradB is bigger than zero or funcB >= funcA.
-	 *
-	 * Nonlinear Programming, Appendix C
-	 * @param funcA
-	 * @param gradA
-	 * @param funcB
-	 * @param gradB
-	 */
-	public final static double cubicInterpolation(double a,
-			double funcA, double gradA, double b,double funcB, double gradB ){
-		if(gradB < 0 && funcA > funcB){
-			System.out.println("Cannot call cubic interpolation");
-			return -1;
-		}
-
-		double z = 3*(funcA-funcB)/(b-a) + gradA + gradB;
-		double w = Math.sqrt(z*z - gradA*gradB);
-		double min = b -(gradB+w-z)*(b-a)/(gradB-gradA+2*w);
-		return min;
-	}
-
-	public final static double quadraticInterpolation(double initFValue,
-			double initGrad, double point,double pointFValue){
-		double min = -1*initGrad*point*point/(2*(pointFValue-initGrad*point-initFValue));
-		return min;
-	}
-
-	public static void main(String[] args) {
-
-	}
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java b/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java
deleted file mode 100644
index 5343a39b..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java
+++ /dev/null
@@ -1,7 +0,0 @@
-package optimization.util;
-
-public class Logger {
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java b/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java
deleted file mode 100644
index af66f82c..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java
+++ /dev/null
@@ -1,339 +0,0 @@
-package optimization.util;
-
-import java.util.Arrays;
-
-public class MathUtils {
-
-	/**
-	 *
-	 * @param vector
-	 * @return
-	 */
-	public static double L2Norm(double[] vector){
-		double value = 0;
-		for(int i = 0; i < vector.length; i++){
-			double v = vector[i];
-			value+=v*v;
-		}
-		return Math.sqrt(value);
-	}
-
-	public static double sum(double[] v){
-		double sum = 0;
-		for (int i = 0; i < v.length; i++) {
-			sum+=v[i];
-		}
-		return sum;
-	}
-
-	/**
-	 * w = w + v
-	 * @param w
-	 * @param v
-	 */
-	public static void plusEquals(double[] w, double[] v) {
-		for(int i=0; i<w.length;i++){
-			w[i] += v[i];
-		}
-	}
-
-	/**
-	 * w[i] = w[i] + v
-	 * @param w
-	 * @param v
-	 */
-	public static void plusEquals(double[] w, double v) {
-		for(int i=0; i<w.length;i++){
-			w[i] += v;
-		}
-	}
-
-	/**
-	 * w[i] = w[i] - v
-	 * @param w
-	 * @param v
-	 */
-	public static void minusEquals(double[] w, double v) {
-		for(int i=0; i<w.length;i++){
-			w[i] -= v;
-		}
-	}
-
-	/**
-	 * w = w + a*v
-	 * @param w
-	 * @param v
-	 * @param a
-	 */
-	public static void plusEquals(double[] w, double[] v, double a) {
-		for(int i=0; i<w.length;i++){
-			w[i] += a*v[i];
-		}
-	}
-
-	/**
-	 * w = w - a*v
-	 * @param w
-	 * @param v
-	 * @param a
-	 */
-	public static void minusEquals(double[] w, double[] v, double a) {
-		for(int i=0; i<w.length;i++){
-			w[i] -= a*v[i];
-		}
-	}
-	/**
-	 * v = w - a*v
-	 * @param w
-	 * @param v
-	 * @param a
-	 */
-	public static void minusEqualsInverse(double[] w, double[] v, double a) {
-		for(int i=0; i<w.length;i++){
-			v[i] = w[i] - a*v[i];
-		}
-	}
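A quick sanity check of the in-place vector helpers above, as a standalone sketch with made-up values (expected results in the comments):

import java.util.Arrays;
import optimization.util.MathUtils;

public class MathUtilsDemo {
	public static void main(String[] args) {
		double[] w = {1.0, 2.0};
		double[] v = {10.0, 20.0};

		MathUtils.plusEquals(w, v);              //w = w + v       -> w = [11.0, 22.0]
		MathUtils.minusEquals(w, v, 0.5);        //w = w - 0.5*v   -> w = [6.0, 12.0]
		MathUtils.minusEqualsInverse(w, v, 1.0); //v = w - 1.0*v   -> v = [-4.0, -8.0]

		System.out.println(Arrays.toString(w) + " " + Arrays.toString(v));
	}
}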
-	public static double dotProduct(double[] w, double[] v){
-		double accum = 0;
-		for(int i=0; i<w.length;i++){
-			accum += w[i]*v[i];
-		}
-		return accum;
-	}
-
-	public static double[] arrayMinus(double[]w, double[]v){
-		double result[] = w.clone();
-		for(int i=0; i<w.length;i++){
-			result[i] -= v[i];
-		}
-		return result;
-	}
-
-	public static double[] arrayMinus(double[] result , double[]w, double[]v){
-		for(int i=0; i<w.length;i++){
-			result[i] = w[i]-v[i];
-		}
-		return result;
-	}
-
-	public static double[] negation(double[]w){
-		double result[] = new double[w.length];
-		for(int i=0; i<w.length;i++){
-			result[i] = -w[i];
-		}
-		return result;
-	}
-
-	public static double square(double value){
-		return value*value;
-	}
-	public static double[][] outerProduct(double[] w, double[] v){
-		double[][] result = new double[w.length][v.length];
-		for(int i = 0; i < w.length; i++){
-			for(int j = 0; j < v.length; j++){
-				result[i][j] = w[i]*v[j];
-			}
-		}
-		return result;
-	}
-	/**
-	 * result[i][j] = a*w[i]*v[j]
-	 * @param w
-	 * @param v
-	 * @param a
-	 * @return
-	 */
-	public static double[][] weightedouterProduct(double[] w, double[] v, double a){
-		double[][] result = new double[w.length][v.length];
-		for(int i = 0; i < w.length; i++){
-			for(int j = 0; j < v.length; j++){
-				result[i][j] = a*w[i]*v[j];
-			}
-		}
-		return result;
-	}
-
-	public static double[][] identity(int size){
-		double[][] result = new double[size][size];
-		for(int i = 0; i < size; i++){
-			result[i][i] = 1;
-		}
-		return result;
-	}
-
-	/**
-	 * w -= v
-	 * @param w
-	 * @param v
-	 */
-	public static void minusEquals(double[][] w, double[][] v){
-		for(int i = 0; i < w.length; i++){
-			for(int j = 0; j < w[0].length; j++){
-				w[i][j] -= v[i][j];
-			}
-		}
-	}
-
-	/**
-	 * w[i][j] -= a*v[i][j]
-	 * @param w
-	 * @param v
-	 */
-	public static void minusEquals(double[][] w, double[][] v, double a){
-		for(int i = 0; i < w.length; i++){
-			for(int j = 0; j < w[0].length; j++){
-				w[i][j] -= a*v[i][j];
-			}
-		}
-	}
-
-	/**
-	 * w += v
-	 * @param w
-	 * @param v
-	 */
-	public static void plusEquals(double[][] w, double[][] v){
-		for(int i = 0; i < w.length; i++){
-			for(int j = 0; j < w[0].length; j++){
-				w[i][j] += v[i][j];
-			}
-		}
-	}
-
-	/**
-	 * w[i][j] += a*v[i][j]
-	 * @param w
-	 * @param v
-	 */
-	public static void plusEquals(double[][] w, double[][] v, double a){
-		for(int i = 0; i < w.length; i++){
-			for(int j = 0; j < w[0].length; j++){
-				w[i][j] += a*v[i][j];
-			}
-		}
-	}
-
-	/**
-	 * result = w*v
-	 * @param w
-	 * @param v
-	 * @return
-	 */
-	public static double[][] matrixMultiplication(double[][] w,double[][] v){
-		int w1 = w.length;
-		int w2 = w[0].length;
-		int v1 = v.length;
-		int v2 = v[0].length;
-
-		if(w2 != v1){
-			System.out.println("Matrix dimensions do not agree...");
-			System.exit(-1);
-		}
-
-		double[][] result = new double[w1][v2];
-		for(int w_i1 = 0; w_i1 < w1; w_i1++){
-			for(int v_i2 = 0; v_i2 < v2; v_i2++){
-				double sum = 0;
-				for(int w_i2 = 0; w_i2 < w2; w_i2++){
-						sum += w[w_i1][w_i2]*v[w_i2][v_i2];
-				}
-				result[w_i1][v_i2] = sum;
-			}
-		}
-		return result;
-	}
-
-	/**
-	 * w = w.*v
-	 * @param w
-	 * @param v
-	 */
-	public static void matrixScalarMultiplication(double[][] w,double v){
-		int w1 = w.length;
-		int w2 = w[0].length;
-		for(int w_i1 = 0; w_i1 < w1; w_i1++){
-				for(int w_i2 = 0; w_i2 < w2; w_i2++){
-						w[w_i1][w_i2] *= v;
-				}
-		}
-	}
-
-	public static
void scalarMultiplication(double[] w,double v){ -		int w1 = w.length; -		for(int w_i1 = 0; w_i1 < w1; w_i1++){ -			w[w_i1 ] *= v;	 -		} -		 -	} -	 -	public static  double[] matrixVector(double[][] w,double[] v){ -		int w1 = w.length; -		int w2 = w[0].length; -		int v1 = v.length; -		 -		if(w2 != v1){ -			System.out.println("Matrix dimensions do not agree..."); -			System.exit(-1); -		} -		 -		double[] result = new double[w1]; -		for(int w_i1 = 0; w_i1 < w1; w_i1++){ -				double sum = 0; -				for(int w_i2 = 0; w_i2 < w2; w_i2++){ -						sum += w[w_i1 ][w_i2]*v[w_i2];	 -				} -				result[w_i1] = sum; -		} -		return result; -	} -	 -	public static boolean allPositive(double[] array){ -		for (int i = 0; i < array.length; i++) { -			if(array[i] < 0) return false; -		} -		return true; -	} -	 -	 -	 -	 -	 -		public static void main(String[] args) { -			double[][] m1 = new double[2][2]; -			m1[0][0]=2; -			m1[1][0]=2; -			m1[0][1]=2; -			m1[1][1]=2; -			MatrixOutput.printDoubleArray(m1, "m1"); -			double[][] m2 = new double[2][2]; -			m2[0][0]=3; -			m2[1][0]=3; -			m2[0][1]=3; -			m2[1][1]=3; -			MatrixOutput.printDoubleArray(m2, "m2"); -			double[][] result = matrixMultiplication(m1, m2); -			MatrixOutput.printDoubleArray(result, "result"); -			matrixScalarMultiplication(result, 3); -			MatrixOutput.printDoubleArray(result, "result after multiply by 3"); -		} -	 -		public static boolean almost(double a, double b, double prec){ -			return Math.abs(a-b)/Math.abs(a+b) <= prec || (almostZero(a) && almostZero(b)); -		} - -		public static boolean almost(double a, double b){ -			return Math.abs(a-b)/Math.abs(a+b) <= 1e-10 || (almostZero(a) && almostZero(b)); -		} - -		public static boolean almostZero(double a) { -			return Math.abs(a) <= 1e-30; -		} -	 -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java b/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java deleted file mode 100644 index 9fbdf955..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java +++ /dev/null @@ -1,28 +0,0 @@ -package optimization.util; - - -public class MatrixOutput { -	public static void printDoubleArray(double[][] array, String arrayName) { -		int size1 = array.length; -		int size2 = array[0].length; -		System.out.println(arrayName); -		for (int i = 0; i < size1; i++) { -			for (int j = 0; j < size2; j++) { -				System.out.print(" " + StaticTools.prettyPrint(array[i][j], -						"00.00E00", 4) + " "); - -			} -			System.out.println(); -		} -		System.out.println(); -	} -	 -	public static void printDoubleArray(double[] array, String arrayName) { -		System.out.println(arrayName); -		for (int i = 0; i < array.length; i++) { -				System.out.print(" " + StaticTools.prettyPrint(array[i], -						"00.00E00", 4) + " "); -		} -		System.out.println(); -	} -} diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java b/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java deleted file mode 100644 index bcabee06..00000000 --- a/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java +++ /dev/null @@ -1,180 +0,0 @@ -package optimization.util; - - -import java.io.File; -import java.io.PrintStream; - -public class StaticTools { - -	static java.text.DecimalFormat fmt = new java.text.DecimalFormat(); - -	public static void createDir(String directory) { - -		File dir = new File(directory); -		if (!dir.isDirectory()) { -			boolean success = dir.mkdirs(); -			if (!success) { -				
System.out.println("Unable to create directory " + directory);
-				System.exit(0);
-			}
-			System.out.println("Created directory " + directory);
-		} else {
-			System.out.println("Reusing directory " + directory);
-		}
-	}
-
-	/*
-	 * q and p are indexed by [source][foreign]; for each source index,
-	 * sum_f q = 1, and the same for p.
-	 * KL(q,p) = E_q[ log(q/p) ]
-	 */
-	public static double KLDistance(double[][] p, double[][] q, int sourceSize,
-			int foreignSize) {
-		double totalKL = 0;
-		// common.StaticTools.printMatrix(q, sourceSize, foreignSize, "q",
-		// System.out);
-		// common.StaticTools.printMatrix(p, sourceSize, foreignSize, "p",
-		// System.out);
-		for (int i = 0; i < sourceSize; i++) {
-			double kl = 0;
-			for (int j = 0; j < foreignSize; j++) {
-				assert !Double.isNaN(q[i][j]) : "KLDistance q: prob is NaN";
-				assert !Double.isNaN(p[i][j]) : "KLDistance p: prob is NaN";
-				if (p[i][j] == 0 || q[i][j] == 0) {
-					continue;
-				} else {
-					kl += q[i][j] * Math.log(q[i][j] / p[i][j]);
-				}
-
-			}
-			totalKL += kl;
-		}
-		assert !Double.isNaN(totalKL) : "KLDistance: prob is NaN";
-		if (totalKL < -1.0E-10) {
-			System.out.println("KL smaller than zero " + totalKL);
-			System.out.println("Source size " + sourceSize);
-			System.out.println("Foreign size " + foreignSize);
-			StaticTools.printMatrix(q, sourceSize, foreignSize, "q",
-					System.out);
-			StaticTools.printMatrix(p, sourceSize, foreignSize, "p",
-					System.out);
-			System.exit(-1);
-		}
-		return totalKL / sourceSize;
-	}
-
-	/*
-	 * indexed by [fi][si]
-	 */
-	public static double KLDistancePrime(double[][] p, double[][] q,
-			int sourceSize, int foreignSize) {
-		double totalKL = 0;
-		for (int i = 0; i < sourceSize; i++) {
-			double kl = 0;
-			for (int j = 0; j < foreignSize; j++) {
-				assert !Double.isNaN(q[j][i]) : "KLDistance q: prob is NaN";
-				assert !Double.isNaN(p[j][i]) : "KLDistance p: prob is NaN";
-				if (p[j][i] == 0 || q[j][i] == 0) {
-					continue;
-				} else {
-					kl += q[j][i] * Math.log(q[j][i] / p[j][i]);
-				}
-
-			}
-			totalKL += kl;
-		}
-		assert !Double.isNaN(totalKL) : "KLDistance: prob is NaN";
-		return totalKL / sourceSize;
-	}
-
-	public static double Entropy(double[][] p, int sourceSize, int foreignSize) {
-		double totalE = 0;
-		for (int i = 0; i < foreignSize; i++) {
-			double e = 0;
-			for (int j = 0; j < sourceSize; j++) {
-				e += p[i][j] * Math.log(p[i][j]);
-			}
-			totalE += e;
-		}
-		return totalE / sourceSize;
-	}
-
-	public static double[][] copyMatrix(double[][] original, int sourceSize,
-			int foreignSize) {
-		double[][] result = new double[sourceSize][foreignSize];
-		for (int i = 0; i < sourceSize; i++) {
-			for (int j = 0; j < foreignSize; j++) {
-				result[i][j] = original[i][j];
-			}
-		}
-		return result;
-	}
-
-	public static void printMatrix(double[][] matrix, int sourceSize,
-			int foreignSize, String info, PrintStream out) {
-
-		java.text.DecimalFormat fmt = new java.text.DecimalFormat();
-		fmt.setMaximumFractionDigits(3);
-		fmt.setMaximumIntegerDigits(3);
-		fmt.setMinimumFractionDigits(3);
-		fmt.setMinimumIntegerDigits(3);
-
-		out.println(info);
-
-		for (int i = 0; i < foreignSize; i++) {
-			for (int j = 0; j < sourceSize; j++) {
-				out.print(prettyPrint(matrix[j][i], ".00E00", 6) + " ");
-			}
-			out.println();
-		}
-		out.println();
-		out.println();
-	}
-
-	public static void printMatrix(int[][] matrix, int sourceSize,
-			int foreignSize, String info, PrintStream out) {
-
-		out.println(info);
-		for (int i = 0; i <
foreignSize; i++) { -			for (int j = 0; j < sourceSize; j++) { -				out.print(matrix[j][i] + " "); -			} -			out.println(); -		} -		out.println(); -		out.println(); -	} - -	public static String formatTime(long duration) { -		StringBuilder sb = new StringBuilder(); -		double d = duration / 1000; -		fmt.applyPattern("00"); -		sb.append(fmt.format((int) (d / (60 * 60))) + ":"); -		d -= ((int) d / (60 * 60)) * 60 * 60; -		sb.append(fmt.format((int) (d / 60)) + ":"); -		d -= ((int) d / 60) * 60; -		fmt.applyPattern("00.0"); -		sb.append(fmt.format(d)); -		return sb.toString(); -	} - -	public static String prettyPrint(double d, String patt, int len) { -		fmt.applyPattern(patt); -		String s = fmt.format(d); -		while (s.length() < len) { -			s = " " + s; -		} -		return s; -	} -	 -	 -	public static long getUsedMemory(){ -		System.gc(); -		return (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/ (1024 * 1024); -	} -	 -	public final static boolean compareDoubles(double d1, double d2){ -		return Math.abs(d1-d2) <= 1.E-10; -	} -	 -	 -} diff --git a/gi/posterior-regularisation/prjava/src/phrase/Agree.java b/gi/posterior-regularisation/prjava/src/phrase/Agree.java deleted file mode 100644 index 8f7b499e..00000000 --- a/gi/posterior-regularisation/prjava/src/phrase/Agree.java +++ /dev/null @@ -1,204 +0,0 @@ -package phrase;
 -
 -import gnu.trove.TIntArrayList;
 -
 -import io.FileUtil;
 -
 -import java.io.File;
 -import java.io.IOException;
 -import java.io.PrintStream;
 -import java.util.List;
 -
 -import phrase.Corpus.Edge;
 -
 -public class Agree {
 -	PhraseCluster model1;
 -	C2F model2;
 -	Corpus c;
 -	private int K,n_phrases, n_words, n_contexts, n_positions1,n_positions2;
 -	
-	/**@brief sum of the log-likelihoods of the two
 -	 * individual models
 -	 */
 -	public double llh;
 -	/**@brief Bhattacharyya distance
 -	 * 
 -	 */
 -	public double bdist; 
 -	/**
 -	 * 
 -	 * @param numCluster
 -	 * @param corpus
 -	 */
 -	public Agree(int numCluster, Corpus corpus){
 -		
 -		model1=new PhraseCluster(numCluster, corpus);
 -		model2=new C2F(numCluster,corpus);
 -		c=corpus;
 -		n_words=c.getNumWords();
 -		n_phrases=c.getNumPhrases();
 -		n_contexts=c.getNumContexts();
 -		n_positions1=c.getNumContextPositions();
 -		n_positions2=2;
 -		K=numCluster;
 -		
 -	}
 -	
 -	/**@brief test
 -	 * 
 -	 */
 -	public static void main(String args[]){
 -		//String in="../pdata/canned.con";
 -		String in="../pdata/btec.con";
 -		String out="../pdata/posterior.out";
 -		int numCluster=25;
 -		Corpus corpus = null;
 -		File infile = new File(in);
 -		try {
 -			System.out.println("Reading concordance from " + infile);
 -			corpus = Corpus.readFromFile(FileUtil.reader(infile));
 -			corpus.printStats(System.out);
 -		} catch (IOException e) {
 -			System.err.println("Failed to open input file: " + infile);
 -			e.printStackTrace();
 -			System.exit(1);
 -		}
 -		
 -		Agree agree=new Agree(numCluster, corpus);
 -		int iter=20;
 -		for(int i=0;i<iter;i++){
 -			agree.EM();
 -			System.out.println("Iter"+i+", llh: "+agree.llh+
 -					", divergence:"+agree.bdist+
 -							" sum: "+(agree.llh+agree.bdist));
 -		}
 -		
 -		File outfile = new File (out);
 -		try {
 -			PrintStream ps = FileUtil.printstream(outfile);
 -			agree.displayPosterior(ps);
 -		//	ps.println();
 -		//	c2f.displayModelParam(ps);
 -			ps.close();
 -		} catch (IOException e) {
 -			System.err.println("Failed to open output file: " + outfile);
 -			e.printStackTrace();
 -			System.exit(1);
 -		}
 -		
 -	}
 -	
 -	public double EM(){
 -		
 -		double [][][]exp_emit1=new double [K][n_positions1][n_words];
 -		double [][]exp_pi1=new double[n_phrases][K];
 -		
 -		double [][][]exp_emit2=new double [K][n_positions2][n_words];
 -		double [][]exp_pi2=new double[n_contexts][K];
 -		
 -		llh=0;
 -		bdist=0;
 -		//E
 -		for(int context=0; context< n_contexts; context++){
 -			
 -			List<Edge> contexts = c.getEdgesForContext(context);
 -
 -			for (int ctx=0; ctx<contexts.size(); ctx++){
 -				Edge edge = contexts.get(ctx);
 -				int phrase=edge.getPhraseId();
 -				double p[]=posterior(edge);
 -				double z = arr.F.l1norm(p);
 -				assert z > 0;
 -				bdist += edge.getCount() * Math.log(z);
 -				arr.F.l1normalize(p);
 -				
 -				double count = edge.getCount();
 -				//increment expected count
 -				TIntArrayList phraseToks = edge.getPhrase();
 -				TIntArrayList contextToks = edge.getContext();
 -				for(int tag=0;tag<K;tag++){
 -
 -					for(int position=0;position<n_positions1;position++){
 -						exp_emit1[tag][position][contextToks.get(position)]+=p[tag]*count;
 -					}
 -					
 -					exp_emit2[tag][0][phraseToks.get(0)]+=p[tag]*count;
 -					exp_emit2[tag][1][phraseToks.get(phraseToks.size()-1)]+=p[tag]*count;
 -					
 -					exp_pi1[phrase][tag]+=p[tag]*count;
 -					exp_pi2[context][tag]+=p[tag]*count;
 -				}
 -			}
 -		}
 -		
 -		//System.out.println("Log likelihood: "+loglikelihood);
 -		
 -		//M
 -		for(double [][]i:exp_emit1){
 -			for(double []j:i){
 -				arr.F.l1normalize(j);
 -			}
 -		}
 -		
 -		for(double []j:exp_pi1){
 -			arr.F.l1normalize(j);
 -		}
 -		
 -		for(double [][]i:exp_emit2){
 -			for(double []j:i){
 -				arr.F.l1normalize(j);
 -			}
 -		}
 -		
 -		for(double []j:exp_pi2){
 -			arr.F.l1normalize(j);
 -		}
 -		
 -		model1.emit=exp_emit1;
 -		model1.pi=exp_pi1;
 -		model2.emit=exp_emit2;
 -		model2.pi=exp_pi2;
 -		
 -		return llh;
 -	}
 -
 -	public double[] posterior(Corpus.Edge edge) 
 -	{
 -		double[] prob1=model1.posterior(edge);
 -		double[] prob2=model2.posterior(edge);
 -		
 -		llh+=edge.getCount()*Math.log(arr.F.l1norm(prob1));
 -		llh+=edge.getCount()*Math.log(arr.F.l1norm(prob2));
 -		arr.F.l1normalize(prob1);
 -		arr.F.l1normalize(prob2);
 -		
 -		for(int i=0;i<prob1.length;i++){
 -			prob1[i]*=prob2[i];
 -			prob1[i]=Math.sqrt(prob1[i]);
 -		}
 -		
 -		return prob1;
 -	}
 -	
 -	public void displayPosterior(PrintStream ps)
 -	{	
 -		displayPosterior(ps, c.getEdges());
 -	}
 -	
 -	public void displayPosterior(PrintStream ps, List<Edge> test)
 -	{	
 -		for (Edge edge : test)
 -		{
 -			double probs[] = posterior(edge);
 -			arr.F.l1normalize(probs);
 -
 -			// emit phrase
 -			ps.print(edge.getPhraseString());
 -			ps.print("\t");
 -			ps.print(edge.getContextString(true));
 -			int t=arr.F.argmax(probs);
 -			ps.println(" ||| C=" + t);
 -		}
 -	}
 -	
 -}
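The quantity accumulated in bdist is, per edge, the log of the Bhattacharyya coefficient between the two models' posteriors: posterior() returns the unnormalized vector sqrt(p1[i]*p2[i]), and its l1 norm is BC(p1,p2) = sum_i sqrt(p1[i]*p2[i]). The same holds for Agree2Sides below. A standalone sketch (arr.F is not shown in this diff, so a minimal l1norm stand-in is inlined; the distributions are made up):

public class BhattacharyyaDemo {
	//Minimal stand-in for arr.F.l1norm
	static double l1norm(double[] v) {
		double s = 0;
		for (double x : v) s += x;
		return s;
	}

	public static void main(String[] args) {
		double[] p1 = {0.7, 0.2, 0.1};  //posterior of model 1 (normalized)
		double[] p2 = {0.6, 0.3, 0.1};  //posterior of model 2 (normalized)

		double[] combined = new double[p1.length];
		for (int i = 0; i < p1.length; i++)
			combined[i] = Math.sqrt(p1[i] * p2[i]);  //what posterior() computes

		double bc = l1norm(combined);  //Bhattacharyya coefficient, always <= 1
		System.out.println("BC = " + bc + ", log BC = " + Math.log(bc));
		//BC = 1 iff p1 == p2, so maximizing log BC pushes the two models to agree.
	}
}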
 diff --git a/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java b/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java deleted file mode 100644 index 031f887f..00000000 --- a/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java +++ /dev/null @@ -1,197 +0,0 @@ -package phrase;
 -
 -import gnu.trove.TIntArrayList;
 -
 -import io.FileUtil;
 -
 -import java.io.File;
 -import java.io.IOException;
 -import java.io.PrintStream;
 -import java.util.List;
 -
 -import phrase.Corpus.Edge;
 -
 -public class Agree2Sides {
 -	PhraseCluster model1,model2;
 -	Corpus c1,c2;
 -	private int K;
 -	
-	/**@brief sum of the log-likelihoods of the two
 -	 * individual models
 -	 */
 -	public double llh;
 -	/**@brief Bhattacharyya distance
 -	 * 
 -	 */
 -	public double bdist; 
 -	/**
 -	 * 
 -	 * @param numCluster
 -	 * @param corpus
 -	 */
 -	public Agree2Sides(int numCluster, Corpus corpus1 , Corpus corpus2 ){
 -		
 -		model1=new PhraseCluster(numCluster, corpus1);
 -		model2=new PhraseCluster(numCluster,corpus2);
 -		c1=corpus1;
 -		c2=corpus2;
 -		K=numCluster;
 -		
 -	}
 -	
 -	/**@brief test
 -	 * 
 -	 */
 -	public static void main(String args[]){
 -		//String in="../pdata/canned.con";
 -	//	String in="../pdata/btec.con";
 -		String in1="../pdata/source.txt";
 -		String in2="../pdata/target.txt";
 -		String out="../pdata/posterior.out";
 -		int numCluster=25;
 -		Corpus corpus1 = null,corpus2=null;
 -		File infile1 = new File(in1),infile2=new File(in2);
 -		try {
 -			System.out.println("Reading concordance from " + infile1);
 -			corpus1 = Corpus.readFromFile(FileUtil.reader(infile1));
 -			System.out.println("Reading concordance from " + infile2);
 -			corpus2 = Corpus.readFromFile(FileUtil.reader(infile2));
 -			corpus1.printStats(System.out);
 -		} catch (IOException e) {
 -			System.err.println("Failed to open input file: " + infile1);
 -			e.printStackTrace();
 -			System.exit(1);
 -		}
 -		
 -		Agree2Sides agree=new Agree2Sides(numCluster, corpus1,corpus2);
 -		int iter=20;
 -		for(int i=0;i<iter;i++){
 -			agree.EM();
 -			System.out.println("Iter"+i+", llh: "+agree.llh+
 -					", divergence:"+agree.bdist+
 -							" sum: "+(agree.llh+agree.bdist));
 -		}
 -		
 -		File outfile = new File (out);
 -		try {
 -			PrintStream ps = FileUtil.printstream(outfile);
 -			agree.displayPosterior(ps);
 -		//	ps.println();
 -		//	c2f.displayModelParam(ps);
 -			ps.close();
 -		} catch (IOException e) {
 -			System.err.println("Failed to open output file: " + outfile);
 -			e.printStackTrace();
 -			System.exit(1);
 -		}
 -		
 -	}
 -	
 -	public double EM(){
 -		
 -		double [][][]exp_emit1=new double [K][c1.getNumContextPositions()][c1.getNumWords()];
 -		double [][]exp_pi1=new double[c1.getNumPhrases()][K];
 -		
 -		double [][][]exp_emit2=new double [K][c2.getNumContextPositions()][c2.getNumWords()];
 -		double [][]exp_pi2=new double[c2.getNumPhrases()][K];
 -		
 -		llh=0;
 -		bdist=0;
 -		//E
 -		for(int i=0;i<c1.getEdges().size();i++){
 -			Edge edge1=c1.getEdges().get(i);
 -			Edge edge2=c2.getEdges().get(i);
 -			double p[]=posterior(i);
 -			double z = arr.F.l1norm(p);
 -			assert z > 0;
 -			bdist += edge1.getCount() * Math.log(z);
 -			arr.F.l1normalize(p);
 -			double count = edge1.getCount();
 -				//increment expected count
 -			TIntArrayList contextToks1 = edge1.getContext();
 -			TIntArrayList contextToks2 = edge2.getContext();
 -			int phrase1=edge1.getPhraseId();
 -			int phrase2=edge2.getPhraseId();
 -			for(int tag=0;tag<K;tag++){
 -				for(int position=0;position<c1.getNumContextPositions();position++){
 -					exp_emit1[tag][position][contextToks1.get(position)]+=p[tag]*count;
 -				}
 -				for(int position=0;position<c2.getNumContextPositions();position++){
 -					exp_emit2[tag][position][contextToks2.get(position)]+=p[tag]*count;
 -				}
 -				exp_pi1[phrase1][tag]+=p[tag]*count;
 -				exp_pi2[phrase2][tag]+=p[tag]*count;
 -			}
 -		}
 -		
 -		//System.out.println("Log likelihood: "+loglikelihood);
 -		
 -		//M
 -		for(double [][]i:exp_emit1){
 -			for(double []j:i){
 -				arr.F.l1normalize(j);
 -			}
 -		}
 -		
 -		for(double []j:exp_pi1){
 -			arr.F.l1normalize(j);
 -		}
 -		
 -		for(double [][]i:exp_emit2){
 -			for(double []j:i){
 -				arr.F.l1normalize(j);
 -			}
 -		}
 -		
 -		for(double []j:exp_pi2){
 -			arr.F.l1normalize(j);
 -		}
 -		
 -		model1.emit=exp_emit1;
 -		model1.pi=exp_pi1;
 -		model2.emit=exp_emit2;
 -		model2.pi=exp_pi2;
 -		
 -		return llh;
 -	}
 -
 -	public double[] posterior(int edgeIdx) 
 -	{
 -		return posterior(c1.getEdges().get(edgeIdx), c2.getEdges().get(edgeIdx));
 -	}
 -	
 -	public double[] posterior(Edge e1, Edge e2) 
 -	{
 -		double[] prob1=model1.posterior(e1);
 -		double[] prob2=model2.posterior(e2);
 -		
 -		llh+=e1.getCount()*Math.log(arr.F.l1norm(prob1));
 -		llh+=e2.getCount()*Math.log(arr.F.l1norm(prob2));
 -		arr.F.l1normalize(prob1);
 -		arr.F.l1normalize(prob2);
 -		
 -		for(int i=0;i<prob1.length;i++){
 -			prob1[i]*=prob2[i];
 -			prob1[i]=Math.sqrt(prob1[i]);
 -		}
 -		
 -		return prob1;
 -	}
 -	
 -	public void displayPosterior(PrintStream ps)
 -	{	
 -		for (int i=0;i<c1.getEdges().size();i++)
 -		{
 -			Edge edge=c1.getEdges().get(i);
 -			double probs[] = posterior(i);
 -			arr.F.l1normalize(probs);
 -
 -			// emit phrase
 -			ps.print(edge.getPhraseString());
 -			ps.print("\t");
 -			ps.print(edge.getContextString(true));
 -			int t=arr.F.argmax(probs);
 -			ps.println(" ||| C=" + t);
 -		}
 -	}
 -}
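
Both EM routines above follow the same pattern: the E step scatters posterior-weighted counts into expectation tables, and the M step l1-normalises each distribution in place. A toy, self-contained version of that update for a single edge (dimensions and values invented):

    import java.util.Arrays;

    public class ExpectedCounts {
        public static void main(String[] args) {
            int K = 2, V = 3;                 // tags, vocabulary size
            double[][] expEmit = new double[K][V];
            // one edge: word id 1 observed 4 times, posterior (0.75, 0.25)
            double[] p = {0.75, 0.25};
            double count = 4;
            int word = 1;
            for (int tag = 0; tag < K; tag++) // E step: soft counts
                expEmit[tag][word] += p[tag] * count;
            for (double[] row : expEmit) {    // M step: renormalise each row
                double z = 0;
                for (double x : row) z += x;
                if (z > 0)
                    for (int i = 0; i < row.length; i++) row[i] /= z;
            }
            System.out.println(Arrays.deepToString(expEmit));
        }
    }
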
diff --git a/gi/posterior-regularisation/prjava/src/phrase/C2F.java b/gi/posterior-regularisation/prjava/src/phrase/C2F.java
deleted file mode 100644
index e8783950..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/C2F.java
+++ /dev/null
@@ -1,216 +0,0 @@
-package phrase;
 -
 -import gnu.trove.TIntArrayList;
 -
 -import io.FileUtil;
 -
 -import java.io.File;
 -import java.io.IOException;
 -import java.io.PrintStream;
 -import java.util.Arrays;
 -import java.util.List;
 -
 -import phrase.Corpus.Edge;
 -
-/**
- * @brief model in which the context generates the phrase:
- * each cluster is conditioned on the context and emits the
- * phrase's boundary words
- * @author desaic
- *
- */
 -public class C2F {
 -	public int K;
 -	private int n_words, n_contexts, n_positions;
 -	public Corpus c;
 -	
 -	/**@brief
 -	 *  emit[tag][position][word] = p(word | tag, position in phrase)
 -	 */
 -	public double emit[][][];
 -	/**@brief
 -	 *  pi[context][tag] = p(tag | context)
 -	 */
 -	public double pi[][];
 -	
 -	public C2F(int numCluster, Corpus corpus){
 -		K=numCluster;
 -		c=corpus;
 -		n_words=c.getNumWords();
 -		n_contexts=c.getNumContexts();
 -		
-		//number of phrase positions modelled: currently the first and
-		//last word on each of the source and target sides; if the phrase
-		//has length 1 on either side, the same word fills both positions
 -		n_positions=c.phraseEdges(c.getEdges().get(0).getPhrase()).size();
 -		
 -		emit=new double [K][n_positions][n_words];
 -		pi=new double[n_contexts][K];
 -		
 -		for(double [][]i:emit){
 -			for(double []j:i){
 -				arr.F.randomise(j);
 -			}
 -		}
 -		
 -		for(double []j:pi){
 -			arr.F.randomise(j);
 -		}
 -	}
 -	
 -	/**@brief test
 -	 * 
 -	 */
 -	public static void main(String args[]){
 -		String in="../pdata/canned.con";
 -		String out="../pdata/posterior.out";
 -		int numCluster=25;
 -		Corpus corpus = null;
 -		File infile = new File(in);
 -		try {
 -			System.out.println("Reading concordance from " + infile);
 -			corpus = Corpus.readFromFile(FileUtil.reader(infile));
 -			corpus.printStats(System.out);
 -		} catch (IOException e) {
 -			System.err.println("Failed to open input file: " + infile);
 -			e.printStackTrace();
 -			System.exit(1);
 -		}
 -		
 -		C2F c2f=new C2F(numCluster,corpus);
 -		int iter=20;
 -		double llh=0;
 -		for(int i=0;i<iter;i++){
 -			llh=c2f.EM();
 -			System.out.println("Iter"+i+", llh: "+llh);
 -		}
 -		
 -		File outfile = new File (out);
 -		try {
 -			PrintStream ps = FileUtil.printstream(outfile);
 -			c2f.displayPosterior(ps);
 -		//	ps.println();
 -		//	c2f.displayModelParam(ps);
 -			ps.close();
 -		} catch (IOException e) {
 -			System.err.println("Failed to open output file: " + outfile);
 -			e.printStackTrace();
 -			System.exit(1);
 -		}
 -		
 -	}
 -	
 -	public double EM(){
 -		double [][][]exp_emit=new double [K][n_positions][n_words];
 -		double [][]exp_pi=new double[n_contexts][K];
 -		
 -		double loglikelihood=0;
 -		
 -		//E
 -		for(int context=0; context< n_contexts; context++){
 -			
 -			List<Edge> contexts = c.getEdgesForContext(context);
 -
 -			for (int ctx=0; ctx<contexts.size(); ctx++){
 -				Edge edge = contexts.get(ctx);
 -				double p[]=posterior(edge);
 -				double z = arr.F.l1norm(p);
 -				assert z > 0;
 -				loglikelihood += edge.getCount() * Math.log(z);
 -				arr.F.l1normalize(p);
 -				
 -				double count = edge.getCount();
-				//increment expected counts at the same boundary offsets
-				//used by posterior(), so the E and M steps stay consistent
-				TIntArrayList phrase= edge.getPhrase();
-				TIntArrayList offsets = c.phraseEdges(phrase);
-				for(int tag=0;tag<K;tag++){
-					for (int i=0; i < offsets.size(); ++i)
-						exp_emit[tag][i][phrase.get(offsets.get(i))]+=p[tag]*count;
-					exp_pi[context][tag]+=p[tag]*count;
-				}
 -			}
 -		}
 -		
 -		//System.out.println("Log likelihood: "+loglikelihood);
 -		
 -		//M
 -		for(double [][]i:exp_emit){
 -			for(double []j:i){
 -				arr.F.l1normalize(j);
 -			}
 -		}
 -		
 -		emit=exp_emit;
 -		
 -		for(double []j:exp_pi){
 -			arr.F.l1normalize(j);
 -		}
 -		
 -		pi=exp_pi;
 -		
 -		return loglikelihood;
 -	}
 -
 -	public double[] posterior(Corpus.Edge edge) 
 -	{
 -		double[] prob=Arrays.copyOf(pi[edge.getContextId()], K);
 -		
 -		TIntArrayList phrase = edge.getPhrase();
 -		TIntArrayList offsets = c.phraseEdges(phrase);
 -		for(int tag=0;tag<K;tag++)
 -		{
 -			for (int i=0; i < offsets.size(); ++i)
 -				prob[tag]*=emit[tag][i][phrase.get(offsets.get(i))];
 -		}
 -			
 -		return prob;
 -	}
 -
 -	public void displayPosterior(PrintStream ps)
 -	{	
 -		for (Edge edge : c.getEdges())
 -		{
 -			double probs[] = posterior(edge);
 -			arr.F.l1normalize(probs);
 -
 -			// emit phrase
 -			ps.print(edge.getPhraseString());
 -			ps.print("\t");
 -			ps.print(edge.getContextString(true));
 -			int t=arr.F.argmax(probs);
 -			ps.println(" ||| C=" + t);
 -		}
 -	}
 -	
 -	public void displayModelParam(PrintStream ps)
 -	{
 -		final double EPS = 1e-6;
 -		
 -		ps.println("P(tag|context)");
 -		for (int i = 0; i < n_contexts; ++i)
 -		{
 -			ps.print(c.getContext(i));
 -			for(int j=0;j<pi[i].length;j++){
 -				if (pi[i][j] > EPS)
 -					ps.print("\t" + j + ": " + pi[i][j]);
 -			}
 -			ps.println();
 -		}
 -		
 -		ps.println("P(word|tag,position)");
 -		for (int i = 0; i < K; ++i)
 -		{
 -			for(int position=0;position<n_positions;position++){
 -				ps.println("tag " + i + " position " + position);
 -				for(int word=0;word<emit[i][position].length;word++){
 -					if (emit[i][position][word] > EPS)
 -						ps.print(c.getWord(word)+"="+emit[i][position][word]+"\t");
 -				}
 -				ps.println();
 -			}
 -			ps.println();
 -		}
 -		
 -	}
 -	
 -}
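
C2F's posterior multiplies p(tag | context) by the emission probabilities of the phrase's boundary words, at the offsets computed by phraseEdges. A compact numeric sketch of that product (all probabilities invented):

    import java.util.Arrays;

    public class C2FPosterior {
        public static void main(String[] args) {
            int K = 2;
            double[] piCtx = {0.6, 0.4};          // p(tag | this context)
            double[][][] emit = {                  // emit[tag][position][word]
                {{0.7, 0.3}, {0.2, 0.8}},
                {{0.5, 0.5}, {0.9, 0.1}},
            };
            int[] boundaryWords = {0, 1};          // word ids at the offsets
            double[] prob = piCtx.clone();
            for (int tag = 0; tag < K; tag++)
                for (int pos = 0; pos < boundaryWords.length; pos++)
                    prob[tag] *= emit[tag][pos][boundaryWords[pos]];
            System.out.println(Arrays.toString(prob)); // unnormalised
        }
    }
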
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Corpus.java b/gi/posterior-regularisation/prjava/src/phrase/Corpus.java
deleted file mode 100644
index 4b1939cd..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Corpus.java
+++ /dev/null
@@ -1,288 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import java.io.*;
-import java.util.*;
-import java.util.regex.Pattern;
-
-
-public class Corpus
-{
-	private Lexicon<String> wordLexicon = new Lexicon<String>();
-	private Lexicon<TIntArrayList> phraseLexicon = new Lexicon<TIntArrayList>();
-	private Lexicon<TIntArrayList> contextLexicon = new Lexicon<TIntArrayList>();
-	private List<Edge> edges = new ArrayList<Edge>();
-	private List<List<Edge>> phraseToContext = new ArrayList<List<Edge>>();
-	private List<List<Edge>> contextToPhrase = new ArrayList<List<Edge>>();
-	public int splitSentinel;
-	public int phraseSentinel;
-	public int rareSentinel;
-
-	public Corpus()
-	{
-		splitSentinel = wordLexicon.insert("<SPLIT>");
-		phraseSentinel = wordLexicon.insert("<PHRASE>");
-		rareSentinel = wordLexicon.insert("<RARE>");
-	}
-	
-	public class Edge
-	{
-		
-		Edge(int phraseId, int contextId, double count,int tag)
-		{
-			this.phraseId = phraseId;
-			this.contextId = contextId;
-			this.count = count;
-			fixTag=tag;
-		}
-		
-		Edge(int phraseId, int contextId, double count)
-		{
-			this.phraseId = phraseId;
-			this.contextId = contextId;
-			this.count = count;
-			fixTag=-1;
-		}
-		public int getTag(){
-			return fixTag;
-		}
-		
-		public int getPhraseId()
-		{
-			return phraseId;
-		}
-		public TIntArrayList getPhrase()
-		{
-			return Corpus.this.getPhrase(phraseId);
-		}
-		public String getPhraseString()
-		{
-			return Corpus.this.getPhraseString(phraseId);
-		}
-		public int getContextId()
-		{
-			return contextId;
-		}
-		public TIntArrayList getContext()
-		{
-			return Corpus.this.getContext(contextId);
-		}
-		public String getContextString(boolean insertPhraseSentinel)
-		{
-			return Corpus.this.getContextString(contextId, insertPhraseSentinel);
-		}
-		public double getCount()
-		{
-			return count;
-		}
-		public boolean equals(Object other)
-		{
-			if (other instanceof Edge)
-			{
-				Edge oe = (Edge) other;
-				return oe.phraseId == phraseId && oe.contextId == contextId;
-			}
-			else return false;
-		}
-		public int hashCode()
-		{   // this is how boost's hash_combine does it
-			int seed = phraseId;
-			seed ^= contextId + 0x9e3779b9 + (seed << 6) + (seed >> 2);
-			return seed;
-		}
-		public String toString()
-		{
-			return getPhraseString() + "\t" + getContextString(true);
-		}
-		
-		private int phraseId;
-		private int contextId;
-		private double count;
-		private int fixTag;
-	}
-
-	List<Edge> getEdges()
-	{
-		return edges;
-	}
-	
-	int getNumEdges()
-	{
-		return edges.size();
-	}
-
-	int getNumPhrases()
-	{
-		return phraseLexicon.size();
-	}
-	
-	int getNumContextPositions()
-	{
-		return contextLexicon.lookup(0).size();
-	}
-	
-	List<Edge> getEdgesForPhrase(int phraseId)
-	{
-		return phraseToContext.get(phraseId);
-	}
-	
-	int getNumContexts()
-	{
-		return contextLexicon.size();
-	}
-	
-	List<Edge> getEdgesForContext(int contextId)
-	{
-		return contextToPhrase.get(contextId);
-	}
-	
-	int getNumWords()
-	{
-		return wordLexicon.size();
-	}
-	
-	String getWord(int wordId)
-	{
-		return wordLexicon.lookup(wordId);
-	}
-	
-	public TIntArrayList getPhrase(int phraseId)
-	{
-		return phraseLexicon.lookup(phraseId);
-	}
-	
-	public String getPhraseString(int phraseId)
-	{
-		StringBuffer b = new StringBuffer();
-		for (int tid: getPhrase(phraseId).toNativeArray())
-		{
-			if (b.length() > 0)
-				b.append(" ");
-			b.append(wordLexicon.lookup(tid));
-		}
-		return b.toString();
-	}
-	
-	public TIntArrayList getContext(int contextId)
-	{
-		return contextLexicon.lookup(contextId);
-	}
-	
-	public String getContextString(int contextId, boolean insertPhraseSentinel)
-	{
-		StringBuffer b = new StringBuffer();
-		TIntArrayList c = getContext(contextId);
-		for (int i = 0; i < c.size(); ++i)
-		{
-			if (i > 0) b.append(" ");
-			//if (i == c.size() / 2) b.append("<PHRASE> ");
-			b.append(wordLexicon.lookup(c.get(i)));
-		}
-		return b.toString();
-	}
-	
-	public boolean isSentinel(int wordId)
-	{
-		return wordId == splitSentinel || wordId == phraseSentinel;
-	}
-	
-	List<Edge> readEdges(Reader in) throws IOException
-	{
-		// read in line-by-line
-		BufferedReader bin = new BufferedReader(in);
-		String line;
-		Pattern separator = Pattern.compile(" \\|\\|\\| ");
-		
-		List<Edge> edges = new ArrayList<Edge>();
-		while ((line = bin.readLine()) != null)
-		{
-			// split into phrase and contexts
-			StringTokenizer st = new StringTokenizer(line, "\t");
-			assert (st.hasMoreTokens());
-			String phraseToks = st.nextToken();
-			assert (st.hasMoreTokens());
-			String rest = st.nextToken();
-			assert (!st.hasMoreTokens());
-
-			// process phrase
-			st = new StringTokenizer(phraseToks, " ");
-			TIntArrayList ptoks = new TIntArrayList();
-			while (st.hasMoreTokens())
-				ptoks.add(wordLexicon.insert(st.nextToken()));
-			int phraseId = phraseLexicon.insert(ptoks);
-			
-			// process contexts
-			String[] parts = separator.split(rest);
-			assert (parts.length % 2 == 0);
-			for (int i = 0; i < parts.length; i += 2)
-			{
-				// process pairs of strings - context and count
-				String ctxString = parts[i];
-				String countString = parts[i + 1];
-
-				assert (countString.startsWith("C="));
-
-				String []countToks=countString.split(" ");
-				
-				double count = Double.parseDouble(countToks[0].substring(2).trim());
-				
-				TIntArrayList ctx = new TIntArrayList();
-				StringTokenizer ctxStrtok = new StringTokenizer(ctxString, " ");
-				while (ctxStrtok.hasMoreTokens())
-				{
-					String token = ctxStrtok.nextToken();
-					ctx.add(wordLexicon.insert(token));
-				}
-				int contextId = contextLexicon.insert(ctx);
-
-
-				if(countToks.length<2){
-					edges.add(new Edge(phraseId, contextId, count));
-				}
-				else{
-					int tag=Integer.parseInt(countToks[1].substring(2));
-					edges.add(new Edge(phraseId, contextId, count,tag));
-				}
-			}
-		}
-		return edges;
-	}
-	
-	static Corpus readFromFile(Reader in) throws IOException
-	{
-		Corpus c = new Corpus();
-		c.edges = c.readEdges(in);
-		for (Edge edge: c.edges)
-		{
-			while (edge.getPhraseId() >= c.phraseToContext.size())
-				c.phraseToContext.add(new ArrayList<Edge>());
-			while (edge.getContextId() >= c.contextToPhrase.size())
-				c.contextToPhrase.add(new ArrayList<Edge>());
-			
-			// index the edge for fast phrase, context lookup
-			c.phraseToContext.get(edge.getPhraseId()).add(edge);
-			c.contextToPhrase.get(edge.getContextId()).add(edge);
-		}
-		return c;
-	}
-	
-	TIntArrayList phraseEdges(TIntArrayList phrase)
-	{
-		TIntArrayList r = new TIntArrayList(4);
-		for (int p = 0; p < phrase.size(); ++p)
-		{
-			if (p == 0 || phrase.get(p-1) == splitSentinel)
-				r.add(p);
-			if (p == phrase.size() - 1 || phrase.get(p+1) == splitSentinel)
-				r.add(p);
-		}
-		return r;
-	}
-
-	public void printStats(PrintStream out)
-	{
-		out.println("Corpus has " + edges.size() + " edges " + phraseLexicon.size() + " phrases "
-				+ contextLexicon.size() + " contexts and " + wordLexicon.size() + " word types");
-	}
-}
\ No newline at end of file
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Lexicon.java b/gi/posterior-regularisation/prjava/src/phrase/Lexicon.java
deleted file mode 100644
index a386e4a3..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Lexicon.java
+++ /dev/null
@@ -1,34 +0,0 @@
-package phrase;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-public class Lexicon<T>
-{
-	public int insert(T word)
-	{
-		Integer i = wordToIndex.get(word);
-		if (i == null)
-		{
-			i = indexToWord.size();
-			wordToIndex.put(word, i);
-			indexToWord.add(word);
-		}
-		return i;
-	}
-
-	public T lookup(int index)
-	{
-		return indexToWord.get(index);
-	}
-
-	public int size()
-	{
-		return indexToWord.size();
-	}
-
-	private Map<T, Integer> wordToIndex = new HashMap<T, Integer>();
-	private List<T> indexToWord = new ArrayList<T>();
-}
\ No newline at end of file
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
deleted file mode 100644
index c032bb2b..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
+++ /dev/null
@@ -1,540 +0,0 @@
-package phrase;
 -
 -import gnu.trove.TIntArrayList;
 -import org.apache.commons.math.special.Gamma;
 -
 -import java.io.BufferedReader;
 -import java.io.IOException;
 -import java.io.PrintStream;
 -import java.util.ArrayList;
 -import java.util.Arrays;
 -import java.util.List;
 -import java.util.concurrent.Callable;
 -import java.util.concurrent.ExecutionException;
 -import java.util.concurrent.ExecutorService;
 -import java.util.concurrent.Executors;
 -import java.util.concurrent.Future;
 -import java.util.concurrent.LinkedBlockingQueue;
 -import java.util.concurrent.atomic.AtomicInteger;
 -import java.util.concurrent.atomic.AtomicLong;
 -import java.util.regex.Pattern;
 -
 -import phrase.Corpus.Edge;
 -
 -
 -public class PhraseCluster {
 -	
 -	public int K;
 -	private int n_phrases, n_words, n_contexts, n_positions;
 -	public Corpus c;
 -	public ExecutorService pool; 
 -
 -	double[] lambdaPTCT;
 -	double[][] lambdaPT;
 -	boolean cacheLambda = true;
 -
 -	// emit[tag][position][word] = p(word | tag, position in context)
 -	double emit[][][];
 -	// pi[phrase][tag] = p(tag | phrase)
 -	double pi[][];
 -	
 -	public PhraseCluster(int numCluster, Corpus corpus)
 -	{
 -		K=numCluster;
 -		c=corpus;
 -		n_words=c.getNumWords();
 -		n_phrases=c.getNumPhrases();
 -		n_contexts=c.getNumContexts();
 -		n_positions=c.getNumContextPositions();
 -
 -		emit=new double [K][n_positions][n_words];
 -		pi=new double[n_phrases][K];
 -		
 -		for(double [][]i:emit)
 -			for(double []j:i)
 -				arr.F.randomise(j, true);
 -
 -		for(double []j:pi)
 -			arr.F.randomise(j, true);
 -	}
 -	
 -	void useThreadPool(ExecutorService pool)
 -	{
 -		this.pool = pool;
 -	}
 -
 -	public double EM(int phraseSizeLimit)
 -	{
 -		double [][][]exp_emit=new double [K][n_positions][n_words];
 -		double []exp_pi=new double[K];
 -		
 -		for(double [][]i:exp_emit)
 -			for(double []j:i)
 -				Arrays.fill(j, 1e-10);
 -		
 -		double loglikelihood=0;
 -		
 -		//E
 -		for(int phrase=0; phrase < n_phrases; phrase++)
 -		{
 -			if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit)
 -				continue;
 -
 -			Arrays.fill(exp_pi, 1e-10);
 -			
 -			List<Edge> contexts = c.getEdgesForPhrase(phrase);
 -
 -			for (int ctx=0; ctx<contexts.size(); ctx++)
 -			{
 -				Edge edge = contexts.get(ctx);
 -				
 -				double p[]=posterior(edge);
 -				double z = arr.F.l1norm(p);
 -				assert z > 0;
 -				loglikelihood += edge.getCount() * Math.log(z);
 -				arr.F.l1normalize(p);
 -				
 -				double count = edge.getCount();
 -				//increment expected count
 -				TIntArrayList context = edge.getContext();
 -				for(int tag=0;tag<K;tag++)
 -				{
 -					for(int pos=0;pos<n_positions;pos++){
 -						exp_emit[tag][pos][context.get(pos)]+=p[tag]*count;
 -					}
 -					exp_pi[tag]+=p[tag]*count;
 -				}
 -			}
 -			arr.F.l1normalize(exp_pi);
 -			System.arraycopy(exp_pi, 0, pi[phrase], 0, K);
 -		}
 -
 -		//M
 -		for(double [][]i:exp_emit)
 -			for(double []j:i)
 -				arr.F.l1normalize(j);
 -			
 -		emit=exp_emit;
 -
 -		return loglikelihood;
 -	}
 -	
 -	public double PREM(double scalePT, double scaleCT, int phraseSizeLimit)
 -	{
 -		if (scaleCT == 0)
 -		{
 -			if (pool != null)
 -				return PREM_phrase_constraints_parallel(scalePT, phraseSizeLimit);
 -			else
 -				return PREM_phrase_constraints(scalePT, phraseSizeLimit);
 -		}
 -		else // FIXME: ignores phraseSizeLimit
 -			return this.PREM_phrase_context_constraints(scalePT, scaleCT);
 -	}
 -
 -	
 -	public double PREM_phrase_constraints(double scalePT, int phraseSizeLimit)
 -	{
 -		double [][][]exp_emit=new double[K][n_positions][n_words];
 -		double []exp_pi=new double[K];
 -		
 -		for(double [][]i:exp_emit)
 -			for(double []j:i)
 -				Arrays.fill(j, 1e-10);
 -		
 -		if (lambdaPT == null && cacheLambda)
 -			lambdaPT = new double[n_phrases][];
 -		
 -		double loglikelihood=0, kl=0, l1lmax=0, primal=0;
 -		int failures=0, iterations=0;
 -		long start = System.currentTimeMillis();
 -		//E
 -		for(int phrase=0; phrase<n_phrases; phrase++)
 -		{
 -			if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit)
 -			{
 -				//System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K);
 -				continue;
 -			}
 -			
 -			Arrays.fill(exp_pi, 1e-10);
 -			
 -			// FIXME: add rare edge check to phrase objective & posterior processing
 -			PhraseObjective po = new PhraseObjective(this, phrase, scalePT, (cacheLambda) ? lambdaPT[phrase] : null);
 -			boolean ok = po.optimizeWithProjectedGradientDescent();
 -			if (!ok) ++failures;
 -			if (cacheLambda) lambdaPT[phrase] = po.getParameters();
 -			iterations += po.getNumberUpdateCalls();
 -			double [][] q=po.posterior();
 -			loglikelihood += po.loglikelihood();
 -			kl += po.KL_divergence();
 -			l1lmax += po.l1lmax();
 -			primal += po.primal(scalePT);
 -			List<Edge> edges = c.getEdgesForPhrase(phrase);
 -
 -			for(int edge=0;edge<q.length;edge++){
 -				Edge e = edges.get(edge);
 -				TIntArrayList context = e.getContext();
 -				double contextCnt = e.getCount();
 -				//increment expected count
 -				for(int tag=0;tag<K;tag++){
 -					for(int pos=0;pos<n_positions;pos++){
 -						exp_emit[tag][pos][context.get(pos)]+=q[edge][tag]*contextCnt;
 -					}
 -					
 -					exp_pi[tag]+=q[edge][tag]*contextCnt;
 -					
 -				}
 -			}
 -			arr.F.l1normalize(exp_pi);
 -			System.arraycopy(exp_pi, 0, pi[phrase], 0, K);
 -		}
 -		
 -		long end = System.currentTimeMillis();
 -		if (failures > 0)
 -			System.out.println("WARNING: failed to converge in " + failures + "/" + n_phrases + " cases");
 -		System.out.println("\tmean iters:     " + iterations/(double)n_phrases + " elapsed time " + (end - start) / 1000.0);
 -		System.out.println("\tllh:            " + loglikelihood);
 -		System.out.println("\tKL:             " + kl);
 -		System.out.println("\tphrase l1lmax:  " + l1lmax);
 -		
 -		//M
 -		for(double [][]i:exp_emit)
 -			for(double []j:i)
 -				arr.F.l1normalize(j);
 -		emit=exp_emit;
 -		
 -		return primal;
 -	}
 -
 -	public double PREM_phrase_constraints_parallel(final double scalePT, int phraseSizeLimit)
 -	{
 -		assert(pool != null);
 -		
 -		final LinkedBlockingQueue<PhraseObjective> expectations 
 -			= new LinkedBlockingQueue<PhraseObjective>();
 -		
 -		double [][][]exp_emit=new double [K][n_positions][n_words];
 -		double [][]exp_pi=new double[n_phrases][K];
 -		
 -		for(double [][]i:exp_emit)
 -			for(double []j:i)
 -				Arrays.fill(j, 1e-10);
 -		for(double []j:exp_pi)
 -			Arrays.fill(j, 1e-10);
 -		
 -		double loglikelihood=0, kl=0, l1lmax=0, primal=0;
 -		final AtomicInteger failures = new AtomicInteger(0);
 -		final AtomicLong elapsed = new AtomicLong(0l);
 -		int iterations=0;
 -		long start = System.currentTimeMillis();
 -		List<Future<PhraseObjective>> results = new ArrayList<Future<PhraseObjective>>();
 -		
 -		if (lambdaPT == null && cacheLambda)
 -			lambdaPT = new double[n_phrases][];
 -
 -		//E
 -		for(int phrase=0;phrase<n_phrases;phrase++) {
 -			if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit) {
 -				System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K);
 -				continue;
 -			}
 -
 -			final int p=phrase;
 -			results.add(pool.submit(new Callable<PhraseObjective>() {
 -				public PhraseObjective call() {
 -					//System.out.println("" + Thread.currentThread().getId() + " optimising lambda for " + p);
 -					long start = System.currentTimeMillis();
 -					PhraseObjective po = new PhraseObjective(PhraseCluster.this, p, scalePT, (cacheLambda) ? lambdaPT[p] : null);
 -					boolean ok = po.optimizeWithProjectedGradientDescent();
 -					if (!ok) failures.incrementAndGet();
 -					long end = System.currentTimeMillis();
 -					elapsed.addAndGet(end - start);
 -					//System.out.println("" + Thread.currentThread().getId() + " done optimising lambda for " + p);
 -					return po;
 -				}
 -			}));
 -		}
 -		
 -		// aggregate the expectations as they become available
 -		for (Future<PhraseObjective> fpo : results)
 -		{
 -			try {
 -				//System.out.println("" + Thread.currentThread().getId() + " reading queue #" + count);
 -
 -				// wait (blocking) until something is ready
 -				PhraseObjective po = fpo.get();
 -				// process
 -				int phrase = po.phrase;
 -				if (cacheLambda) lambdaPT[phrase] = po.getParameters();
 -				//System.out.println("" + Thread.currentThread().getId() + " taken phrase " + phrase);
 -				double [][] q=po.posterior();
 -				loglikelihood += po.loglikelihood();
 -				kl += po.KL_divergence();
 -				l1lmax += po.l1lmax();
 -				primal += po.primal(scalePT);
 -				iterations += po.getNumberUpdateCalls();
 -
 -				List<Edge> edges = c.getEdgesForPhrase(phrase);
 -				for(int edge=0;edge<q.length;edge++){
 -					Edge e = edges.get(edge);
 -					TIntArrayList context = e.getContext();
 -					double contextCnt = e.getCount();
 -					//increment expected count
 -					for(int tag=0;tag<K;tag++){
 -						for(int pos=0;pos<n_positions;pos++){
 -							exp_emit[tag][pos][context.get(pos)]+=q[edge][tag]*contextCnt;
 -						}
 -						exp_pi[phrase][tag]+=q[edge][tag]*contextCnt;
 -					}
 -				}
 -			} catch (InterruptedException e) {
 -				System.err.println("M-step thread interrupted. Probably fatal!");
 -				throw new RuntimeException(e);
 -			} catch (ExecutionException e) {
 -				System.err.println("M-step thread execution died. Probably fatal!");
 -				throw new RuntimeException(e);
 -			}
 -		}
 -		
 -		long end = System.currentTimeMillis();
 -		
 -		if (failures.get() > 0)
 -			System.out.println("WARNING: failed to converge in " + failures.get() + "/" + n_phrases + " cases");
 -		System.out.println("\tmean iters:     " + iterations/(double)n_phrases + " walltime " + (end-start)/1000.0 + " threads " + elapsed.get() / 1000.0);
 -		System.out.println("\tllh:            " + loglikelihood);
 -		System.out.println("\tKL:             " + kl);
 -		System.out.println("\tphrase l1lmax:  " + l1lmax);
 -		
 -		//M
 -		for(double [][]i:exp_emit)
 -			for(double []j:i)
 -				arr.F.l1normalize(j);
 -		emit=exp_emit;
 -		
 -		for(double []j:exp_pi)
 -			arr.F.l1normalize(j);
 -		pi=exp_pi;
 -		
 -		return primal;
 -	}
 -	
 -	public double PREM_phrase_context_constraints(double scalePT, double scaleCT)
 -	{	
 -		double[][][] exp_emit = new double [K][n_positions][n_words];
 -		double[][] exp_pi = new double[n_phrases][K];
 -
 -		//E step
 -		PhraseContextObjective pco = new PhraseContextObjective(this, lambdaPTCT, pool, scalePT, scaleCT);
-		boolean ok = pco.optimizeWithProjectedGradientDescent();
-		if (!ok)
-			System.out.println("WARNING: failed to converge");
 -		if (cacheLambda) lambdaPTCT = pco.getParameters();
 -
 -		//now extract expectations
 -		List<Corpus.Edge> edges = c.getEdges();
 -		for(int e = 0; e < edges.size(); ++e)
 -		{
 -			double [] q = pco.posterior(e);
 -			Corpus.Edge edge = edges.get(e);
 -
 -			TIntArrayList context = edge.getContext();
 -			double contextCnt = edge.getCount();
 -			//increment expected count
 -			for(int tag=0;tag<K;tag++)
 -			{
 -				for(int pos=0;pos<n_positions;pos++)
 -					exp_emit[tag][pos][context.get(pos)]+=q[tag]*contextCnt;
 -				exp_pi[edge.getPhraseId()][tag]+=q[tag]*contextCnt;
 -			}
 -		}
 -		
 -		System.out.println("\tllh:            " + pco.loglikelihood());
 -		System.out.println("\tKL:             " + pco.KL_divergence());
 -		System.out.println("\tphrase l1lmax:  " + pco.phrase_l1lmax());
 -		System.out.println("\tcontext l1lmax: " + pco.context_l1lmax());
 -		
 -		//M step
 -		for(double [][]i:exp_emit)
 -			for(double []j:i)
 -				arr.F.l1normalize(j);
 -		emit=exp_emit;
 -		
 -		for(double []j:exp_pi)
 -			arr.F.l1normalize(j);
 -		pi=exp_pi;
 -		
 -		return pco.primal();
 -	}	
 -		
-	/**
-	 * @param edge the phrase-context edge to score
-	 * @return unnormalized posterior over tags
-	 */
 -	public double[] posterior(Corpus.Edge edge) 
 -	{
 -		double[] prob;
 -		
 -		if(edge.getTag()>=0){
 -			prob=new double[K];
 -			prob[edge.getTag()]=1;
 -			return prob;
 -		}
 -		
 -		if (edge.getPhraseId() < n_phrases)
 -			prob = Arrays.copyOf(pi[edge.getPhraseId()], K);
 -		else
 -		{
 -			prob = new double[K];
 -			Arrays.fill(prob, 1.0);
 -		}
 -		
 -		TIntArrayList ctx = edge.getContext();
 -		for(int tag=0;tag<K;tag++)
 -		{
 -			for(int c=0;c<n_positions;c++)
 -			{
 -				int word = ctx.get(c);
 -				if (!this.c.isSentinel(word) && word < n_words)
 -					prob[tag]*=emit[tag][c][word];
 -			}
 -		}
 -		
 -		return prob;
 -	}
 -	
 -	public void displayPosterior(PrintStream ps, List<Edge> testing)
 -	{	
 -		for (Edge edge : testing)
 -		{
 -			double probs[] = posterior(edge);
 -			arr.F.l1normalize(probs);
 -
 -			// emit phrase
 -			ps.print(edge.getPhraseString());
 -			ps.print("\t");
 -			ps.print(edge.getContextString(true));
 -			int t=arr.F.argmax(probs);
 -			ps.println(" ||| C=" + t + " T=" + edge.getCount() + " P=" + probs[t]);
 -			//ps.println("# probs " + Arrays.toString(probs));
 -		}
 -	}
 -	
 -	public void displayModelParam(PrintStream ps)
 -	{
 -		final double EPS = 1e-6;
 -		ps.println("phrases " + n_phrases + " tags " + K + " positions " + n_positions);
 -		
 -		for (int i = 0; i < n_phrases; ++i)
 -			for(int j=0;j<pi[i].length;j++)
 -				if (pi[i][j] > EPS)
 -					ps.println(i + " " + j + " " + pi[i][j]);
 -
 -		ps.println();
 -		for (int i = 0; i < K; ++i)
 -		{
 -			for(int position=0;position<n_positions;position++)
 -			{
 -				for(int word=0;word<emit[i][position].length;word++)
 -				{
 -					if (emit[i][position][word] > EPS)
 -						ps.println(i + " " + position + " " + word + " " + emit[i][position][word]);
 -				}
 -			}
 -		}
 -	}
 -	
 -	double phrase_l1lmax()
 -	{
 -		double sum=0;
 -		for(int phrase=0; phrase<n_phrases; phrase++)
 -		{
 -			double [] maxes = new double[K];
 -			for (Edge edge : c.getEdgesForPhrase(phrase))
 -			{
 -				double p[] = posterior(edge);
 -				arr.F.l1normalize(p);
 -				for(int tag=0;tag<K;tag++)
 -					maxes[tag] = Math.max(maxes[tag], p[tag]);
 -			}
 -			for(int tag=0;tag<K;tag++)
 -				sum += maxes[tag];
 -		}
 -		return sum;
 -	}
 -
 -	double context_l1lmax()
 -	{
 -		double sum=0;
 -		for(int context=0; context<n_contexts; context++)
 -		{
 -			double [] maxes = new double[K];
 -			for (Edge edge : c.getEdgesForContext(context))
 -			{
 -				double p[] = posterior(edge);
 -				arr.F.l1normalize(p);
 -				for(int tag=0;tag<K;tag++)
 -					maxes[tag] = Math.max(maxes[tag], p[tag]);
 -			}
 -			for(int tag=0;tag<K;tag++)
 -				sum += maxes[tag];
 -		}
 -		return sum;
 -	}
 -
 -	public void loadParameters(BufferedReader input) throws IOException
 -	{	
 -		final double EPS = 1e-50;
 -		
 -		// overwrite pi, emit with ~zeros
 -		for(double [][]i:emit)
 -			for(double []j:i)
 -				Arrays.fill(j, EPS);
 -
 -		for(double []j:pi)
 -			Arrays.fill(j, EPS);
 -
 -		String line = input.readLine();
 -		assert line != null;
 -
 -		Pattern space = Pattern.compile(" +");
 -		String[] parts = space.split(line);
 -		assert parts.length == 6;
 -
 -		assert parts[0].equals("phrases");
 -		int phrases = Integer.parseInt(parts[1]);
 -		int tags = Integer.parseInt(parts[3]);
 -		int positions = Integer.parseInt(parts[5]);
 -		
 -		assert phrases == n_phrases;
 -		assert tags == K;
 -		assert positions == n_positions;
 -
 -		// read in pi
 -		while ((line = input.readLine()) != null)
 -		{
 -			line = line.trim();
 -			if (line.isEmpty()) break;
 -			
 -			String[] tokens = space.split(line);
 -			assert tokens.length == 3;
 -			int p = Integer.parseInt(tokens[0]);
 -			int t = Integer.parseInt(tokens[1]);
 -			double v = Double.parseDouble(tokens[2]);
 -
 -			pi[p][t] = v;
 -		}
 -		
 -		// read in emissions
 -		while ((line = input.readLine()) != null)
 -		{
 -			String[] tokens = space.split(line);
 -			assert tokens.length == 4;
 -			int t = Integer.parseInt(tokens[0]);
 -			int p = Integer.parseInt(tokens[1]);
 -			int w = Integer.parseInt(tokens[2]);
 -			double v = Double.parseDouble(tokens[3]);
 -
 -			emit[t][p][w] = v;
 -		}
 -	}
 -}
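
The PREM variants above replace the model posterior p with a regularised posterior q ∝ p · exp(−λ/count), where the dual variables λ come from the projected-gradient objectives defined below (one set per constraint type). A self-contained sketch of that reweighting for one edge (λ and p values invented):

    import java.util.Arrays;

    public class PRPosterior {
        public static void main(String[] args) {
            double[] p = {0.7, 0.3};      // model posterior for one edge
            double[] lambda = {1.0, 0.0}; // dual variables from the optimiser
            double count = 2.0;           // edge count
            double[] q = new double[p.length];
            double z = 0;
            for (int t = 0; t < p.length; t++) {
                q[t] = p[t] * Math.exp(-lambda[t] / count);
                z += q[t];
            }
            for (int t = 0; t < q.length; t++) q[t] /= z;
            System.out.println(Arrays.toString(q)); // regularised posterior
        }
    }
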
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java
deleted file mode 100644
index 646ff392..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java
+++ /dev/null
@@ -1,436 +0,0 @@
-package phrase;
 -
 -import java.util.ArrayList;
 -import java.util.Arrays;
 -import java.util.HashMap;
 -import java.util.List;
 -import java.util.Map;
 -import java.util.concurrent.ExecutionException;
 -import java.util.concurrent.ExecutorService;
 -import java.util.concurrent.Future;
 -
 -import optimization.gradientBasedMethods.ProjectedGradientDescent;
 -import optimization.gradientBasedMethods.ProjectedObjective;
 -import optimization.gradientBasedMethods.stats.OptimizerStats;
 -import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
 -import optimization.linesearch.InterpolationPickFirstStep;
 -import optimization.linesearch.LineSearchMethod;
 -import optimization.projections.SimplexProjection;
 -import optimization.stopCriteria.CompositeStopingCriteria;
 -import optimization.stopCriteria.ProjectedGradientL2Norm;
 -import optimization.stopCriteria.StopingCriteria;
 -import optimization.stopCriteria.ValueDifference;
 -import optimization.util.MathUtils;
 -import phrase.Corpus.Edge;
 -
 -public class PhraseContextObjective extends ProjectedObjective
 -{
 -	private static final double GRAD_DIFF = 0.00002;
 -	private static double INIT_STEP_SIZE = 300;
 -	private static double VAL_DIFF = 1e-8;
 -	private static int ITERATIONS = 20;
 -	boolean debug = false;
 -	
 -	private PhraseCluster c;
 -	
 -	// un-regularized unnormalized posterior, p[edge][tag]
 -	// P(tag|edge) \propto P(tag|phrase)P(context|tag)
 -	private double p[][];
 -
 -	// regularized unnormalized posterior 
 -	// q[edge][tag] propto p[edge][tag]*exp(-lambda)
 -	private double q[][];
 -	private List<Corpus.Edge> data;
 -	
 -	// log likelihood under q
 -	private double loglikelihood;
 -	private SimplexProjection projectionPhrase;
 -	private SimplexProjection projectionContext;
 -	
 -	double[] newPoint;
 -	private int n_param;
 -	
 -	// likelihood under p
 -	public double llh;
 -	
 -	private static Map<Corpus.Edge, Integer> edgeIndex;
 -	
 -	private long projectionTime;
 -	private long objectiveTime;
 -	private long actualProjectionTime;
 -	private ExecutorService pool;
 -	
 -	double scalePT;
 -	double scaleCT;
 -	
 -	public PhraseContextObjective(PhraseCluster cluster, double[] startingParameters, ExecutorService pool,
 -			double scalePT, double scaleCT)
 -	{
 -		c=cluster;
 -		data=c.c.getEdges();
 -		n_param=data.size()*c.K*2;
 -		this.pool=pool;
 -		this.scalePT = scalePT;
 -		this.scaleCT = scaleCT;
 -		
 -		parameters = startingParameters;
 -		if (parameters == null)
 -			parameters = new double[n_param];
 -		
 -		System.out.println("Num parameters " + n_param);
 -		newPoint = new double[n_param];
 -		gradient = new double[n_param];
 -		initP();
 -		projectionPhrase = new SimplexProjection(scalePT);
 -		projectionContext = new SimplexProjection(scaleCT);
 -		q=new double [data.size()][c.K];
 -		
 -		if (edgeIndex == null) {
 -			edgeIndex = new HashMap<Edge, Integer>();
 -			for (int e=0; e<data.size(); e++)
 -			{
 -				edgeIndex.put(data.get(e), e);
 -				//if (debug) System.out.println("Edge " + data.get(e) + " index " + e);
 -			}
 -		}
 -		
 -		setParameters(parameters);
 -	}
 -
 -	private void initP(){
 -		p=new double[data.size()][];
 -		for(int edge=0;edge<data.size();edge++)
 -		{
 -			p[edge]=c.posterior(data.get(edge));
 -			llh += data.get(edge).getCount() * Math.log(arr.F.l1norm(p[edge]));
 -			arr.F.l1normalize(p[edge]);
 -		}
 -	}
 -	
 -	@Override
 -	public void setParameters(double[] params) {
 -		//System.out.println("setParameters " + Arrays.toString(parameters));
 -		// TODO: test if params have changed and skip update otherwise
 -		super.setParameters(params);
 -		updateFunction();
 -	}
 -	
 -	private void updateFunction()
 -	{
 -		updateCalls++;
 -		loglikelihood=0;
 -
 -		System.out.print(".");
 -		System.out.flush();
 -
 -		long begin = System.currentTimeMillis();
 -		for (int e=0; e<data.size(); e++) 
 -		{
 -			Edge edge = data.get(e);
 -			for(int tag=0; tag<c.K; tag++)
 -			{
 -				int ip = index(e, tag, true);
 -				int ic = index(e, tag, false);
 -				q[e][tag] = p[e][tag]*
 -					Math.exp((-parameters[ip]-parameters[ic]) / edge.getCount());
 -				//if (debug)
 -					//System.out.println("\tposterior " + edge + " with tag " + tag + " p " + p[e][tag] + " params " + parameters[ip] + " and " + parameters[ic] + " q " + q[e][tag]);
 -			}
 -		}
 -	
 -		for(int edge=0;edge<data.size();edge++) {
 -			loglikelihood+=data.get(edge).getCount() * Math.log(arr.F.l1norm(q[edge]));
 -			arr.F.l1normalize(q[edge]);
 -		}
 -		
 -		for (int e=0; e<data.size(); e++) 
 -		{
 -			for(int tag=0; tag<c.K; tag++)
 -			{
 -				int ip = index(e, tag, true);
 -				int ic = index(e, tag, false);
 -				gradient[ip]=-q[e][tag];
 -				gradient[ic]=-q[e][tag];
 -			}
 -		}
 -		//if (debug) {
 -			//System.out.println("objective " + loglikelihood + " ||gradient||_2: " + arr.F.l2norm(gradient));		
 -			//System.out.println("gradient " + Arrays.toString(gradient));
 -		//}
 -		objectiveTime += System.currentTimeMillis() - begin;
 -	}
 -	
 -	@Override
 -	public double[] projectPoint(double[] point) 
 -	{
 -		long begin = System.currentTimeMillis();
 -		List<Future<?>> tasks = new ArrayList<Future<?>>();
 -		
 -		System.out.print(",");
 -		System.out.flush();
 -
 -		Arrays.fill(newPoint, 0, newPoint.length, 0);
 -		
 -		// first project using the phrase-tag constraints,
 -		// for all p,t: sum_c lambda_ptc < scaleP 
 -		if (pool == null)
 -		{
 -			for (int p = 0; p < c.c.getNumPhrases(); ++p)
 -			{
 -				List<Edge> edges = c.c.getEdgesForPhrase(p);
 -				double[] toProject = new double[edges.size()];
 -				for(int tag=0;tag<c.K;tag++)
 -				{
 -					// FIXME: slow hash lookup for e (twice)
 -					for(int e=0; e<edges.size(); e++) 						
 -						toProject[e] = point[index(edges.get(e), tag, true)];
 -					long lbegin = System.currentTimeMillis();
 -					projectionPhrase.project(toProject);
 -					actualProjectionTime += System.currentTimeMillis() - lbegin;
 -					for(int e=0; e<edges.size(); e++)
 -						newPoint[index(edges.get(e), tag, true)] = toProject[e];
 -				}
 -			}
 -		}
 -		else // do above in parallel using thread pool
 -		{	
 -			for (int p = 0; p < c.c.getNumPhrases(); ++p)
 -			{
 -				final int phrase = p;
 -				final double[] inPoint = point;
 -				Runnable task = new Runnable()
 -				{
 -					public void run()
 -					{
 -						List<Edge> edges = c.c.getEdgesForPhrase(phrase);
 -						double toProject[] = new double[edges.size()];
 -						for(int tag=0;tag<c.K;tag++)
 -						{
 -							// FIXME: slow hash lookup for e
 -							for(int e=0; e<edges.size(); e++)
 -								toProject[e] = inPoint[index(edges.get(e), tag, true)];
 -							projectionPhrase.project(toProject);
 -							for(int e=0; e<edges.size(); e++)
 -								newPoint[index(edges.get(e), tag, true)] = toProject[e];
 -						}
 -					}		
 -				};
 -				tasks.add(pool.submit(task));
 -			}
 -		}
 -		//System.out.println("after PT " + Arrays.toString(newPoint));
 -	
 -		// now project using the context-tag constraints,
 -		// for all c,t: sum_p omega_pct < scaleC
 -		if (pool == null)
 -		{
 -			for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
 -			{
 -				List<Edge> edges = c.c.getEdgesForContext(ctx);
 -				double toProject[] = new double[edges.size()];
 -				for(int tag=0;tag<c.K;tag++)
 -				{
 -					// FIXME: slow hash lookup for e
 -					for(int e=0; e<edges.size(); e++)
 -						toProject[e] = point[index(edges.get(e), tag, false)];
 -					long lbegin = System.currentTimeMillis();
 -					projectionContext.project(toProject);
 -					actualProjectionTime += System.currentTimeMillis() - lbegin;
 -					for(int e=0; e<edges.size(); e++)
 -						newPoint[index(edges.get(e), tag, false)] = toProject[e];
 -				}
 -			}
 -		}
 -		else
 -		{
 -			// do above in parallel using thread pool
 -			for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
 -			{
 -				final int context = ctx;
 -				final double[] inPoint = point;
 -				Runnable task = new Runnable()
 -				{
 -					public void run()
 -					{
 -						List<Edge> edges = c.c.getEdgesForContext(context);
 -						double toProject[] = new double[edges.size()];
 -						for(int tag=0;tag<c.K;tag++)
 -						{
 -							// FIXME: slow hash lookup for e
 -							for(int e=0; e<edges.size(); e++)
 -								toProject[e] = inPoint[index(edges.get(e), tag, false)];
 -							projectionContext.project(toProject);
 -							for(int e=0; e<edges.size(); e++)
 -								newPoint[index(edges.get(e), tag, false)] = toProject[e];
 -						}
 -					}
 -				};
 -				tasks.add(pool.submit(task));
 -			}
 -		}
 -		
 -		if (pool != null)
 -		{
 -			// wait for all the jobs to complete
 -			Exception failure = null;
 -			for (Future<?> task: tasks)
 -			{
 -				try {
 -					task.get();
 -				} catch (InterruptedException e) {
 -					System.err.println("ERROR: Projection thread interrupted");
 -					e.printStackTrace();
 -					failure = e;
 -				} catch (ExecutionException e) {
 -					System.err.println("ERROR: Projection thread died");
 -					e.printStackTrace();
 -					failure = e;
 -				}
 -			}
 -			// rethrow the exception
 -			if (failure != null)
 -			{
 -				pool.shutdownNow();
 -				throw new RuntimeException(failure);
 -			}
 -		}
 -		
 -		double[] tmp = newPoint;
 -		newPoint = point;
 -		projectionTime += System.currentTimeMillis() - begin;
 -		
 -		//if (debug)
 -			//System.out.println("\t\treturning " + Arrays.toString(tmp));
 -		return tmp;
 -	}
 -	
 -	private int index(Edge edge, int tag, boolean phrase)
 -	{
 -		// NB if indexing changes must also change code in updateFunction and constructor
 -		if (phrase)
 -			return tag * edgeIndex.size() + edgeIndex.get(edge);
 -		else
 -			return (c.K + tag) * edgeIndex.size() + edgeIndex.get(edge);
 -	}
 -
 -	private int index(int e, int tag, boolean phrase)
 -	{
 -		// NB if indexing changes must also change code in updateFunction and constructor
 -		if (phrase)
 -			return tag * edgeIndex.size() + e;
 -		else
 -			return (c.K + tag) * edgeIndex.size() + e;
 -	}
 -	
 -	@Override
 -	public double[] getGradient() {
 -		gradientCalls++;
 -		return gradient;
 -	}
 -
 -	@Override
 -	public double getValue() {
 -		functionCalls++;
 -		return loglikelihood;
 -	}
 -
 -	@Override
 -	public String toString() {
 -		return "No need for pointless toString";
 -	}
 -
 -	public double []posterior(int edgeIndex){
 -		return q[edgeIndex];
 -	}
 -	
 -	public boolean optimizeWithProjectedGradientDescent()
 -	{
 -		projectionTime = 0;
 -		actualProjectionTime = 0;
 -		objectiveTime = 0;
 -		long start = System.currentTimeMillis();
 -
 -		LineSearchMethod ls =
 -			new ArmijoLineSearchMinimizationAlongProjectionArc
 -				(new InterpolationPickFirstStep(INIT_STEP_SIZE));
 -		//LineSearchMethod  ls = new WolfRuleLineSearch(
 -		//		(new InterpolationPickFirstStep(INIT_STEP_SIZE)), c1, c2);
 -		OptimizerStats stats = new OptimizerStats();
 -		
 -		
 -		ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
 -		StopingCriteria stopGrad = new ProjectedGradientL2Norm(GRAD_DIFF);
 -		StopingCriteria stopValue = new ValueDifference(VAL_DIFF*(-llh));
 -		CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
 -		compositeStop.add(stopGrad);
 -		compositeStop.add(stopValue);
 -		optimizer.setMaxIterations(ITERATIONS);
 -		updateFunction();
 -		boolean success = optimizer.optimize(this,stats,compositeStop);
 -
 -		System.out.println();
 -		System.out.println(stats.prettyPrint(1));
 -		
 -		if (success)
 -			System.out.print("\toptimization took " + optimizer.getCurrentIteration() + " iterations");
-		else
 -			System.out.print("\toptimization failed to converge");
 -		long total = System.currentTimeMillis() - start;
 -		System.out.println(" and " + total + " ms: projection " + projectionTime + 
 -				" actual " + actualProjectionTime + " objective " + objectiveTime);
 -
 -		return success;
 -	}
 -	
 -	double loglikelihood()
 -	{
 -		return llh;
 -	}
 -	
 -	double KL_divergence()
 -	{
 -		return -loglikelihood + MathUtils.dotProduct(parameters, gradient);
 -	}
 -	
 -	double phrase_l1lmax()
 -	{
 -		// \sum_{tag,phrase} max_{context} P(tag|context,phrase)
 -		double sum=0;
 -		for (int p = 0; p < c.c.getNumPhrases(); ++p)
 -		{
 -			List<Edge> edges = c.c.getEdgesForPhrase(p);
 -			for(int tag=0;tag<c.K;tag++)
 -			{
 -				double max=0;
 -				for (Edge edge: edges)
 -					max = Math.max(max, q[edgeIndex.get(edge)][tag]);
 -				sum+=max;
 -			}	
 -		}
 -		return sum;
 -	}
 -	
 -	double context_l1lmax()
 -	{
 -		// \sum_{tag,context} max_{phrase} P(tag|context,phrase)
 -		double sum=0;
 -		for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
 -		{
 -			List<Edge> edges = c.c.getEdgesForContext(ctx);
 -			for(int tag=0; tag<c.K; tag++)
 -			{
 -				double max=0;
 -				for (Edge edge: edges)
 -					max = Math.max(max, q[edgeIndex.get(edge)][tag]);
 -				sum+=max;
 -			}	
 -		}
 -		return sum;
 -	}
 -	
 -	// L - KL(q||p) - scalePT * l1lmax_phrase - scaleCT * l1lmax_context
 -	public double primal()
 -	{
 -		return loglikelihood() - KL_divergence() - scalePT * phrase_l1lmax() - scaleCT * context_l1lmax();
 -	}
 -}
\ No newline at end of file
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java
deleted file mode 100644
index 0cf31c1c..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java
+++ /dev/null
@@ -1,193 +0,0 @@
-package phrase;
 -
 -import io.FileUtil;
 -
 -import java.io.BufferedInputStream;
 -import java.io.BufferedReader;
 -import java.io.File;
 -import java.io.FileNotFoundException;
 -import java.io.IOException;
 -import java.io.PrintStream;
 -import java.util.ArrayList;
 -import java.util.HashMap;
 -import java.util.Scanner;
 -
 -public class PhraseCorpus 
 -{
 -	public HashMap<String,Integer>wordLex;
 -	public HashMap<String,Integer>phraseLex;
 -	
 -	public String wordList[];
 -	public String phraseList[];
 -	
 -	//data[phrase][num context][position]
 -	public int data[][][];
 -	public int numContexts;	
 -
 -	public PhraseCorpus(String filename) throws FileNotFoundException, IOException
 -	{
 -		BufferedReader r = FileUtil.reader(new File(filename));
 -		
 -		phraseLex=new HashMap<String,Integer>();
 -		wordLex=new HashMap<String,Integer>();
 -		
 -		ArrayList<int[][]>dataList=new ArrayList<int[][]>();
 -		String line=null;
 -		numContexts = 0;
 -		
 -		while((line=readLine(r))!=null){
 -			
 -			String toks[]=line.split("\t");
 -			String phrase=toks[0];
 -			addLex(phrase,phraseLex);
 -			
 -			toks=toks[1].split(" \\|\\|\\| ");
 -			
 -			ArrayList <int[]>ctxList=new ArrayList<int[]>();
 -			
 -			for(int i=0;i<toks.length;i+=2){
 -				String ctx=toks[i];
 -				String words[]=ctx.split(" ");
 -				if (numContexts == 0)
 -					numContexts = words.length - 1;
 -				else
 -					assert numContexts == words.length - 1;
 -				
 -				int []context=new int [numContexts+1];
 -				int idx=0;
 -				for(String word:words){
 -					if(word.equals("<PHRASE>")){
 -						continue;
 -					}
 -					addLex(word,wordLex);
 -					context[idx]=wordLex.get(word);
 -					idx++;
 -				}
 -				
 -				String count=toks[i+1];
 -				context[idx]=Integer.parseInt(count.trim().substring(2));
 -				
 -				ctxList.add(context);
 -			}
 -			
 -			dataList.add(ctxList.toArray(new int [0][]));
 -			
 -		}
 -		try{
 -			r.close();
 -		}catch(IOException ioe){
 -			ioe.printStackTrace();
 -		}
 -		data=dataList.toArray(new int[0][][]);
 -	}
 -
 -	private void addLex(String key, HashMap<String,Integer>lex){
 -		Integer i=lex.get(key);
 -		if(i==null){
 -			lex.put(key, lex.size());
 -		}
 -	}
 -	
 -	//for debugging
 -	public void saveLex(String lexFilename) throws FileNotFoundException, IOException
 -	{
 -		PrintStream ps = FileUtil.printstream(new File(lexFilename));
 -		ps.println("Phrase Lexicon");
 -		ps.println(phraseLex.size());
 -		printDict(phraseLex,ps);
 -		
 -		ps.println("Word Lexicon");
 -		ps.println(wordLex.size());
 -		printDict(wordLex,ps);
 -		ps.close();
 -	}
 -	
 -	private static void printDict(HashMap<String,Integer>lex,PrintStream ps){
 -		String []dict=buildList(lex);
 -		for(int i=0;i<dict.length;i++){
 -			ps.println(dict[i]);
 -		}
 -	}
 -	
 -	public void loadLex(String lexFilename){
 -		Scanner sc=io.FileUtil.openInFile(lexFilename);
 -		
 -		sc.nextLine();
 -		int size=sc.nextInt();
 -		sc.nextLine();
 -		String[]dict=new String[size];
 -		for(int i=0;i<size;i++){
 -			dict[i]=sc.nextLine();
 -		}
 -		phraseLex=buildMap(dict);
 -
 -		sc.nextLine();
 -		size=sc.nextInt();
 -		sc.nextLine();
 -		dict=new String[size];
 -		for(int i=0;i<size;i++){
 -			dict[i]=sc.nextLine();
 -		}
 -		wordLex=buildMap(dict);
 -		sc.close();
 -	}
 -	
 -	private HashMap<String, Integer> buildMap(String[]dict){
 -		HashMap<String,Integer> map=new HashMap<String,Integer>();
 -		for(int i=0;i<dict.length;i++){
 -			map.put(dict[i], i);
 -		}
 -		return map;
 -	}
 -	
 -	public void buildList(){
 -		if(wordList==null){
 -			wordList=buildList(wordLex);
 -			phraseList=buildList(phraseLex);
 -		}
 -	}
 -	
 -	private static String[]buildList(HashMap<String,Integer>lex){
 -		String dict[]=new String [lex.size()];
 -		for(String key:lex.keySet()){
 -			dict[lex.get(key)]=key;
 -		}
 -		return dict;
 -	}
 -	
 -	public String getContextString(int context[], boolean addPhraseMarker)
 -	{
 -		StringBuffer b = new StringBuffer();
 -		for (int i=0;i<context.length-1;i++)
 -		{
 -			if (b.length() > 0)
 -				b.append(" ");
 -
 -			if (i == context.length/2)
 -				b.append("<PHRASE> ");
 -			
 -			b.append(wordList[context[i]]);
 -		}
 -		return b.toString();
 -	}
 -	
 -	public static String readLine(BufferedReader r){
 -		try{
 -			return r.readLine();
 -		}
 -		catch(IOException ioe){
 -			ioe.printStackTrace();
 -		}
 -		return null;
 -	}
 -
 -	public static void main(String[] args) throws Exception 
 -	{
 -		String LEX_FILENAME="../pdata/lex.out";
 -		String DATA_FILENAME="../pdata/btec.con";
 -		PhraseCorpus c=new PhraseCorpus(DATA_FILENAME);
 -		c.saveLex(LEX_FILENAME);
 -		c.loadLex(LEX_FILENAME);
 -		c.saveLex(LEX_FILENAME);
 -	}
 -}
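
The constructor above parses concordance lines of the form "phrase TAB context ||| C=count ||| context ||| C=count ...". A minimal sketch of that parse for a single line (the example data is invented):

    public class ConcordanceLine {
        public static void main(String[] args) {
            String line = "le chat\tthe <PHRASE> sat ||| C=3";
            String[] toks = line.split("\t");
            String phrase = toks[0];
            // contexts and counts alternate, separated by " ||| "
            String[] rest = toks[1].split(" \\|\\|\\| ");
            for (int i = 0; i < rest.length; i += 2) {
                String context = rest[i];
                int count = Integer.parseInt(rest[i + 1].trim().substring(2));
                System.out.println(phrase + " | " + context + " | " + count);
            }
        }
    }
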
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java
deleted file mode 100644
index ac73a075..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java
+++ /dev/null
@@ -1,224 +0,0 @@
-package phrase;
 -
 -import java.util.Arrays;
 -import java.util.List;
 -
 -import optimization.gradientBasedMethods.ProjectedGradientDescent;
 -import optimization.gradientBasedMethods.ProjectedObjective;
 -import optimization.gradientBasedMethods.stats.OptimizerStats;
 -import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
 -import optimization.linesearch.InterpolationPickFirstStep;
 -import optimization.linesearch.LineSearchMethod;
 -import optimization.linesearch.WolfRuleLineSearch;
 -import optimization.projections.SimplexProjection;
 -import optimization.stopCriteria.CompositeStopingCriteria;
 -import optimization.stopCriteria.ProjectedGradientL2Norm;
 -import optimization.stopCriteria.StopingCriteria;
 -import optimization.stopCriteria.ValueDifference;
 -import optimization.util.MathUtils;
 -
 -public class PhraseObjective extends ProjectedObjective
 -{
 -	static final double GRAD_DIFF = 0.00002;
 -	static double INIT_STEP_SIZE = 300;
 -	static double VAL_DIFF = 1e-8; // tuned to BTEC subsample
 -	static int ITERATIONS = 100;
 -	private PhraseCluster c;
 -	
 -	/**@brief
 -	 *  for debugging purposes
 -	 */
 -	//public static PrintStream ps;
 -	
-	/**@brief current phrase being optimized*/
 -	public int phrase;
 -
 -	/**@brief un-regularized posterior
 -	 * unnormalized
 -	 * p[edge][tag]
 -	*  P(tag|edge) \propto P(tag|phrase)P(context|tag)
 -	 */
 -	private double[][]p;
 -
 -	/**@brief regularized posterior
 -	 * q[edge][tag] propto p[edge][tag]*exp(-lambda)
 -	 */
 -	private double q[][];
 -	private List<Corpus.Edge> data;
 -	
 -	/**@brief log likelihood of the associated phrase
 -	 * 
 -	 */
 -	private double loglikelihood;
 -	private SimplexProjection projection;
 -	
-	double[] newPoint;
 -	
 -	private int n_param;
 -	
 -	/**@brief likelihood under p
 -	 * 
 -	 */
 -	public double llh;
 -	
 -	public PhraseObjective(PhraseCluster cluster, int phraseIdx, double scale, double[] lambda){
 -		phrase=phraseIdx;
 -		c=cluster;
 -		data=c.c.getEdgesForPhrase(phrase);
 -		n_param=data.size()*c.K;
 -		//System.out.println("Num parameters " + n_param + " for phrase #" + phraseIdx);
 -		
 -		if (lambda==null) 
 -			lambda=new double[n_param];
 -		
 -		parameters = lambda;
 -		newPoint = new double[n_param];
 -		gradient = new double[n_param];
 -		initP();
 -		projection=new SimplexProjection(scale);
 -		q=new double [data.size()][c.K];
 -
 -		setParameters(parameters);
 -	}
 -
 -	private void initP(){
 -		p=new double[data.size()][];
 -		for(int edge=0;edge<data.size();edge++){
 -			p[edge]=c.posterior(data.get(edge));
 -			llh += data.get(edge).getCount() * Math.log(arr.F.l1norm(p[edge])); // Was bug here - count inside log!
 -			arr.F.l1normalize(p[edge]);
 -		}
 -	}
 -	
 -	@Override
 -	public void setParameters(double[] params) {
 -		super.setParameters(params);
 -		updateFunction();
 -	}
 -	
 -	private void updateFunction(){
 -		updateCalls++;
 -		loglikelihood=0;
 -
 -		for(int tag=0;tag<c.K;tag++){
 -			for(int edge=0;edge<data.size();edge++){
 -				q[edge][tag]=p[edge][tag]*
 -					Math.exp(-parameters[tag*data.size()+edge]/data.get(edge).getCount());
 -			}
 -		}
 -	
 -		for(int edge=0;edge<data.size();edge++){
 -			loglikelihood+=data.get(edge).getCount() * Math.log(arr.F.l1norm(q[edge]));
 -			arr.F.l1normalize(q[edge]);
 -		}
 -		
 -		for(int tag=0;tag<c.K;tag++){
 -			for(int edge=0;edge<data.size();edge++){
 -				gradient[tag*data.size()+edge]=-q[edge][tag];
 -			}
 -		}
 -	}
 -	
 -	@Override
 -	public double[] projectPoint(double[] point) 
 -	{
 -		double toProject[]=new double[data.size()];
 -		for(int tag=0;tag<c.K;tag++){
 -			for(int edge=0;edge<data.size();edge++){
 -				toProject[edge]=point[tag*data.size()+edge];
 -			}
 -			projection.project(toProject);
 -			for(int edge=0;edge<data.size();edge++){
 -				newPoint[tag*data.size()+edge]=toProject[edge];
 -			}
 -		}
 -		return newPoint;
 -	}
 -
 -	@Override
 -	public double[] getGradient() {
 -		gradientCalls++;
 -		return gradient;
 -	}
 -
 -	@Override
 -	public double getValue() {
 -		functionCalls++;
 -		return loglikelihood;
 -	}
 -
 -	@Override
 -	public String toString() {
 -		return Arrays.toString(parameters);
 -	}
 -
 -	public double [][]posterior(){
 -		return q;
 -	}
 -	
 -	long optimizationTime;
 -	
 -	public boolean optimizeWithProjectedGradientDescent(){
 -		long start = System.currentTimeMillis();
 -		
 -		LineSearchMethod ls =
 -			new ArmijoLineSearchMinimizationAlongProjectionArc
 -				(new InterpolationPickFirstStep(INIT_STEP_SIZE));
 -		//LineSearchMethod  ls = new WolfRuleLineSearch(
 -		//		(new InterpolationPickFirstStep(INIT_STEP_SIZE)), c1, c2);
 -		OptimizerStats stats = new OptimizerStats();
 -		
 -		
 -		ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
 -		StopingCriteria stopGrad = new ProjectedGradientL2Norm(GRAD_DIFF);
 -		StopingCriteria stopValue = new ValueDifference(VAL_DIFF*(-llh));
 -		CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
 -		compositeStop.add(stopGrad);
 -		compositeStop.add(stopValue);
 -		optimizer.setMaxIterations(ITERATIONS);
 -		updateFunction();
 -		boolean success = optimizer.optimize(this,stats,compositeStop);
 -		//System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1));
 -		//if(succed){
 -			//System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
 -		//}else{
 -//			System.out.println("Failed to optimize");
 -		//}
 -		//System.out.println(Arrays.toString(parameters));
 -		
 -		//	for(int edge=0;edge<data.getSize();edge++){
 -		//	ps.println(Arrays.toString(q[edge]));
 -		//	}
 -
 -		return success;
 -	}
 -	
 -	public double KL_divergence()
 -	{
 -		return -loglikelihood + MathUtils.dotProduct(parameters, gradient);
 -	}
 -	
 -	public double loglikelihood()
 -	{
 -		return llh;
 -	}
 -	
 -	public double l1lmax()
 -	{
 -		double sum=0;
 -		for(int tag=0;tag<c.K;tag++){
 -			double max=0;
 -			for(int edge=0;edge<data.size();edge++){
 -				if(q[edge][tag]>max)
 -					max=q[edge][tag];
 -			}
 -			sum+=max;
 -		}
 -		return sum;
 -	}
 -
 -	public double primal(double scale)
 -	{
 -		return loglikelihood() - KL_divergence() - scale * l1lmax();	
 -	}
 -}
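The core of updateFunction() above is the dual-to-posterior mapping of posterior regularisation: a dual vector lambda induces a regularised posterior q[e][t] proportional to p[e][t]*exp(-lambda[t*E+e]/count[e]), renormalised per edge. A minimal standalone sketch of that mapping follows; the class, method, and array names are illustrative assumptions, not part of the deleted sources or the project's API.

class DualPosteriorSketch {
    // Sketch (not from the deleted sources): q[e][t] = p[e][t] * exp(-lambda[t*E + e] / count[e]),
    // renormalised over tags for each edge e, mirroring updateFunction() above.
    static double[][] posteriorFromDual(double[][] p, double[] lambda, double[] count, int K) {
        int E = p.length;
        double[][] q = new double[E][K];
        for (int e = 0; e < E; e++) {
            double norm = 0;
            for (int t = 0; t < K; t++) {
                q[e][t] = p[e][t] * Math.exp(-lambda[t * E + e] / count[e]);
                norm += q[e][t];
            }
            for (int t = 0; t < K; t++)
                q[e][t] /= norm; // per-edge l1 normalisation, as arr.F.l1normalize does
        }
        return q;
    }
}

The count * Math.log(arr.F.l1norm(q[edge])) term accumulated alongside in updateFunction() is the objective value the projected gradient optimizer works with, and the gradient is simply -q.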
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Trainer.java b/gi/posterior-regularisation/prjava/src/phrase/Trainer.java deleted file mode 100644 index 6f302b20..00000000 --- a/gi/posterior-regularisation/prjava/src/phrase/Trainer.java +++ /dev/null @@ -1,257 +0,0 @@ -package phrase;
-
-import io.FileUtil;
-import joptsimple.OptionParser;
-import joptsimple.OptionSet;
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.List;
-import java.util.Random;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-
-import phrase.Corpus.Edge;
-
-import arr.F;
-
-public class Trainer 
-{
-	public static void main(String[] args) 
-	{
-        OptionParser parser = new OptionParser();
-        parser.accepts("help");
-        parser.accepts("in").withRequiredArg().ofType(File.class);
-        parser.accepts("in1").withRequiredArg().ofType(File.class);
-        parser.accepts("test").withRequiredArg().ofType(File.class);
-        parser.accepts("out").withRequiredArg().ofType(File.class);
-        parser.accepts("start").withRequiredArg().ofType(File.class);
-        parser.accepts("parameters").withRequiredArg().ofType(File.class);
-        parser.accepts("topics").withRequiredArg().ofType(Integer.class).defaultsTo(5);
-        parser.accepts("iterations").withRequiredArg().ofType(Integer.class).defaultsTo(10);
-        parser.accepts("threads").withRequiredArg().ofType(Integer.class).defaultsTo(0);
-        parser.accepts("scale-phrase").withRequiredArg().ofType(Double.class).defaultsTo(0.0);
-        parser.accepts("scale-context").withRequiredArg().ofType(Double.class).defaultsTo(0.0);
-        parser.accepts("seed").withRequiredArg().ofType(Long.class).defaultsTo(0L);
-        parser.accepts("convergence-threshold").withRequiredArg().ofType(Double.class).defaultsTo(1e-6);
-        parser.accepts("variational-bayes");
-        parser.accepts("alpha-emit").withRequiredArg().ofType(Double.class).defaultsTo(0.1);
-        parser.accepts("alpha-pi").withRequiredArg().ofType(Double.class).defaultsTo(0.0001);
-        parser.accepts("agree-direction");
-        parser.accepts("agree-language");
-        parser.accepts("no-parameter-cache");
-        parser.accepts("skip-large-phrases").withRequiredArg().ofType(Integer.class).defaultsTo(5);
-        OptionSet options = parser.parse(args);
-
-        if (options.has("help") || !options.has("in"))
-        {
-        	try {
-				parser.printHelpOn(System.err);
-			} catch (IOException e) {
-				System.err.println("This should never happen.");
-				e.printStackTrace();
-			}
-        	System.exit(1);
-        }
-		
-		int tags = (Integer) options.valueOf("topics");
-		int iterations = (Integer) options.valueOf("iterations");
-		double scale_phrase = (Double) options.valueOf("scale-phrase");
-		double scale_context = (Double) options.valueOf("scale-context");
-		int threads = (Integer) options.valueOf("threads");
-		double threshold = (Double) options.valueOf("convergence-threshold");
-		boolean vb = options.has("variational-bayes");
-		double alphaEmit = (vb) ? (Double) options.valueOf("alpha-emit") : 0;
-		double alphaPi = (vb) ? (Double) options.valueOf("alpha-pi") : 0;
-		int skip = (Integer) options.valueOf("skip-large-phrases");
-		
-		if (options.has("seed"))
-			F.rng = new Random((Long) options.valueOf("seed"));
-		
-		ExecutorService threadPool = null;
-		if (threads > 0)
-			threadPool = Executors.newFixedThreadPool(threads);
-		
-		if (tags <= 1 || scale_phrase < 0 || scale_context < 0 || threshold < 0)
-		{
-			System.err.println("Invalid arguments. Try again!");
-			System.exit(1);
-		}
-		
-		Corpus corpus = null;
-		File infile = (File) options.valueOf("in");
-		Corpus corpus1 = null;
-		File infile1 = (File) options.valueOf("in1");
-		try {
-			System.out.println("Reading concordance from " + infile);
-			corpus = Corpus.readFromFile(FileUtil.reader(infile));
-			corpus.printStats(System.out);
-			if(options.has("in1")){
-				corpus1 = Corpus.readFromFile(FileUtil.reader(infile1));
-				corpus1.printStats(System.out);
-			}
-		} catch (IOException e) {
-			System.err.println("Failed to open input file: " + infile);
-			e.printStackTrace();
-			System.exit(1);
-		}
-		
-		if (!(options.has("agree-direction")||options.has("agree-language")))
-			System.out.println("Running with " + tags + " tags " +
-					"for " + iterations + " iterations " +
-					((skip > 0) ? "skipping large phrases for first " + skip + " iterations " : "") +
-					"with scale " + scale_phrase + " phrase and " + scale_context + " context " +
-					"and " + threads + " threads");
-		else
-			System.out.println("Running agreement model with " + tags + " tags " +
-	 				"for " + iterations + " iterations");
-
-	 	System.out.println();
-		
- 		PhraseCluster cluster = null;
- 		Agree2Sides agree2sides = null;
- 		Agree agree= null;
- 		VB vbModel=null;
- 		if (options.has("agree-language"))
- 			agree2sides = new Agree2Sides(tags, corpus,corpus1);
- 		else if (options.has("agree-direction"))
- 			agree = new Agree(tags, corpus);
- 		else
- 		{
- 			if (vb)
- 			{
- 				vbModel=new VB(tags,corpus);
- 				vbModel.alpha=alphaPi;
- 				vbModel.lambda=alphaEmit;
- 	 			if (threadPool != null) vbModel.useThreadPool(threadPool);
- 			}
- 			else
- 			{
- 				cluster = new PhraseCluster(tags, corpus);
- 	 			if (threadPool != null) cluster.useThreadPool(threadPool);
-				
-	 			if (options.has("no-parameter-cache"))
-	 				cluster.cacheLambda = false;
-	 			if (options.has("start"))
-	 			{
-	 				try {
-						System.err.println("Reading starting parameters from " + options.valueOf("start"));
-						cluster.loadParameters(FileUtil.reader((File)options.valueOf("start")));
-					} catch (IOException e) {
-						System.err.println("Failed to open input file: " + options.valueOf("start"));
-						e.printStackTrace();
-					}
-	 			}
- 			}
- 		}
-		
-		double last = 0;
-		for (int i=0; i < iterations; i++)
-		{
-			double o;
-			if (agree != null)
-				o = agree.EM();
-			else if(agree2sides!=null)
-				o = agree2sides.EM();
-			else
-			{
-				if (i < skip)
-					System.out.println("Skipping phrases of length > " + (i+1));
-				
-				if (scale_phrase <= 0 && scale_context <= 0)
-				{
-					if (!vb)
-						o = cluster.EM((i < skip) ? i+1 : 0);
-					else
-						o = vbModel.EM();
-				}
-				else
-					o = cluster.PREM(scale_phrase, scale_context, (i < skip) ? i+1 : 0);
-			}
-			
-			System.out.println("ITER: "+i+" objective: " + o);
-			
-			// sometimes takes a few iterations to break the ties
-			if (i > 5 && Math.abs((o - last) / o) < threshold)
-			{
-				last = o;
-				break;
-			}
-			last = o;
-		}
-		
-		double pl1lmax = 0, cl1lmax = 0;
-		if (cluster != null)
-		{
-			pl1lmax = cluster.phrase_l1lmax();
-			cl1lmax = cluster.context_l1lmax();
-		}
-		else if (agree != null)
-		{
-			// fairly arbitrary choice of model1 cf model2
-			pl1lmax = agree.model1.phrase_l1lmax();
-			cl1lmax = agree.model1.context_l1lmax();
-		}
-		else if (agree2sides != null)
-		{
-			// fairly arbitrary choice of model1 cf model2
-			pl1lmax = agree2sides.model1.phrase_l1lmax();
-			cl1lmax = agree2sides.model1.context_l1lmax();
-		}
-
-		System.out.println("\nFinal posterior phrase l1lmax " + pl1lmax + " context l1lmax " + cl1lmax);
-		
-		if (options.has("out"))
-		{
-			File outfile = (File) options.valueOf("out");
-			try {
-				PrintStream ps = FileUtil.printstream(outfile);
-				List<Edge> test;
-				if (!options.has("test")) // just use the training
-					test = corpus.getEdges();
-				else
-				{	// if --test supplied, load up the file
-					infile = (File) options.valueOf("test");
-					System.out.println("Reading testing concordance from " + infile);
-					test = corpus.readEdges(FileUtil.reader(infile));
-				}
-				if(vb) {
-					assert !options.has("test");
-					vbModel.displayPosterior(ps);
-				} else if (cluster != null)
-					cluster.displayPosterior(ps, test);
-				else if (agree != null)
-					agree.displayPosterior(ps, test);
-				else if (agree2sides != null) {
-					assert !options.has("test");
-					agree2sides.displayPosterior(ps);
-				}
-				
-				ps.close();
-			} catch (IOException e) {
-				System.err.println("Failed to open either testing file or output file");
-				e.printStackTrace();
-				System.exit(1);
-			}
-		}
-
-		if (options.has("parameters"))
-		{
-			assert !vb;
-			File outfile = (File) options.valueOf("parameters");
-			PrintStream ps;
-			try {
-				ps = FileUtil.printstream(outfile);
-				cluster.displayModelParam(ps);
-				ps.close();
-			} catch (IOException e) {
-				System.err.println("Failed to open output parameters file: " + outfile);
-				e.printStackTrace();
-				System.exit(1);
-			}
-		}
-		
-		if (cluster != null && cluster.pool != null)
-			cluster.pool.shutdown();
-	}
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/VB.java b/gi/posterior-regularisation/prjava/src/phrase/VB.java deleted file mode 100644 index cd3f4966..00000000 --- a/gi/posterior-regularisation/prjava/src/phrase/VB.java +++ /dev/null @@ -1,419 +0,0 @@ -package phrase;
 -
 -import gnu.trove.TIntArrayList;
 -
 -import io.FileUtil;
 -
 -import java.io.File;
 -import java.io.IOException;
 -import java.io.PrintStream;
 -import java.util.ArrayList;
 -import java.util.Arrays;
 -import java.util.List;
 -import java.util.concurrent.Callable;
 -import java.util.concurrent.ExecutionException;
 -import java.util.concurrent.ExecutorService;
 -import java.util.concurrent.Future;
 -
 -import org.apache.commons.math.special.Gamma;
 -
 -import phrase.Corpus.Edge;
 -
 -public class VB {
 -
 -	public static int MAX_ITER=400;
 -	
 -	/**@brief
 -	 * hyper param for beta
 -	 * where beta is multinomial
 -	 * for generating words from a topic
 -	 */
 -	public double lambda=0.1;
 -	/**@brief
 -	 * hyper param for theta
 -	 * where theta is dirichlet for z
 -	 */
 -	public double alpha=0.0001;
 -	/**@brief
 -	 * variational param for beta
 -	 */
 -	private double rho[][][];
 -	private double digamma_rho[][][];
 -	private double rho_sum[][];
 -	/**@brief
 -	 * variational param for z
 -	 */
 -	//private double phi[][];
 -	/**@brief
 -	 * variational param for theta
 -	 */
 -	private double gamma[];
 -	private static double VAL_DIFF_RATIO=0.005;
 -	
 -	private int n_positions;
 -	private int n_words;
 -	private int K;
 -	private ExecutorService pool;
 -	
 -	private Corpus c;
 -	public static void main(String[] args) {
 -	//	String in="../pdata/canned.con";
 -		String in="../pdata/btec.con";
 -		String out="../pdata/vb.out";
 -		int numCluster=25;
 -		Corpus corpus = null;
 -		File infile = new File(in);
 -		try {
 -			System.out.println("Reading concordance from " + infile);
 -			corpus = Corpus.readFromFile(FileUtil.reader(infile));
 -			corpus.printStats(System.out);
 -		} catch (IOException e) {
 -			System.err.println("Failed to open input file: " + infile);
 -			e.printStackTrace();
 -			System.exit(1);
 -		}
 -		
 -		VB vb=new VB(numCluster, corpus);
 -		int iter=20;
 -		for(int i=0;i<iter;i++){
 -			double obj=vb.EM();
 -			System.out.println("Iter "+i+": "+obj);
 -		}
 -		
 -		File outfile = new File (out);
 -		try {
 -			PrintStream ps = FileUtil.printstream(outfile);
 -			vb.displayPosterior(ps);
 -		//	ps.println();
 -		//	c2f.displayModelParam(ps);
 -			ps.close();
 -		} catch (IOException e) {
 -			System.err.println("Failed to open output file: " + outfile);
 -			e.printStackTrace();
 -			System.exit(1);
 -		}
 -	}
 -
 -	public VB(int numCluster, Corpus corpus){
 -		c=corpus;
 -		K=numCluster;
 -		n_positions=c.getNumContextPositions();
 -		n_words=c.getNumWords();
 -		rho=new double[K][n_positions][n_words];
-		// initialise rho: the prior lambda plus a random soft count for each context occurrence
 -		double[] phi_tmp=new double[K];
 -		for(int i=0;i<K;i++){
 -			for(int pos=0;pos<n_positions;pos++){
 -				Arrays.fill(rho[i][pos], lambda);
 -			}
 -		}
 -		for(int d=0;d<c.getNumPhrases();d++){
 -			List<Edge>doc=c.getEdgesForPhrase(d);
 -			for(int n=0;n<doc.size();n++){
 -				TIntArrayList context=doc.get(n).getContext();
 -				arr.F.randomise(phi_tmp);
 -				for(int i=0;i<K;i++){
 -					for(int pos=0;pos<n_positions;pos++){
 -						rho[i][pos][context.get(pos)]+=phi_tmp[i];
 -					}
 -				}
 -			}
 -		}
 -		
 -	}
 -	
 -	private double inference(int phraseID, double[][] phi, double[] gamma)
 -	{
 -		List<Edge > doc=c.getEdgesForPhrase(phraseID);
 -		for(int i=0;i<phi.length;i++){
 -			for(int j=0;j<phi[i].length;j++){
 -				phi[i][j]=1.0/K;
 -			}
 -		}
 -		Arrays.fill(gamma,alpha+1.0/K);
 -		
 -		double digamma_gamma[]=new double[K];
 -		
 -		double gamma_sum=digamma(arr.F.l1norm(gamma));
 -		for(int i=0;i<K;i++){
 -			digamma_gamma[i]=digamma(gamma[i]);
 -		}
 -		double gammaSum[]=new double [K];
 -		double prev_val=0;
 -		double obj=0;
 -		
 -		for(int iter=0;iter<MAX_ITER;iter++){
 -			prev_val=obj;
 -			obj=0;
 -			Arrays.fill(gammaSum,0.0);
 -			for(int n=0;n<doc.size();n++){
 -				TIntArrayList context=doc.get(n).getContext();
 -				double phisum=0;
 -				for(int i=0;i<K;i++){
 -					double sum=0;
 -					for(int pos=0;pos<n_positions;pos++){
 -						int word=context.get(pos);
 -						sum+=digamma_rho[i][pos][word]-rho_sum[i][pos];
 -					}
 -					sum+= digamma_gamma[i]-gamma_sum;
 -					phi[n][i]=sum;
 -					
 -					if (i > 0){
 -	                    phisum = log_sum(phisum, phi[n][i]);
 -					}
 -	                else{
 -	                    phisum = phi[n][i];
 -	                }
 -					
 -				}//end of  a word
 -				
 -				for(int i=0;i<K;i++){
 -					phi[n][i]=Math.exp(phi[n][i]-phisum);
 -					gammaSum[i]+=phi[n][i];
 -				}
 -				
 -			}//end of doc
 -			
 -			for(int i=0;i<K;i++){
 -				gamma[i]=alpha+gammaSum[i];
 -			}
 -			gamma_sum=digamma(arr.F.l1norm(gamma));
 -			for(int i=0;i<K;i++){
 -				digamma_gamma[i]=digamma(gamma[i]);
 -			}
 -			//compute objective for reporting
 -
 -			obj=0;
 -			
 -			for(int i=0;i<K;i++){
 -				obj+=(alpha-1)*(digamma_gamma[i]-gamma_sum);
 -			}
 -			
 -			
 -			for(int n=0;n<doc.size();n++){
 -				TIntArrayList context=doc.get(n).getContext();
 -				
 -				for(int i=0;i<K;i++){
 -					//entropy of phi + expected log likelihood of z
 -					obj+=phi[n][i]*(digamma_gamma[i]-gamma_sum);
 -					
 -					if(phi[n][i]>1e-10){
 -						obj+=phi[n][i]*Math.log(phi[n][i]);
 -					}
 -					
 -					double beta_sum=0;
 -					for(int pos=0;pos<n_positions;pos++){
 -						int word=context.get(pos);
 -						beta_sum+=(digamma(rho[i][pos][word])-rho_sum[i][pos]);
 -					}
 -					obj+=phi[n][i]*beta_sum;
 -				}
 -			}
 -			
 -			obj-=log_gamma(arr.F.l1norm(gamma));
 -			for(int i=0;i<K;i++){
 -				obj+=Gamma.logGamma(gamma[i]);
 -				obj-=(gamma[i]-1)*(digamma_gamma[i]-gamma_sum);
 -			}
 -			
 -//			System.out.println(phraseID+": "+obj);
 -			if(iter>0 && (obj-prev_val)/Math.abs(obj)<VAL_DIFF_RATIO){
 -				break;
 -			}
 -		}//end of inference loop
 -		
 -		return obj;
 -	}//end of inference
 -	
 -	/**
 -	 * @return objective of this iteration
 -	 */
 -	public double EM(){
 -		double emObj=0;
 -		if(digamma_rho==null){
 -			digamma_rho=new double[K][n_positions][n_words];
 -		}
 -		for(int i=0;i<K;i++){
 -			for (int pos=0;pos<n_positions;pos++){
 -				for(int j=0;j<n_words;j++){
 -					digamma_rho[i][pos][j]= digamma(rho[i][pos][j]);
 -				}
 -			}
 -		}
 -		
 -		if(rho_sum==null){
 -			rho_sum=new double [K][n_positions];
 -		}
 -		for(int i=0;i<K;i++){
 -			for(int pos=0;pos<n_positions;pos++){
 -				rho_sum[i][pos]=digamma(arr.F.l1norm(rho[i][pos]));
 -			}
 -		}
 -
 -		//E
 -		double exp_rho[][][]=new double[K][n_positions][n_words];
 -		if (pool == null)
 -		{
 -			for (int d=0;d<c.getNumPhrases();d++)
 -			{		
 -				List<Edge > doc=c.getEdgesForPhrase(d);
 -				double[][] phi = new double[doc.size()][K];
 -				double[] gamma = new double[K];
 -				
 -				emObj += inference(d, phi, gamma);
 -				
 -				for(int n=0;n<doc.size();n++){
 -					TIntArrayList context=doc.get(n).getContext();
 -					for(int pos=0;pos<n_positions;pos++){
 -						int word=context.get(pos);
 -						for(int i=0;i<K;i++){	
 -							exp_rho[i][pos][word]+=phi[n][i];
 -						}
 -					}
 -				}
 -				//if(d!=0 && d%100==0)  System.out.print(".");
 -				//if(d!=0 && d%1000==0) System.out.println(d);
 -			}
 -		}
 -		else // multi-threaded version of above loop
 -		{
 -			class PartialEStep implements Callable<PartialEStep>
 -			{
 -				double[][] phi;
 -				double[] gamma;
 -				double obj;
 -				int d;
 -				PartialEStep(int d) { this.d = d; }
 -
 -				public PartialEStep call()
 -				{
 -					phi = new double[c.getEdgesForPhrase(d).size()][K];
 -					gamma = new double[K];
 -					obj = inference(d, phi, gamma);
 -					return this;
 -				}			
 -			}
 -
 -			List<Future<PartialEStep>> jobs = new ArrayList<Future<PartialEStep>>();
 -			for (int d=0;d<c.getNumPhrases();d++)
 -				jobs.add(pool.submit(new PartialEStep(d)));
 -		
 -			for (Future<PartialEStep> job: jobs)
 -			{
 -				try {
 -					PartialEStep e = job.get();
 -					
 -					emObj += e.obj;				
 -					List<Edge> doc = c.getEdgesForPhrase(e.d);
 -					for(int n=0;n<doc.size();n++){
 -						TIntArrayList context=doc.get(n).getContext();
 -						for(int pos=0;pos<n_positions;pos++){
 -							int word=context.get(pos);
 -							for(int i=0;i<K;i++){	
 -								exp_rho[i][pos][word]+=e.phi[n][i];
 -							}
 -						}
 -					}
 -				} catch (ExecutionException e) {
 -					System.err.println("ERROR: E-step thread execution failed.");
 -					throw new RuntimeException(e);
 -				} catch (InterruptedException e) {
 -					System.err.println("ERROR: Failed to join E-step thread.");
 -					throw new RuntimeException(e);
 -				}
 -			}
 -		}	
 -	//	System.out.println("EM Objective:"+emObj);
 -		
 -		//M
 -		for(int i=0;i<K;i++){
 -			for(int pos=0;pos<n_positions;pos++){
 -				for(int j=0;j<n_words;j++){
 -					rho[i][pos][j]=lambda+exp_rho[i][pos][j];
 -				}
 -			}
 -		}
 -		
 -		//E[\log p(\beta|\lambda)] - E[\log q(\beta)]
 -		for(int i=0;i<K;i++){
-			for(int pos=0;pos<n_positions;pos++){
-				double rhoSum=0; // bug fix: reset per position, each (tag, position) is its own Dirichlet
 -				for(int j=0;j<n_words;j++){
 -					rhoSum+=rho[i][pos][j];
 -				}
 -				double digamma_rhoSum=Gamma.digamma(rhoSum);
 -				emObj-=Gamma.logGamma(rhoSum);
 -				for(int j=0;j<n_words;j++){
 -					emObj+=(lambda-rho[i][pos][j])*(Gamma.digamma(rho[i][pos][j])-digamma_rhoSum);
 -					emObj+=Gamma.logGamma(rho[i][pos][j]);
 -				}
 -			}
 -		}
 -		
 -		return emObj;
 -	}//end of EM
 -	
 -	public void displayPosterior(PrintStream ps)
 -	{	
 -		for(int d=0;d<c.getNumPhrases();d++){
 -			List<Edge > doc=c.getEdgesForPhrase(d);
 -			double[][] phi = new double[doc.size()][K];
 -			for(int i=0;i<phi.length;i++)
 -				for(int j=0;j<phi[i].length;j++)
 -					phi[i][j]=1.0/K;
 -			double[] gamma = new double[K];
 -
 -			inference(d, phi, gamma);
 -
 -			for(int n=0;n<doc.size();n++){
 -				Edge edge=doc.get(n);
 -				int tag=arr.F.argmax(phi[n]);
 -				ps.print(edge.getPhraseString());
 -				ps.print("\t");
 -				ps.print(edge.getContextString(true));
 -
 -				ps.println(" ||| C=" + tag);
 -			}
 -		}
 -	}
 -
 -	double log_sum(double log_a, double log_b)
 -	{
 -	  double v;
 -
 -	  if (log_a < log_b)
 -	      v = log_b+Math.log(1 + Math.exp(log_a-log_b));
 -	  else
 -	      v = log_a+Math.log(1 + Math.exp(log_b-log_a));
 -	  return(v);
 -	}
 -		
 -	double digamma(double x)
 -	{
 -	    double p;
 -	    x=x+6;
 -	    p=1/(x*x);
 -	    p=(((0.004166666666667*p-0.003968253986254)*p+
 -		0.008333333333333)*p-0.083333333333333)*p;
 -	    p=p+Math.log(x)-0.5/x-1/(x-1)-1/(x-2)-1/(x-3)-1/(x-4)-1/(x-5)-1/(x-6);
 -	    return p;
 -	}
 -	
 -	double log_gamma(double x)
 -	{
 -	     double z=1/(x*x);
 -
 -	    x=x+6;
 -	    z=(((-0.000595238095238*z+0.000793650793651)
 -		*z-0.002777777777778)*z+0.083333333333333)/x;
 -	    z=(x-0.5)*Math.log(x)-x+0.918938533204673+z-Math.log(x-1)-
 -	    Math.log(x-2)-Math.log(x-3)-Math.log(x-4)-Math.log(x-5)-Math.log(x-6);
 -	    return z;
 -	}
 -
 -	public void useThreadPool(ExecutorService threadPool) 
 -	{
 -		pool = threadPool;
 -	}
-}//end of class
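The E-step above keeps phi in log space and normalises it with pairwise log_sum() calls. The same normalisation can be written as a single log-sum-exp over the row; the helper below is an illustrative sketch, not part of the deleted VB class.

class LogSpace {
    // Sketch (assumption, not project code): overwrite logPhi with the
    // normalised probabilities exp(logPhi[i] - logZ). Subtracting the row
    // maximum before exponentiating avoids overflow, giving the same
    // guarantee that the incremental log_sum() above provides.
    static void logNormalise(double[] logPhi) {
        double max = Double.NEGATIVE_INFINITY;
        for (double v : logPhi) max = Math.max(max, v);
        double sum = 0;
        for (double v : logPhi) sum += Math.exp(v - max);
        double logZ = max + Math.log(sum);
        for (int i = 0; i < logPhi.length; i++)
            logPhi[i] = Math.exp(logPhi[i] - logZ);
    }
}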
 diff --git a/gi/posterior-regularisation/prjava/src/test/CorpusTest.java b/gi/posterior-regularisation/prjava/src/test/CorpusTest.java deleted file mode 100644 index b4c3041f..00000000 --- a/gi/posterior-regularisation/prjava/src/test/CorpusTest.java +++ /dev/null @@ -1,60 +0,0 @@ -package test;
 -
 -import java.util.Arrays;
 -import java.util.HashMap;
 -
 -import data.Corpus;
 -import hmm.POS;
 -
 -public class CorpusTest {
 -
 -	public static void main(String[] args) {
 -		Corpus c=new Corpus(POS.trainFilename);
 -
 -		
 -		int idx=30;
 -		
 -		
 -		HashMap<String, Integer>vocab=
 -			(HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(Corpus.alphaFilename);
 -		
 -		HashMap<String, Integer>tagVocab=
 -			(HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(Corpus.tagalphaFilename);
 -		
 -		
 -		String [] dict=new String [vocab.size()+1];
 -		for(String key:vocab.keySet()){
 -			dict[vocab.get(key)]=key;
 -		}
 -		dict[dict.length-1]=Corpus.UNK_TOK;
 -		
 -		String [] tagdict=new String [tagVocab.size()+1];
 -		for(String key:tagVocab.keySet()){
 -			tagdict[tagVocab.get(key)]=key;
 -		}
 -		tagdict[tagdict.length-1]=Corpus.UNK_TOK;
 -		
 -		String[] sent=c.get(idx);
 -		int []data=c.getInt(idx);
 -		
 -		
 -		String []roundtrip=new String [sent.length];
 -		for(int i=0;i<sent.length;i++){
 -			roundtrip[i]=dict[data[i]];
 -		}
 -		System.out.println(Arrays.toString(sent));
 -		System.out.println(Arrays.toString(roundtrip));
 -		
 -		sent=c.tag.get(idx);
 -		data=c.tagData.get(idx);
 -		
 -		
 -		roundtrip=new String [sent.length];
 -		for(int i=0;i<sent.length;i++){
 -			roundtrip[i]=tagdict[data[i]];
 -		}
 -		System.out.println(Arrays.toString(sent));
 -		System.out.println(Arrays.toString(roundtrip));
 -	}
 -
 -}
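CorpusTest builds the same index-to-string dictionary twice, once for words and once for tags. A hypothetical helper factoring out that inversion is sketched below; invertVocab and unkTok are illustrative names that do not appear in the deleted sources.

class VocabUtil {
    // Sketch: invert a string->index vocabulary into an index->string array,
    // reserving the final slot for the unknown token (as CorpusTest does).
    static String[] invertVocab(java.util.Map<String, Integer> vocab, String unkTok) {
        String[] dict = new String[vocab.size() + 1];
        for (java.util.Map.Entry<String, Integer> e : vocab.entrySet())
            dict[e.getValue()] = e.getKey();
        dict[dict.length - 1] = unkTok;
        return dict;
    }
}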
 diff --git a/gi/posterior-regularisation/prjava/src/test/HMMModelStats.java b/gi/posterior-regularisation/prjava/src/test/HMMModelStats.java deleted file mode 100644 index d54525c8..00000000 --- a/gi/posterior-regularisation/prjava/src/test/HMMModelStats.java +++ /dev/null @@ -1,105 +0,0 @@ -package test;
 -
 -import hmm.HMM;
 -import hmm.POS;
 -
 -import java.io.File;
 -import java.io.FileNotFoundException;
 -import java.io.IOException;
 -import java.io.PrintStream;
 -import java.util.ArrayList;
 -import java.util.Collections;
 -import java.util.HashMap;
 -
 -import data.Corpus;
 -
 -public class HMMModelStats {
 -
 -	public static String modelFilename="../posdata/posModel.out";
 -	public static String alphaFilename="../posdata/corpus.alphabet";
 -	public static String statsFilename="../posdata/model.stats";
 -
 -	public static final int NUM_WORD=50;
 -	
 -	public static String testFilename="../posdata/en_test.conll";
 -	
 -	public static double [][]maxwt;
 -	
 -	public static void main(String[] args) {
 -		HashMap<String, Integer>vocab=
 -			(HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(alphaFilename);
 -		
 -		Corpus test=new Corpus(testFilename,vocab);
 -		
 -		String [] dict=new String [vocab.size()+1];
 -		for(String key:vocab.keySet()){
 -			dict[vocab.get(key)]=key;
 -		}
 -		dict[dict.length-1]=Corpus.UNK_TOK;
 -		
 -		HMM hmm=new HMM();
 -		hmm.readModel(modelFilename);
 -
 -		
 -		
 -		PrintStream ps = null;
 -		try {
 -			ps = io.FileUtil.printstream(new File(statsFilename));
 -		} catch (IOException e) {
 -			e.printStackTrace();
 -			System.exit(1);
 -		}
 -		
 -		double [][] emit=hmm.getEmitProb();
 -		for(int i=0;i<emit.length;i++){
 -			ArrayList<IntDoublePair>l=new ArrayList<IntDoublePair>();
 -			for(int j=0;j<emit[i].length;j++){
 -				l.add(new IntDoublePair(j,emit[i][j]));
 -			}
 -			Collections.sort(l);
 -			ps.println(i);
 -			for(int j=0;j<NUM_WORD;j++){
 -				if(j>=dict.length){
 -					break;
 -				}
 -				ps.print(dict[l.get(j).idx]+"\t");
 -				if((1+j)%10==0){
 -					ps.println();
 -				}
 -			}
 -			ps.println("\n");
 -		}
 -		
 -		checkMaxwt(hmm,ps,test.getAllData());
 -		
-		int terminalSym=vocab.get(Corpus.END_SYM);
 -		//sample 10 sentences
 -		for(int i=0;i<10;i++){
 -			int []sent=hmm.sample(terminalSym);
 -			for(int j=0;j<sent.length;j++){
 -				ps.print(dict[sent[j]]+"\t");
 -			}
 -			ps.println();
 -		}
 -		
 -		ps.close();
 -		
 -	}
 -	
 -	public static void checkMaxwt(HMM hmm,PrintStream ps,int [][]data){
 -		double [][]emit=hmm.getEmitProb();
 -		maxwt=new double[emit.length][emit[0].length];
 -		
 -		hmm.computeMaxwt(maxwt,data);
 -		double sum=0;
 -		for(int i=0;i<maxwt.length;i++){
-			for(int j=0;j<maxwt[i].length;j++){ // bug fix: inner loop runs over words, not tags
 -				sum+=maxwt[i][j];
 -			}
 -		}
 -		
 -		ps.println("max w t P(w_i|t): "+sum);
 -		
 -	}
 -	
 -}
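HMMModelStats sorts all |V| emission probabilities per tag just to print the top NUM_WORD words. On a modern JDK a bounded min-heap yields the same top-k in O(|V| log k); the helper below is an assumption for illustration, not code from the deleted sources.

class TopK {
    // Sketch: indices of the k largest entries of probs, highest first.
    static int[] topK(final double[] probs, int k) {
        java.util.PriorityQueue<Integer> heap =
                new java.util.PriorityQueue<Integer>(k, (a, b) -> Double.compare(probs[a], probs[b]));
        for (int j = 0; j < probs.length; j++) {
            heap.add(j);
            if (heap.size() > k) heap.poll(); // evict the current minimum
        }
        int[] idx = new int[heap.size()];
        for (int i = idx.length - 1; i >= 0; i--)
            idx[i] = heap.poll();             // heap yields ascending; fill backwards
        return idx;
    }
}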
 diff --git a/gi/posterior-regularisation/prjava/src/test/IntDoublePair.java b/gi/posterior-regularisation/prjava/src/test/IntDoublePair.java deleted file mode 100644 index 3f9f0ad7..00000000 --- a/gi/posterior-regularisation/prjava/src/test/IntDoublePair.java +++ /dev/null @@ -1,23 +0,0 @@ -package test;
 -
-public class IntDoublePair implements Comparable<IntDoublePair>{
-	double val;
-	int idx;
-	/** Orders pairs by descending val (largest value first). */
-	public int compareTo(IntDoublePair pair){
-		if(pair.val>val){
-			return 1;
-		}
-		if(pair.val<val){
-			return -1;
-		}
-		return 0;
-	}
 -	public IntDoublePair(int i,double v){
 -		val=v;
 -		idx=i;
 -	}
 -}
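Note the ordering convention: compareTo() returns 1 when the other pair's val is larger, so Collections.sort() arranges IntDoublePair lists in descending val order, which is what HMMModelStats relies on. On a Java 8+ JDK the same ordering can be stated directly; this is a sketch, not part of the deleted sources.

class IntDoublePairOrdering {
    // Sketch: the same descending-by-val order as IntDoublePair.compareTo,
    // usable as Collections.sort(list, BY_VAL_DESCENDING).
    static final java.util.Comparator<IntDoublePair> BY_VAL_DESCENDING =
            (a, b) -> Double.compare(b.val, a.val);
}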
 diff --git a/gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java b/gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java deleted file mode 100644 index 9059a59e..00000000 --- a/gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java +++ /dev/null @@ -1,131 +0,0 @@ -package test;
 -
 -
 -
 -import optimization.gradientBasedMethods.ProjectedGradientDescent;
 -import optimization.gradientBasedMethods.ProjectedObjective;
 -import optimization.gradientBasedMethods.stats.OptimizerStats;
 -import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
 -import optimization.linesearch.InterpolationPickFirstStep;
 -import optimization.linesearch.LineSearchMethod;
 -import optimization.projections.BoundsProjection;
 -import optimization.projections.Projection;
 -import optimization.projections.SimplexProjection;
 -import optimization.stopCriteria.CompositeStopingCriteria;
 -import optimization.stopCriteria.GradientL2Norm;
 -import optimization.stopCriteria.ProjectedGradientL2Norm;
 -import optimization.stopCriteria.StopingCriteria;
 -import optimization.stopCriteria.ValueDifference;
 -
 -
-/**
- * @author javg
- *
- * Minimises a(x-dx)^2 + b(y-dy)^2 under a projection constraint
- * (simplex or bounds), cf. the printout in the constructor.
- */
 -public class X2y2WithConstraints extends ProjectedObjective{
 -
 -
 -	double a, b;
 -	double dx;
 -	double dy;
 -	Projection projection;
 -	
 -	
 -	public X2y2WithConstraints(double a, double b, double[] params, double dx, double dy, Projection proj){
 -		//projection = new BoundsProjection(0.2,Double.MAX_VALUE);
 -		super();
 -		projection = proj;	
 -		this.a = a;
 -		this.b = b;
 -		this.dx = dx;
 -		this.dy = dy;
 -		setInitialParameters(params);
 -		System.out.println("Function " +a+"(x-"+dx+")^2 + "+b+"(y-"+dy+")^2");
 -		System.out.println("Gradient " +(2*a)+"(x-"+dx+") ; "+(b*2)+"(y-"+dy+")");
 -		printParameters();
 -		projection.project(parameters);
 -		printParameters();
 -		gradient = new double[2];
 -	}
 -	
 -	public double getValue() {
 -		functionCalls++;
 -		return a*(parameters[0]-dx)*(parameters[0]-dx)+b*((parameters[1]-dy)*(parameters[1]-dy));
 -	}
 -
 -	public double[] getGradient() {
 -		if(gradient == null){
 -			gradient = new double[2];
 -		}
 -		gradientCalls++;
 -		gradient[0]=2*a*(parameters[0]-dx);
 -		gradient[1]=2*b*(parameters[1]-dy);
 -		return gradient;
 -	}
 -	
 -	
 -	public double[] projectPoint(double[] point) {
 -		double[] newPoint = point.clone();
 -		projection.project(newPoint);
 -		return newPoint;
 -	}	
 -	
 -	public void optimizeWithProjectedGradientDescent(LineSearchMethod ls, OptimizerStats stats, X2y2WithConstraints o){
 -		ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
 -		StopingCriteria stopGrad = new ProjectedGradientL2Norm(0.001);
 -		StopingCriteria stopValue = new ValueDifference(0.001);
 -		CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
 -		compositeStop.add(stopGrad);
 -		compositeStop.add(stopValue);
 -		
 -		optimizer.setMaxIterations(5);
-		boolean succeeded = optimizer.optimize(o,stats,compositeStop);
-		System.out.println("Ended optimization Projected Gradient Descent\n" + stats.prettyPrint(1));
-		System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
-		if(succeeded){
-			System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
-		}else{
-			System.out.println("Failed to optimize");
-		}
 -	}
 -	
 -	
 -	
 -	public String toString(){
 -		
 -		return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue() + " grad (" + getGradient()[0] + ":" + getGradient()[1]+")";
 -	}
 -	
 -	public static void main(String[] args) {
 -		double a = 1;
 -		double b=1;
-		double x0 = 0;
-		double y0 = 1;
-		double dx = 0.5;
-		double dy = 0.2;
 -		double [] parameters = new double[2];
 -		parameters[0] = x0;
 -		parameters[1] = y0;
 -		X2y2WithConstraints o = new X2y2WithConstraints(a,b,parameters,dx,dy, 
 -				new SimplexProjection(0.5)
 -				//new BoundsProjection(0.0,0.4)
 -		);
 -		System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1] + " a " + a + " b "+b );
 -		o.setDebugLevel(4);
 -		
 -		LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1));
 -		
 -		OptimizerStats stats = new OptimizerStats();
 -		o.optimizeWithProjectedGradientDescent(ls, stats, o);
 -		
 -//		o = new x2y2WithConstraints(a,b,x0,y0,dx,dy);
 -//		stats = new OptimizerStats();
 -//		o.optimizeWithSpectralProjectedGradientDescent(stats, o);
 -	}
 -	
 -	
 -	
 -	
 -}
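Both PhraseObjective and this test depend on the optimization library's SimplexProjection, whose source is not part of this diff. For reference, the standard Euclidean projection onto the scaled simplex {x : x_i >= 0, sum_i x_i = z} (Duchi et al., ICML 2008) can be sketched as below; this is an assumption about the technique, not the library's actual code.

class SimplexProjectionSketch {
    // Sketch (not the library's code): project v in place onto the simplex of mass z.
    static void projectOntoSimplex(double[] v, double z) {
        double[] mu = v.clone();
        java.util.Arrays.sort(mu);            // ascending; walk from the largest entry down
        double cumSum = 0, theta = 0;
        for (int j = mu.length - 1, k = 1; j >= 0; j--, k++) {
            cumSum += mu[j];
            double t = (cumSum - z) / k;      // candidate threshold using the top k entries
            if (mu[j] - t > 0) theta = t;     // entry stays positive: keep growing the support
            else break;
        }
        for (int i = 0; i < v.length; i++)
            v[i] = Math.max(v[i] - theta, 0.0); // shift by theta and clip at zero
    }
}

The sort-and-threshold step finds the largest set of coordinates that remain positive after subtracting a common shift theta; everything else clips to zero, so the result is non-negative and sums to z.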
 diff --git a/gi/posterior-regularisation/prjava/src/util/Array.java b/gi/posterior-regularisation/prjava/src/util/Array.java deleted file mode 100644 index cc4725af..00000000 --- a/gi/posterior-regularisation/prjava/src/util/Array.java +++ /dev/null @@ -1,41 +0,0 @@ -package util; - -import java.util.Arrays; - -public class Array { - -	 -	 -	public  static void sortDescending(double[] ds){ -		for (int i = 0; i < ds.length; i++) ds[i] = -ds[i]; -		Arrays.sort(ds); -		for (int i = 0; i < ds.length; i++) ds[i] = -ds[i]; -	} -	 -	/**  -	 * Return a new reversed array -	 * @param array -	 * @return -	 */ -	public static int[] reverseIntArray(int[] array){ -		int[] reversed = new int[array.length]; -		for (int i = 0; i < reversed.length; i++) { -			reversed[i] = array[reversed.length-1-i]; -		} -		return reversed; -	} -	 -	public static String[] sumArray(String[] in, int from){ -		String[] res = new String[in.length-from]; -		for (int i = from; i < in.length; i++) { -			res[i-from] = in[i]; -		} -		return res; -	} -	 -	public static void main(String[] args) { -		int[] i = {1,2,3,4}; -		util.Printing.printIntArray(i, null, "original"); -		util.Printing.printIntArray(reverseIntArray(i), null, "reversed"); -	} -} diff --git a/gi/posterior-regularisation/prjava/src/util/ArrayMath.java b/gi/posterior-regularisation/prjava/src/util/ArrayMath.java deleted file mode 100644 index 398a13a2..00000000 --- a/gi/posterior-regularisation/prjava/src/util/ArrayMath.java +++ /dev/null @@ -1,186 +0,0 @@ -package util; - -import java.util.Arrays; - -public class ArrayMath { - -	public static double dotProduct(double[] v1, double[] v2) { -		assert(v1.length == v2.length); -		double result = 0; -		for(int i = 0; i < v1.length; i++) -			result += v1[i]*v2[i]; -		return result; -	} - -	public static double twoNormSquared(double[] v) { -		double result = 0; -		for(double d : v) -			result += d*d; -		return result; -	} - -	public static boolean containsInvalid(double[] v) { -		for(int i = 0; i < v.length; i++) -			if(Double.isNaN(v[i]) || Double.isInfinite(v[i])) -				return true; -		return false; -	} - - -	 -	public static double safeAdd(double[] toAdd) { -		// Make sure there are no positive infinities -		double sum = 0; -		for(int i = 0; i < toAdd.length; i++) { -			assert(!(Double.isInfinite(toAdd[i]) && toAdd[i] > 0)); -			assert(!Double.isNaN(toAdd[i])); -			sum += toAdd[i]; -		} -		 -		return sum; -	} - -	/* Methods for filling integer and double arrays (of up to four dimensions) with the given value. 
*/ -	 -	public static void set(int[][][][] array, int value) { -		for(int i = 0; i < array.length; i++) { -			set(array[i], value); -		} -	} -	 -	public static void set(int[][][] array, int value) { -		for(int i = 0; i < array.length; i++) { -			set(array[i], value); -		} -	} -	 -	public static void set(int[][] array, int value) { -		for(int i = 0; i < array.length; i++) { -			set(array[i], value); -		} -	} -	 -	public static void set(int[] array, int value) { -		Arrays.fill(array, value); -	} -	 -	 -	public static void set(double[][][][] array, double value) { -		for(int i = 0; i < array.length; i++) { -			set(array[i], value); -		} -	} -	 -	public static void set(double[][][] array, double value) { -		for(int i = 0; i < array.length; i++) { -			set(array[i], value); -		} -	} -	 -	public static void set(double[][] array, double value) { -		for(int i = 0; i < array.length; i++) { -			set(array[i], value); -		} -	} -	 -	public static void set(double[] array, double value) { -		Arrays.fill(array, value); -	} - -	public static void setEqual(double[][][][] dest, double[][][][] source){ -		for (int i = 0; i < source.length; i++) { -			setEqual(dest[i],source[i]); -		} -	} - -	 -	public static void setEqual(double[][][] dest, double[][][] source){ -		for (int i = 0; i < source.length; i++) { -			set(dest[i],source[i]); -		} -	} - -	 -	public static void set(double[][] dest, double[][] source){ -		for (int i = 0; i < source.length; i++) { -			setEqual(dest[i],source[i]); -		} -	} - -	public static void setEqual(double[] dest, double[] source){ -		System.arraycopy(source, 0, dest, 0, source.length); -	} - -	public static void plusEquals(double[][][][] array, double val){ -		for (int i = 0; i < array.length; i++) { -			plusEquals(array[i], val); -		} -	}	 -	 -	public static void plusEquals(double[][][] array, double val){ -		for (int i = 0; i < array.length; i++) { -			plusEquals(array[i], val); -		} -	}	 -	 -	public static void plusEquals(double[][] array, double val){ -		for (int i = 0; i < array.length; i++) { -			plusEquals(array[i], val); -		} -	}	 -	 -	public static void plusEquals(double[] array, double val){ -		for (int i = 0; i < array.length; i++) { -			array[i] += val; -		} -	} - -	 -	public static double sum(double[] array) { -		double res = 0; -		for (int i = 0; i < array.length; i++) res += array[i]; -		return res; -	} - - -	 -	public static  double[][] deepclone(double[][] in){ -		double[][] res = new double[in.length][]; -		for (int i = 0; i < res.length; i++) { -			res[i] = in[i].clone(); -		} -		return res; -	} - -	 -	public static  double[][][] deepclone(double[][][] in){ -		double[][][] res = new double[in.length][][]; -		for (int i = 0; i < res.length; i++) { -			res[i] = deepclone(in[i]); -		} -		return res; -	} - -	public static double cosine(double[] a, -			double[] b) { -		return (dotProduct(a, b)+1e-5)/(Math.sqrt(dotProduct(a, a)+1e-5)*Math.sqrt(dotProduct(b, b)+1e-5)); -	} - -	public static double max(double[] ds) { -		double max = Double.NEGATIVE_INFINITY; -		for(double d:ds) max = Math.max(d,max); -		return max; -	} - -	public static void exponentiate(double[] a) { -		for (int i = 0; i < a.length; i++) { -			a[i] = Math.exp(a[i]); -		} -	} - -	public static int sum(int[] array) { -		int res = 0; -		for (int i = 0; i < array.length; i++) res += array[i]; -		return res; -	} -} diff --git a/gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java b/gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java deleted file mode 100644 index 
1ff1ae4a..00000000 --- a/gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java +++ /dev/null @@ -1,14 +0,0 @@ -package util; - -public interface DifferentiableObjective { - -	public double getValue(); - -	public void getGradient(double[] gradient); - -	public void getParameters(double[] params); - -	public void setParameters(double[] newParameters); - -	public int getNumParameters(); -} diff --git a/gi/posterior-regularisation/prjava/src/util/DigammaFunction.java b/gi/posterior-regularisation/prjava/src/util/DigammaFunction.java deleted file mode 100644 index ff1478ad..00000000 --- a/gi/posterior-regularisation/prjava/src/util/DigammaFunction.java +++ /dev/null @@ -1,21 +0,0 @@ -package util; - -public class DigammaFunction { -	public static double expDigamma(double number){ -		if(number==0)return number; -		return Math.exp(digamma(number)); -	} -	 -	public static double digamma(double number){ -		if(number > 7){ -			return digammApprox(number-0.5); -		}else{ -			return digamma(number+1) - 1.0/number; -		} -	} -	 -	private static double digammApprox(double value){ -		return Math.log(value) + 0.04167*Math.pow(value, -2) - 0.00729*Math.pow(value, -4)  -		+  0.00384*Math.pow(value, -6) - 0.00413*Math.pow(value, -8); -	} -} diff --git a/gi/posterior-regularisation/prjava/src/util/FileSystem.java b/gi/posterior-regularisation/prjava/src/util/FileSystem.java deleted file mode 100644 index d7812e40..00000000 --- a/gi/posterior-regularisation/prjava/src/util/FileSystem.java +++ /dev/null @@ -1,21 +0,0 @@ -package util; - -import java.io.File; - -public class FileSystem { -	public static boolean createDir(String directory) { - -		File dir = new File(directory); -		if (!dir.isDirectory()) { -			boolean success = dir.mkdirs(); -			if (!success) { -				System.out.println("Unable to create directory " + directory); -				return false; -			} -			System.out.println("Created directory " + directory); -		} else { -			System.out.println("Reusing directory " + directory); -		} -		return true; -	} -} diff --git a/gi/posterior-regularisation/prjava/src/util/InputOutput.java b/gi/posterior-regularisation/prjava/src/util/InputOutput.java deleted file mode 100644 index da7f71bf..00000000 --- a/gi/posterior-regularisation/prjava/src/util/InputOutput.java +++ /dev/null @@ -1,67 +0,0 @@ -package util; - -import java.io.BufferedReader; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.io.PrintStream; -import java.io.UnsupportedEncodingException; -import java.util.Properties; -import java.util.zip.GZIPInputStream; -import java.util.zip.GZIPOutputStream; - -public class InputOutput { - -	/** -	 * Opens a file either compress with gzip or not compressed. 
-	 */ -	public static BufferedReader openReader(String fileName) throws UnsupportedEncodingException, FileNotFoundException, IOException{ -		System.out.println("Reading: " + fileName); -		BufferedReader reader; -		fileName = fileName.trim(); -		if(fileName.endsWith("gz")){ -			reader = new BufferedReader( -			new InputStreamReader(new GZIPInputStream(new FileInputStream(fileName)),"UTF8")); -		}else{ -			reader = new BufferedReader(new InputStreamReader( -					new FileInputStream(fileName), "UTF8")); -		} -		 -		return reader; -	} -	 -	 -	public static PrintStream openWriter(String fileName)  -	throws UnsupportedEncodingException, FileNotFoundException, IOException{ -		System.out.println("Writting to file: " + fileName); -		PrintStream writter; -		fileName = fileName.trim(); -		if(fileName.endsWith("gz")){ -			writter = new PrintStream(new GZIPOutputStream(new FileOutputStream(fileName)), -					true, "UTF-8"); - -		}else{ -			writter = new PrintStream(new FileOutputStream(fileName), -					true, "UTF-8"); - -		} -		 -		return writter; -	} -	 -	public static Properties readPropertiesFile(String fileName) { -		Properties properties = new Properties(); -		try { -			properties.load(new FileInputStream(fileName)); -		} catch (IOException e) { -			e.printStackTrace(); -			throw new AssertionError("Wrong properties file " + fileName); -		} -		System.out.println(properties.toString()); -		 -		return properties; -	} -} diff --git a/gi/posterior-regularisation/prjava/src/util/LogSummer.java b/gi/posterior-regularisation/prjava/src/util/LogSummer.java deleted file mode 100644 index 117393b9..00000000 --- a/gi/posterior-regularisation/prjava/src/util/LogSummer.java +++ /dev/null @@ -1,86 +0,0 @@ -package util; - -import java.lang.Math; - -/* - * Math tool for computing logs of sums, when the terms of the sum are already in log form. - * (Useful if the terms of the sum are very small numbers.) - */ -public class LogSummer { -	 -	private LogSummer() { -	} -		 -	/** -	 * Given log(a) and log(b), computes log(a + b). -	 *  -	 * @param  loga log of first sum term -	 * @param  logb log of second sum term -	 * @return     log(sum), where sum = a + b -	 */ -	public static double sum(double loga, double logb) { -		assert(!Double.isNaN(loga)); -		assert(!Double.isNaN(logb)); -		 -		if(Double.isInfinite(loga)) -			return logb; -		if(Double.isInfinite(logb)) -			return loga; - -		double maxLog; -		double difference; -		if(loga > logb) { -			difference = logb - loga; -			maxLog = loga; -		} -		else { -			difference = loga - logb; -			maxLog = logb; -		} - -		return Math.log1p(Math.exp(difference)) + maxLog; -	} - -	/** -	 * Computes log(exp(array[index]) + b), and -	 * modifies array[index] to contain this new value. -	 *  -	 * @param array array to modify -	 * @param index index at which to modify -	 * @param logb  log of the second sum term -	 */ -	public static void sum(double[] array, int index, double logb) { -		array[index] = sum(array[index], logb); -	} -	 -	/** -	 * Computes log(a + b + c + ...) from log(a), log(b), log(c), ... -	 * by recursively splitting the input and delegating to the sum method. -	 *  -	 * @param  terms an array containing the log of all the terms for the sum -	 * @return log(sum), where sum = exp(terms[0]) + exp(terms[1]) + ... -	 */ -	public static double sumAll(double... terms) { -		return sumAllHelper(terms, 0, terms.length); -	} -	 -	/** -	 * Computes log(a_0 + a_1 + ...) 
from a_0 = exp(terms[begin]), -	 * a_1 = exp(terms[begin + 1]), ..., a_{end - 1 - begin} = exp(terms[end - 1]). -	 *  -	 * @param  terms an array containing the log of all the terms for the sum, -	 *               and possibly some other terms that will not go into the sum -	 * @return log of the sum of the elements in the [begin, end) region of the terms array -	 */ -	private static double sumAllHelper(final double[] terms, final int begin, final int end) { -		int length = end - begin; -		switch(length) { -			case 0: return Double.NEGATIVE_INFINITY; -			case 1: return terms[begin]; -			default: -				int midIndex = begin + length/2; -				return sum(sumAllHelper(terms, begin, midIndex), sumAllHelper(terms, midIndex, end)); -		} -	} - -}
\ No newline at end of file diff --git a/gi/posterior-regularisation/prjava/src/util/MathUtil.java b/gi/posterior-regularisation/prjava/src/util/MathUtil.java deleted file mode 100644 index 799b1faf..00000000 --- a/gi/posterior-regularisation/prjava/src/util/MathUtil.java +++ /dev/null @@ -1,148 +0,0 @@ -package util; - -import java.util.Random; - -public class MathUtil { -	public static final boolean closeToOne(double number){ -		return Math.abs(number-1) < 1.E-10; -	} -	 -	public static final boolean closeToZero(double number){ -		return Math.abs(number) < 1.E-5; -	} -	 -	/** -	 * Return a ramdom multinominal distribution. -	 *  -	 * @param size -	 * @return -	 */ -	public static final double[] randomVector(int size, Random r){ -		double[] random = new double[size]; -		double sum=0; -		for(int i = 0; i < size; i++){ -			double number = r.nextDouble(); -			random[i] = number; -			sum+=number; -		} -		for(int i = 0; i < size; i++){ -			random[i] = random[i]/sum; -		} -		return random; -	} -	 -	 - -	public static double sum(double[] ds) { -		double res = 0; -		for (int i = 0; i < ds.length; i++) { -			res+=ds[i]; -		} -		return res; -	} - -	public static double max(double[] ds) { -		double res = Double.NEGATIVE_INFINITY; -		for (int i = 0; i < ds.length; i++) { -			res = Math.max(res, ds[i]); -		} -		return res; -	} - -	public static double min(double[] ds) { -		double res = Double.POSITIVE_INFINITY; -		for (int i = 0; i < ds.length; i++) { -			res = Math.min(res, ds[i]); -		} -		return res; -	} - -	 -	public static double KLDistance(double[] p, double[] q) { -		int len = p.length; -		double kl = 0; -		for (int j = 0; j < len; j++) { -				if (p[j] == 0 || q[j] == 0) { -					continue; -				} else { -					kl += q[j] * Math.log(q[j] / p[j]); -				} - -		} -		return kl; -	} -	 -	public static double L2Distance(double[] p, double[] q) { -		int len = p.length; -		double l2 = 0; -		for (int j = 0; j < len; j++) { -				if (p[j] == 0 || q[j] == 0) { -					continue; -				} else { -					l2 += (q[j] - p[j])*(q[j] - p[j]); -				} - -		} -		return Math.sqrt(l2); -	} -	 -	public static double L1Distance(double[] p, double[] q) { -		int len = p.length; -		double l1 = 0; -		for (int j = 0; j < len; j++) { -				if (p[j] == 0 || q[j] == 0) { -					continue; -				} else { -					l1 += Math.abs(q[j] - p[j]); -				} - -		} -		return l1; -	} - -	public static double dot(double[] ds, double[] ds2) { -		double res = 0; -		for (int i = 0; i < ds2.length; i++) { -			res+= ds[i]*ds2[i]; -		} -		return res; -	} -	 -	public static double expDigamma(double number){ -		return Math.exp(digamma(number)); -	} -	 -	public static double digamma(double number){ -		if(number > 7){ -			return digammApprox(number-0.5); -		}else{ -			return digamma(number+1) - 1.0/number; -		} -	} -	 -	private static double digammApprox(double value){ -		return Math.log(value) + 0.04167*Math.pow(value, -2) - 0.00729*Math.pow(value, -4)  -		+  0.00384*Math.pow(value, -6) - 0.00413*Math.pow(value, -8); -	} - -	public static double eulerGamma = 0.57721566490152386060651209008240243; -	// FIXME -- so far just the initialization from Minka's paper "Estimating a Dirichlet distribution".  
-	public static double invDigamma(double y) { -		if (y>= -2.22) return Math.exp(y)+0.5; -		return -1.0/(y+eulerGamma); -	} - -	 -	 -	public static void main(String[] args) { -		for(double i = 0; i < 10 ; i+=0.1){ -			System.out.println(i+"\t"+expDigamma(i)+"\t"+(i-0.5)); -		} -//		double gammaValue = (expDigamma(3)/expDigamma(10) + expDigamma(3)/expDigamma(10) + expDigamma(4)/expDigamma(10)); -//		double normalValue = 3/10+3/4+10/10; -//		System.out.println("Gamma " + gammaValue + " normal " + normalValue); -	} - -	 -	 -} diff --git a/gi/posterior-regularisation/prjava/src/util/Matrix.java b/gi/posterior-regularisation/prjava/src/util/Matrix.java deleted file mode 100644 index 8fb6d911..00000000 --- a/gi/posterior-regularisation/prjava/src/util/Matrix.java +++ /dev/null @@ -1,16 +0,0 @@ -package util; - -public class Matrix { -	int x; -	int y; -	double[][] values; -	 -	public Matrix(int x, int y){ -		this.x = x; -		this.y=y; -		values = new double[x][y]; -	} -	 -	 -	 -} diff --git a/gi/posterior-regularisation/prjava/src/util/MemoryTracker.java b/gi/posterior-regularisation/prjava/src/util/MemoryTracker.java deleted file mode 100644 index 83a65611..00000000 --- a/gi/posterior-regularisation/prjava/src/util/MemoryTracker.java +++ /dev/null @@ -1,47 +0,0 @@ -package util; - - -public class MemoryTracker { -	 -	double initM,finalM; -	boolean start = false,finish = false; -	 -	public MemoryTracker(){ -		 -	} -	 -	public void start(){ -		System.gc(); -	    System.gc(); -	    System.gc(); -	    initM = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/(1024*1024);   -	    start = true; -	} -	 -	public void finish(){ -		if(!start){ -			throw new RuntimeException("Canot stop before starting"); -		} -		System.gc(); -	    System.gc(); -	    System.gc(); -	    finalM = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/(1024*1024);   -	    finish = true; -	} -	 -	public String print(){ -		if(!finish){ -			throw new RuntimeException("Canot print before stopping"); -		} -		return "Used: " + (finalM - initM) + "MB"; -	} -	 -	public void clear(){ -		initM = 0; -		finalM = 0; -		finish = false; -		start = false; -	} -	 -	 -} diff --git a/gi/posterior-regularisation/prjava/src/util/Pair.java b/gi/posterior-regularisation/prjava/src/util/Pair.java deleted file mode 100644 index 7b1f108d..00000000 --- a/gi/posterior-regularisation/prjava/src/util/Pair.java +++ /dev/null @@ -1,31 +0,0 @@ -package util; - -public class Pair<O1, O2> { -	public O1 _first; -	public O2 _second; - -	public final O1 first() { -		return _first; -	} - -	public final O2 second() { -		return _second; -	} - -	public final void setFirst(O1 value){ -		_first = value; -	} -	 -	public final void setSecond(O2 value){ -		_second = value; -	} -	 -	public Pair(O1 first, O2 second) { -		_first = first; -		_second = second; -	} - -	public String toString(){ -		return _first + " " + _second;  -	} -} diff --git a/gi/posterior-regularisation/prjava/src/util/Printing.java b/gi/posterior-regularisation/prjava/src/util/Printing.java deleted file mode 100644 index 14fcbe91..00000000 --- a/gi/posterior-regularisation/prjava/src/util/Printing.java +++ /dev/null @@ -1,158 +0,0 @@ -package util; - -public class Printing { -	static java.text.DecimalFormat fmt = new java.text.DecimalFormat(); - -	public static String padWithSpace(String s, int len){ -		StringBuffer sb = new StringBuffer(); -		while(sb.length() +s.length() < len){ -			sb.append(" "); -		} -		sb.append(s); -		return sb.toString(); -	} -	 -	public 
static String prettyPrint(double d, String patt, int len) { -		fmt.applyPattern(patt); -		String s = fmt.format(d); -		while (s.length() < len) { -			s = " " + s; -		} -		return s; -	} -	 -	public static  String formatTime(long duration) { -		StringBuilder sb = new StringBuilder(); -		double d = duration / 1000; -		fmt.applyPattern("00"); -		sb.append(fmt.format((int) (d / (60 * 60))) + ":"); -		d -= ((int) d / (60 * 60)) * 60 * 60; -		sb.append(fmt.format((int) (d / 60)) + ":"); -		d -= ((int) d / 60) * 60; -		fmt.applyPattern("00.0"); -		sb.append(fmt.format(d)); -		return sb.toString(); -	} -	 -	 -	public static String doubleArrayToString(double[] array, String[] labels, String arrayName) { -		StringBuffer res = new StringBuffer(); -		res.append(arrayName); -		res.append("\n"); -		for (int i = 0; i < array.length; i++) { -			if (labels == null){ -				res.append(i+"       \t"); -			}else{ -				res.append(labels[i]+     "\t"); -			} -		} -		res.append("sum\n"); -		double sum = 0; -		for (int i = 0; i < array.length; i++) { -			res.append(prettyPrint(array[i], -					"0.00000E00", 8) + "\t"); -			sum+=array[i]; -		} -		res.append(prettyPrint(sum, -				"0.00000E00", 8)+"\n"); -		return res.toString(); -	} -	 -	 -	 -	public static void printDoubleArray(double[] array, String labels[], String arrayName) { -		System.out.println(doubleArrayToString(array, labels,arrayName)); -	} -	 -	 -	public static String doubleArrayToString(double[][] array, String[] labels1, String[] labels2, -			String arrayName){ -		StringBuffer res = new StringBuffer(); -		res.append(arrayName); -		res.append("\n\t"); -		//Calculates the column sum to keeps the sums -		double[] sums = new double[array[0].length+1]; -		//Prints rows headings -		for (int i = 0; i < array[0].length; i++) { -			if (labels1 == null){ -				res.append(i+"        \t"); -			}else{ -				res.append(labels1[i]+"        \t"); -			} -		} -		res.append("sum\n"); -		double sum = 0; -		//For each row print heading -		for (int i = 0; i < array.length; i++) { -			if (labels2 == null){ -				res.append(i+"\t"); -			}else{ -				res.append(labels2[i]+"\t"); -			} -			//Print values for that row -			for (int j = 0; j < array[0].length; j++) { -				res.append(" " + prettyPrint(array[i][j], -						"0.00000E00", 8) + "\t"); -				sums[j] += array[i][j];  -				sum+=array[i][j]; //Sum all values of that row -			} -			//Print row sum -			res.append(prettyPrint(sum,"0.00000E00", 8)+"\n"); -			sums[array[0].length]+=sum; -			sum=0; -		} -		res.append("sum\t"); -		//Print values for colums sum -		for (int i = 0; i < array[0].length+1; i++) { -			res.append(prettyPrint(sums[i],"0.00000E00", 8)+"\t"); -		} -		res.append("\n"); -		return res.toString(); -	} -	 -	public static void printDoubleArray(double[][] array, String[] labels1, String[] labels2 -			, String arrayName) { -		System.out.println(doubleArrayToString(array, labels1,labels2,arrayName)); -	} -	 -	 -	public static void printIntArray(int[][] array, String[] labels1, String[] labels2, String arrayName, -			int size1, int size2) { -		System.out.println(arrayName); -		for (int i = 0; i < size1; i++) { -			for (int j = 0; j < size2; j++) { -				System.out.print(" " + array[i][j] +  " "); - -			} -			System.out.println(); -		} -		System.out.println(); -	} -	 -	public static String intArrayToString(int[] array, String[] labels, String arrayName) { -		StringBuffer res = new StringBuffer(); -		res.append(arrayName); -		for (int i = 0; i < array.length; i++) { -			res.append(" " + array[i] + " "); -			 -		} -		
res.append("\n"); -		return res.toString(); -	} -	 -	public static void printIntArray(int[] array, String[] labels, String arrayName) { -		System.out.println(intArrayToString(array, labels,arrayName)); -	} -	 -	public static String toString(double[][] d){ -		StringBuffer sb = new StringBuffer(); -		for (int i = 0; i < d.length; i++) { -			for (int j = 0; j < d[0].length; j++) { -				sb.append(prettyPrint(d[i][j], "0.00E0", 10)); -			} -			sb.append("\n"); -		} -		return sb.toString(); -	} -	 -} diff --git a/gi/posterior-regularisation/prjava/src/util/Sorters.java b/gi/posterior-regularisation/prjava/src/util/Sorters.java deleted file mode 100644 index 836444e5..00000000 --- a/gi/posterior-regularisation/prjava/src/util/Sorters.java +++ /dev/null @@ -1,39 +0,0 @@ -package util; - -import java.util.Comparator; - -public class Sorters { -	public static class sortWordsCounts implements Comparator{ -		 -		/** -		 * Sorter for a pair of word id, counts. Sort ascending by counts -		 */ -		public int compare(Object arg0, Object arg1) { -			Pair<Integer,Integer> p1 = (Pair<Integer,Integer>)arg0; -			Pair<Integer,Integer> p2 = (Pair<Integer,Integer>)arg1; -			if(p1.second() > p2.second()){ -				return 1; -			}else{ -				return -1; -			} -		} -		 -	} -	 -public static class sortWordsDouble implements Comparator{ -		 -		/** -		 * Sorter for a pair of word id, counts. Sort by counts -		 */ -		public int compare(Object arg0, Object arg1) { -			Pair<Integer,Double> p1 = (Pair<Integer,Double>)arg0; -			Pair<Integer,Double> p2 = (Pair<Integer,Double>)arg1; -			if(p1.second() < p2.second()){ -				return 1; -			}else{ -				return -1; -			} -		} -		 -	} -} diff --git a/gi/posterior-regularisation/prjava/train-PR-cluster.sh b/gi/posterior-regularisation/prjava/train-PR-cluster.sh deleted file mode 100755 index 67552c00..00000000 --- a/gi/posterior-regularisation/prjava/train-PR-cluster.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh - -d=`dirname $0` -java -ea -Xmx30g -cp $d/prjava.jar:$d/lib/trove-2.0.2.jar:$d/lib/optimization.jar:$d/lib/jopt-simple-3.2.jar:$d/lib/commons-math-2.1.jar phrase.Trainer $* diff --git a/gi/posterior-regularisation/projected_gradient.cc b/gi/posterior-regularisation/projected_gradient.cc deleted file mode 100644 index f7c39817..00000000 --- a/gi/posterior-regularisation/projected_gradient.cc +++ /dev/null @@ -1,87 +0,0 @@ -// -// Minimises given functional using the projected gradient method. Based on -// algorithm and demonstration example in Linear and Nonlinear Programming, -// Luenberger and Ye, 3rd ed., p 370. 
-// - -#include "invert.hh" -#include <iostream> - -using namespace std; - -double  -f(double x1, double x2, double x3, double x4) -{ -    return x1 * x1 + x2 * x2 + x3 * x3 + x4 * x4 - 2 * x1 - 3 * x4; -} - -ublas::vector<double>  -g(double x1, double x2, double x3, double x4) -{ -    ublas::vector<double> v(4); -    v(0) = 2 * x1 - 2; -    v(1) = 2 * x2; -    v(2) = 2 * x3; -    v(3) = 2 * x4 - 3; -    return v; -} - -ublas::matrix<double>  -activeConstraints(double x1, double x2, double x3, double x4) -{ -    int n = 2; -    if (x1 == 0) ++n; -    if (x2 == 0) ++n; -    if (x3 == 0) ++n; -    if (x4 == 0) ++n; - -    ublas::matrix<double> a(n,4); -    a(0, 0) = 2; a(0, 1) = 1; a(0, 2) = 1; a(0, 3) = 4; -    a(1, 0) = 1; a(1, 1) = 1; a(1, 2) = 2; a(1, 3) = 1; - -    int c = 2; -    if (x1 == 0) a(c++, 0) = 1; -    if (x2 == 0) a(c++, 1) = 1; -    if (x3 == 0) a(c++, 2) = 1; -    if (x4 == 0) a(c++, 3) = 1; - -    return a; -} - -ublas::matrix<double> -projection(const ublas::matrix<double> &a) -{ -    ublas::matrix<double> aT = ublas::trans(a); -    ublas::matrix<double> inv(a.size1(), a.size1()); -    bool ok = invert_matrix(ublas::matrix<double>(ublas::prod(a, aT)), inv); -    assert(ok && "Failed to invert matrix"); -    return ublas::identity_matrix<double>(4) -  -        ublas::prod(aT, ublas::matrix<double>(ublas::prod(inv, a))); -} - -int main(int argc, char *argv[]) -{ -    double x1 = 2, x2 = 2, x3 = 1, x4 = 0; - -    double fval = f(x1, x2, x3, x4); -    cout << "f = " << fval << endl; -    ublas::vector<double> grad = g(x1, x2, x3, x4); -    cout << "g = " << grad << endl; -    ublas::matrix<double> A = activeConstraints(x1, x2, x3, x4); -    cout << "A = " << A << endl; -    ublas::matrix<double> P = projection(A); -    cout << "P = " << P << endl; -    // the direction of movement -    ublas::vector<double> d = prod(P, grad); -    cout << "d = " << (d / d(0)) << endl; - -    // special case for d = 0 - -    // next solve for limits on the line search - -    // then use golden section search between these values (if bounded) - -    // or simple Armijo's rule technique - -    return 0; -} diff --git a/gi/posterior-regularisation/simplex_pg.py b/gi/posterior-regularisation/simplex_pg.py deleted file mode 100644 index 5da796d3..00000000 --- a/gi/posterior-regularisation/simplex_pg.py +++ /dev/null @@ -1,55 +0,0 @@ -# -# Following Luenberger and Ye, Linear and Nonlinear Programming, 3rd ed. p367 -# "The gradient projection method" -# applied to an equality constraint for a simplex. -# -#   min f(x) -#   s.t. x >= 0, sum_i x = d -# -# FIXME: enforce the positivity constraint - a limit on the line search? -# - -from numpy import * -from scipy import * -from linesearch import line_search -# local copy of scipy's Armijo line_search - wasn't enforcing alpha max correctly -import sys - -dims = 4 - -def f(x): -    fv = x[0]*x[0] + x[1]*x[1] + x[2]*x[2] + x[3]*x[3] - 2*x[0] - 3*x[3] -    # print 'evaluating f at', x, 'value', fv -    return fv - -def g(x): -    return array([2*x[0] - 2, 2*x[1], 2*x[2], 2*x[3]-3]) - -def pg(x): -    gv = g(x) -    return gv - sum(gv) / dims - -x = ones(dims) / dims -old_fval = None - -while True: -    fv = f(x) -    gv = g(x) -    dv = pg(x) - -    print 'x', x, 'f', fv, 'g', gv, 'd', dv - -    if old_fval is None: -        old_fval = fv + 0.1 - -    # solve for maximum step size i.e. 
when positivity constraints kick in -    # x - alpha d = 0   => alpha = x/d -    amax = max(x/dv) -    if amax < 1e-8: break - -    stuff = line_search(f, pg, x, -dv, dv, fv, old_fval, amax=amax) -    alpha = stuff[0] # Nb. can avoid next evaluation of f,g,d using 'stuff' -    if alpha < 1e-8: break -    x -= alpha * dv - -    old_fval = fv diff --git a/gi/posterior-regularisation/split-languages.py b/gi/posterior-regularisation/split-languages.py deleted file mode 100755 index 206da661..00000000 --- a/gi/posterior-regularisation/split-languages.py +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/python - -import sys - -sout = open(sys.argv[1], 'w') -tout = open(sys.argv[2], 'w') -for line in sys.stdin: -	phrase, contexts = line.rstrip().split('\t') -	sp, tp = phrase.split(' <SPLIT> ') -	sout.write('%s\t' % sp) -	tout.write('%s\t' % tp) -	parts = contexts.split(' ||| ') -	for i in range(0, len(parts), 2): -		sc, tc = parts[i].split(' <SPLIT> ') -		if i != 0: -			sout.write(' ||| ') -			tout.write(' ||| ') -		sout.write('%s ||| %s' % (sc, parts[i+1])) -		tout.write('%s ||| %s' % (tc, parts[i+1])) -	sout.write('\n') -	tout.write('\n') -sout.close() -tout.close() diff --git a/gi/posterior-regularisation/train_pr_agree.py b/gi/posterior-regularisation/train_pr_agree.py deleted file mode 100644 index 9d41362d..00000000 --- a/gi/posterior-regularisation/train_pr_agree.py +++ /dev/null @@ -1,400 +0,0 @@ -import sys -import scipy.optimize -from scipy.stats import geom -from numpy import * -from numpy.random import random, seed - -style = sys.argv[1] -if len(sys.argv) >= 3: -     seed(int(sys.argv[2])) - -# -# Step 1: load the concordance counts -#  - -edges = [] -word_types = {} -phrase_types = {} -context_types = {} - -for line in sys.stdin: -    phrase, rest = line.strip().split('\t') -    ptoks = tuple(map(lambda t: word_types.setdefault(t, len(word_types)), phrase.split())) -    pid = phrase_types.setdefault(ptoks, len(phrase_types)) - -    parts = rest.split('|||') -    for i in range(0, len(parts), 2): -        context, count = parts[i:i+2] - -        ctx = filter(lambda x: x != '<PHRASE>', context.split()) -        ctoks = tuple(map(lambda t: word_types.setdefault(t, len(word_types)), ctx)) -        cid = context_types.setdefault(ctoks, len(context_types)) - -        cnt = int(count.strip()[2:]) -        edges.append((pid, cid, cnt)) - -word_type_list = [None] * len(word_types) -for typ, index in word_types.items(): -    word_type_list[index] = typ - -phrase_type_list = [None] * len(phrase_types) -for typ, index in phrase_types.items(): -    phrase_type_list[index] = typ - -context_type_list = [None] * len(context_types) -for typ, index in context_types.items(): -    context_type_list[index] = typ - -num_tags = 5 -num_types = len(word_types) -num_phrases = len(phrase_types) -num_contexts = len(context_types) -num_edges = len(edges) - -print 'Read in', num_edges, 'edges', num_phrases, 'phrases', num_contexts, 'contexts and', num_types, 'word types' - -# -# Step 2:  expectation maximisation  -# - -def normalise(a): -    return a / float(sum(a)) - -class PhraseToContextModel: -    def __init__(self): -        # Pr(tag | phrase) -        self.tagDist = [normalise(random(num_tags)+1) for p in range(num_phrases)] -        # Pr(context at pos i = w | tag) indexed by i, tag, word -        self.contextWordDist = [[normalise(random(num_types)+1) for t in range(num_tags)] for i in range(4)] - -    def prob(self, pid, cid): -        # return distribution p(tag, context | phrase) as vector of length |tags| -  
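
As an aside, the joint score assembled in the method body below is p(t, c | phrase) = Pr(t | phrase) * prod_{i=1..4} Pr(c_i | t, i). A small self-contained numpy sketch with toy sizes; all names and dimensions here are illustrative, not taken from the training data.

    import numpy as np

    num_tags, num_types = 5, 100                        # toy sizes
    tag_dist = np.random.dirichlet(np.ones(num_tags))   # Pr(tag | phrase), one phrase
    ctx_word = np.random.dirichlet(np.ones(num_types), (4, num_tags))  # Pr(word | tag, position)

    def joint_over_tags(context_ids):
        # p(t, c | phrase) = Pr(t | phrase) * prod_i Pr(c_i | t, i), per tag t
        dist = tag_dist.copy()
        for i, w in enumerate(context_ids):
            dist *= ctx_word[i, :, w]
        return dist

    print(joint_over_tags([3, 17, 42, 8]))
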
      context = context_type_list[cid] -        dist = zeros(num_tags) -        for t in range(num_tags): -            prob = self.tagDist[pid][t] -            for k, tokid in enumerate(context): -                prob *= self.contextWordDist[k][t][tokid] -            dist[t] = prob -        return dist - -    def expectation_maximisation_step(self, lamba=None): -        tagCounts = zeros((num_phrases, num_tags)) -        contextWordCounts = zeros((4, num_tags, num_types)) - -        # E-step -        llh = 0 -        for pid, cid, cnt in edges: -            q = self.prob(pid, cid) -            z = sum(q) -            q /= z -            llh += log(z) -            context = context_type_list[cid] -            if lamba != None: -                q *= exp(lamba) -                q /= sum(q) -            for t in range(num_tags): -                tagCounts[pid][t] += cnt * q[t] -            for i in range(4): -                for t in range(num_tags): -                    contextWordCounts[i][t][context[i]] += cnt * q[t] - -        # M-step -        for p in range(num_phrases): -            self.tagDist[p] = normalise(tagCounts[p]) -        for i in range(4): -            for t in range(num_tags): -                self.contextWordDist[i][t] = normalise(contextWordCounts[i,t]) - -        return llh - -class ContextToPhraseModel: -    def __init__(self): -        # Pr(tag | context) = Multinomial -        self.tagDist = [normalise(random(num_tags)+1) for p in range(num_contexts)] -        # Pr(phrase = w | tag) = Multinomial -        self.phraseSingleDist = [normalise(random(num_types)+1) for t in range(num_tags)] -        # Pr(phrase_1 = w | tag) = Multinomial -        self.phraseLeftDist = [normalise(random(num_types)+1) for t in range(num_tags)] -        # Pr(phrase_-1 = w | tag) = Multinomial -        self.phraseRightDist = [normalise(random(num_types)+1) for t in range(num_tags)] -        # Pr(|phrase| = l | tag) = Geometric -        self.phraseLengthDist = [0.5] * num_tags -        # n.b. 
internal words for phrases of length >= 3 are drawn from uniform distribution - -    def prob(self, pid, cid): -        # return distribution p(tag, phrase | context) as vector of length |tags| -        phrase = phrase_type_list[pid] -        dist = zeros(num_tags) -        for t in range(num_tags): -            prob = self.tagDist[cid][t] -            f = self.phraseLengthDist[t] -            prob *= geom.pmf(len(phrase), f) -            if len(phrase) == 1: -                prob *= self.phraseSingleDist[t][phrase[0]] -            else: -                prob *= self.phraseLeftDist[t][phrase[0]] -                prob *= self.phraseRightDist[t][phrase[-1]] -            dist[t] = prob -        return dist - -    def expectation_maximisation_step(self, lamba=None): -        tagCounts = zeros((num_contexts, num_tags)) -        phraseSingleCounts = zeros((num_tags, num_types)) -        phraseLeftCounts = zeros((num_tags, num_types)) -        phraseRightCounts = zeros((num_tags, num_types)) -        phraseLength = zeros(num_types) - -        # E-step -        llh = 0 -        for pid, cid, cnt in edges: -            q = self.prob(pid, cid) -            z = sum(q) -            q /= z -            llh += log(z) -            if lamba != None: -                q *= exp(lamba) -                q /= sum(q) -            #print 'p', phrase_type_list[pid], 'c', context_type_list[cid], 'q', q -            phrase = phrase_type_list[pid] -            for t in range(num_tags): -                tagCounts[cid][t] += cnt * q[t] -                phraseLength[t] += cnt * len(phrase) * q[t] -                if len(phrase) == 1: -                    phraseSingleCounts[t][phrase[0]] += cnt * q[t] -                else: -                    phraseLeftCounts[t][phrase[0]] += cnt * q[t] -                    phraseRightCounts[t][phrase[-1]] += cnt * q[t] - -        # M-step -        for t in range(num_tags): -            self.phraseLengthDist[t] = min(max(sum(tagCounts[:,t]) / phraseLength[t], 1e-6), 1-1e-6) -            self.phraseSingleDist[t] = normalise(phraseSingleCounts[t]) -            self.phraseLeftDist[t] = normalise(phraseLeftCounts[t]) -            self.phraseRightDist[t] = normalise(phraseRightCounts[t]) -        for c in range(num_contexts): -            self.tagDist[c] = normalise(tagCounts[c]) - -        #print 't', self.tagDist -        #print 'l', self.phraseLengthDist -        #print 's', self.phraseSingleDist -        #print 'L', self.phraseLeftDist -        #print 'R', self.phraseRightDist - -        return llh - -class ProductModel: -    """ -    WARNING: I haven't verified the maths behind this model. It's quite likely to be incorrect. 
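
The combination itself is simple to state: each component model is normalised into a posterior over tags and the two are multiplied elementwise; the EM step below then renormalises the square root of that product, i.e. the geometric mean of the two posteriors. A minimal numpy sketch, illustrative only and not part of the deleted file:

    import numpy as np

    def combined_posterior(p1, p2):
        # elementwise product of the two normalised posteriors (product of experts)
        q = (p1 / p1.sum()) * (p2 / p2.sum())
        # the EM step uses the square root, i.e. the geometric mean, renormalised
        qi = np.sqrt(q)
        return qi / qi.sum()

    p1 = np.array([0.4, 0.3, 0.1, 0.1, 0.1])    # toy 5-tag posteriors
    p2 = np.array([0.25, 0.35, 0.2, 0.1, 0.1])
    print(combined_posterior(p1, p2))
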
-    """ - -    def __init__(self): -        self.pcm = PhraseToContextModel() -        self.cpm = ContextToPhraseModel() - -    def prob(self, pid, cid): -        p1 = self.pcm.prob(pid, cid) -        p2 = self.cpm.prob(pid, cid) -        return (p1 / sum(p1)) * (p2 / sum(p2)) - -    def expectation_maximisation_step(self): -        tagCountsGivenPhrase = zeros((num_phrases, num_tags)) -        contextWordCounts = zeros((4, num_tags, num_types)) - -        tagCountsGivenContext = zeros((num_contexts, num_tags)) -        phraseSingleCounts = zeros((num_tags, num_types)) -        phraseLeftCounts = zeros((num_tags, num_types)) -        phraseRightCounts = zeros((num_tags, num_types)) -        phraseLength = zeros(num_types) - -        kl = llh1 = llh2 = 0 -        for pid, cid, cnt in edges: -            p1 = self.pcm.prob(pid, cid) -            llh1 += log(sum(p1)) * cnt -            p2 = self.cpm.prob(pid, cid) -            llh2 += log(sum(p2)) * cnt - -            q = (p1 / sum(p1)) * (p2 / sum(p2)) -            kl += log(sum(q)) * cnt -            qi = sqrt(q) -            qi /= sum(qi) - -            phrase = phrase_type_list[pid] -            context = context_type_list[cid] -            for t in range(num_tags): -                tagCountsGivenPhrase[pid][t] += cnt * qi[t] -                tagCountsGivenContext[cid][t] += cnt * qi[t] -                phraseLength[t] += cnt * len(phrase) * qi[t] -                if len(phrase) == 1: -                    phraseSingleCounts[t][phrase[0]] += cnt * qi[t] -                else: -                    phraseLeftCounts[t][phrase[0]] += cnt * qi[t] -                    phraseRightCounts[t][phrase[-1]] += cnt * qi[t] -                for i in range(4): -                    contextWordCounts[i][t][context[i]] += cnt * qi[t] - -        kl *= -2 - -        for t in range(num_tags): -            for i in range(4): -                self.pcm.contextWordDist[i][t] = normalise(contextWordCounts[i,t]) -            self.cpm.phraseLengthDist[t] = min(max(sum(tagCountsGivenContext[:,t]) / phraseLength[t], 1e-6), 1-1e-6) -            self.cpm.phraseSingleDist[t] = normalise(phraseSingleCounts[t]) -            self.cpm.phraseLeftDist[t] = normalise(phraseLeftCounts[t]) -            self.cpm.phraseRightDist[t] = normalise(phraseRightCounts[t]) -        for p in range(num_phrases): -            self.pcm.tagDist[p] = normalise(tagCountsGivenPhrase[p]) -        for c in range(num_contexts): -            self.cpm.tagDist[c] = normalise(tagCountsGivenContext[c]) - -        # return the overall objective -        return llh1 + llh2 + kl - -class RegularisedProductModel: -    # as above, but with a slack regularisation term which kills the -    # closed-form solution for the E-step - -    def __init__(self, epsilon): -        self.pcm = PhraseToContextModel() -        self.cpm = ContextToPhraseModel() -        self.epsilon = epsilon -        self.lamba = zeros(num_tags) - -    def prob(self, pid, cid): -        p1 = self.pcm.prob(pid, cid) -        p2 = self.cpm.prob(pid, cid) -        return (p1 / sum(p1)) * (p2 / sum(p2)) - -    def dual(self, lamba): -        return self.logz(lamba) + self.epsilon * dot(lamba, lamba) ** 0.5 - -    def dual_gradient(self, lamba): -        return self.expected_features(lamba) + self.epsilon * 2 * lamba - -    def expectation_maximisation_step(self): -        # PR-step: optimise lambda to minimise log(z_lambda) + eps ||lambda||_2 -        self.lamba = scipy.optimize.fmin_slsqp(self.dual, self.lamba, -                                
bounds=[(0, 1e100)] * num_tags, -                                fprime=self.dual_gradient, iprint=1) - -        # E,M-steps: collect expected counts under q_lambda and normalise -        llh1 = self.pcm.expectation_maximisation_step(self.lamba) -        llh2 = self.cpm.expectation_maximisation_step(-self.lamba) - -        # return the overall objective: llh - KL(q||p1.p2) -        # llh = llh1 + llh2 -        # kl = sum q log q / p1 p2 = sum q { lambda . phi } - log Z -        return llh1 + llh2 + self.logz(self.lamba) \ -            - dot(self.lamba, self.expected_features(self.lamba)) - -    def logz(self, lamba): -        lz = 0 -        for pid, cid, cnt in edges: -            p1 = self.pcm.prob(pid, cid) -            z1 = dot(p1 / sum(p1), exp(lamba)) -            lz += log(z1) * cnt - -            p2 = self.cpm.prob(pid, cid) -            z2 = dot(p2 / sum(p2), exp(-lamba)) -            lz += log(z2) * cnt -        return lz - -    def expected_features(self, lamba): -        fs = zeros(num_tags) -        for pid, cid, cnt in edges: -            p1 = self.pcm.prob(pid, cid) -            q1 = (p1 / sum(p1)) * exp(lamba) -            fs += cnt * q1 / sum(q1) - -            p2 = self.cpm.prob(pid, cid) -            q2 = (p2 / sum(p2)) * exp(-lamba) -            fs -= cnt * q2 / sum(q2) -        return fs - - -class InterpolatedModel: -    def __init__(self, epsilon): -        self.pcm = PhraseToContextModel() -        self.cpm = ContextToPhraseModel() -        self.epsilon = epsilon -        self.lamba = zeros(num_tags) - -    def prob(self, pid, cid): -        p1 = self.pcm.prob(pid, cid) -        p2 = self.cpm.prob(pid, cid) -        return (p1 + p2) / 2 - -    def dual(self, lamba): -        return self.logz(lamba) + self.epsilon * dot(lamba, lamba) ** 0.5 - -    def dual_gradient(self, lamba): -        return self.expected_features(lamba) + self.epsilon * 2 * lamba - -    def expectation_maximisation_step(self): -        # PR-step: optimise lambda to minimise log(z_lambda) + eps ||lambda||_2 -        self.lamba = scipy.optimize.fmin_slsqp(self.dual, self.lamba, -                                bounds=[(0, 1e100)] * num_tags, -                                fprime=self.dual_gradient, iprint=2) - -        # E,M-steps: collect expected counts under q_lambda and normalise -        llh1 = self.pcm.expectation_maximisation_step(self.lamba) -        llh2 = self.cpm.expectation_maximisation_step(self.lamba) - -        # return the overall objective: llh1 + llh2 - KL(q||p1.p2) -        # kl = sum_y q log q / 0.5 * (p1 + p2) = sum_y q(y) { -lambda . phi(y) } - log Z -        #    = -log Z + lambda . (E_q1[-phi] + E_q2[-phi]) / 2 -        kl = -self.logz(self.lamba) + dot(self.lamba, self.expected_features(self.lamba)) -        return llh1 + llh2 - kl, llh1, llh2, kl -        # FIXME: KL comes out negative... - -    def logz(self, lamba): -        lz = 0 -        for pid, cid, cnt in edges: -            p1 = self.pcm.prob(pid, cid) -            q1 = p1 / sum(p1) * exp(-lamba) -            q1z = sum(q1) - -            p2 = self.cpm.prob(pid, cid) -            q2 = p2 / sum(p2) * exp(-lamba) -            q2z = sum(q2) - -            lz += log(0.5 * (q1z + q2z)) * cnt -        return lz - -    # z = 1/2 * (sum_y p1(y|x) exp (-lambda . phi(y)) + sum_y p2(y|x) exp (-lambda . 
phi(y))) -    #   = 1/2 (z1 + z2) -    # d (log z) / dlambda = 1/2 (E_q1 [ -phi ] + E_q2 [ -phi ] ) -    def expected_features(self, lamba): -        fs = zeros(num_tags) -        for pid, cid, cnt in edges: -            p1 = self.pcm.prob(pid, cid) -            q1 = (p1 / sum(p1)) * exp(-lamba) -            fs -= 0.5 * cnt * q1 / sum(q1) - -            p2 = self.cpm.prob(pid, cid) -            q2 = (p2 / sum(p2)) * exp(-lamba) -            fs -= 0.5 * cnt * q2 / sum(q2) -        return fs - -if style == 'p2c': -    m = PhraseToContextModel() -elif style == 'c2p': -    m = ContextToPhraseModel() -elif style == 'prod': -    m = ProductModel() -elif style == 'prodslack': -    m = RegularisedProductModel(0.5) -elif style == 'sum': -    m = InterpolatedModel(0.5) - -for iteration in range(30): -    obj = m.expectation_maximisation_step() -    print 'iteration', iteration, 'objective', obj - -for pid, cid, cnt in edges: -    p = m.prob(pid, cid) -    phrase = phrase_type_list[pid] -    phrase_str = ' '.join(map(word_type_list.__getitem__, phrase)) -    context = context_type_list[cid] -    context_str = ' '.join(map(word_type_list.__getitem__, context)) -    print '%s\t%s ||| C=%d' % (phrase_str, context_str, argmax(p)) diff --git a/gi/posterior-regularisation/train_pr_global.py b/gi/posterior-regularisation/train_pr_global.py deleted file mode 100644 index 8521bccb..00000000 --- a/gi/posterior-regularisation/train_pr_global.py +++ /dev/null @@ -1,296 +0,0 @@ -import sys -import scipy.optimize -from numpy import * -from numpy.random import random - -# -# Step 1: load the concordance counts -#  - -edges_phrase_to_context = [] -edges_context_to_phrase = [] -types = {} -context_types = {} -num_edges = 0 - -for line in sys.stdin: -    phrase, rest = line.strip().split('\t') -    parts = rest.split('|||') -    edges_phrase_to_context.append((phrase, [])) -    for i in range(0, len(parts), 2): -        context, count = parts[i:i+2] - -        ctx = tuple(filter(lambda x: x != '<PHRASE>', context.split())) -        cnt = int(count.strip()[2:]) -        edges_phrase_to_context[-1][1].append((ctx, cnt)) - -        cid = context_types.get(ctx, len(context_types)) -        if cid == len(context_types): -            context_types[ctx] = cid -            edges_context_to_phrase.append((ctx, [])) -        edges_context_to_phrase[cid][1].append((phrase, cnt)) - -        for token in ctx: -            types.setdefault(token, len(types)) -        for token in phrase.split(): -            types.setdefault(token, len(types)) - -        num_edges += 1 - -print 'Read in', num_edges, 'edges and', len(types), 'word types' - -print 'edges_phrase_to_context', edges_phrase_to_context - -# -# Step 2: initialise the model parameters -# - -num_tags = 10 -num_types = len(types) -num_phrases = len(edges_phrase_to_context) -num_contexts = len(edges_context_to_phrase) -delta = int(sys.argv[1]) -gamma = int(sys.argv[2]) - -def normalise(a): -    return a / float(sum(a)) - -# Pr(tag | phrase) -tagDist = [normalise(random(num_tags)+1) for p in range(num_phrases)] -#tagDist = [normalise(array(range(1,num_tags+1))) for p in range(num_phrases)] -# Pr(context at pos i = w | tag) indexed by i, tag, word -#contextWordDist = [[normalise(array(range(1,num_types+1))) for t in range(num_tags)] for i in range(4)] -contextWordDist = [[normalise(random(num_types)+1) for t in range(num_tags)] for i in range(4)] -# PR langrange multipliers -lamba = zeros(2 * num_edges * num_tags) -omega_offset = num_edges * num_tags -lamba_index = {} -next = 0 
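
The multipliers are stored flat: lamba holds 2 * num_edges * num_tags entries, the first num_edges * num_tags for the phrase-to-context constraints (capped by delta) and the block starting at omega_offset for the context-to-phrase ones (capped by gamma); the loop that follows fills lamba_index, mapping each (phrase, context) edge to the base of its num_tags-wide slice. A toy sketch of the slot arithmetic; the _toy names are illustrative only.

    num_edges_toy, num_tags_toy = 3, 2
    omega_offset_toy = num_edges_toy * num_tags_toy

    edge_base = 1 * num_tags_toy   # what lamba_index would store for the second edge
    for t in range(num_tags_toy):
        print("tag %d: lambda slot %d, omega slot %d"
              % (t, edge_base + t, omega_offset_toy + edge_base + t))
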
-for phrase, ccs in edges_phrase_to_context: -    for context, count in ccs: -        lamba_index[phrase,context] = next -        next += num_tags -#print lamba_index - -# -# Step 3: expectation maximisation -# - -for iteration in range(20): -    tagCounts = [zeros(num_tags) for p in range(num_phrases)] -    contextWordCounts = [[zeros(num_types) for t in range(num_tags)] for i in range(4)] - -    #print 'tagDist', tagDist -    #print 'contextWordCounts[0][0]', contextWordCounts[0][0] - -    # Tune lambda -    # dual: min log Z(lamba) s.t. lamba >= 0; -    # sum_c lamba_pct <= delta; sum_p lamba_pct <= gamma -    def dual(ls): -        logz = 0 -        for p, (phrase, ccs) in enumerate(edges_phrase_to_context): -            for context, count in ccs: -                conditionals = zeros(num_tags) -                for t in range(num_tags): -                    prob = tagDist[p][t] -                    for i in range(4): -                        prob *= contextWordDist[i][t][types[context[i]]] -                    conditionals[t] = prob -                cz = sum(conditionals) -                conditionals /= cz - -                #print 'dual', phrase, context, count, 'p =', conditionals - -                local_z = 0 -                for t in range(num_tags): -                    li = lamba_index[phrase,context] + t -                    local_z += conditionals[t] * exp(-ls[li] - ls[omega_offset+li]) -                logz += log(local_z) * count - -        #print 'ls', ls -        #print 'lambda', list(ls) -        #print 'dual', logz -        return logz - -    def loglikelihood(): -        llh = 0 -        for p, (phrase, ccs) in enumerate(edges_phrase_to_context): -            for context, count in ccs: -                conditionals = zeros(num_tags) -                for t in range(num_tags): -                    prob = tagDist[p][t] -                    for i in range(4): -                        prob *= contextWordDist[i][t][types[context[i]]] -                    conditionals[t] = prob -                cz = sum(conditionals) -                llh += log(cz) * count -        return llh - -    def primal(ls): -        # FIXME: returns negative values for KL (impossible) -        logz = dual(ls) -        expectations = -dual_deriv(ls) -        kl = -logz - dot(ls, expectations) -        llh = loglikelihood() - -        pt_l1linf = 0 -        for phrase, ccs in edges_phrase_to_context: -            for t in range(num_tags): -                best = -1e500 -                for context, count in ccs: -                    li = lamba_index[phrase,context] + t -                    s = expectations[li] -                    if s > best: best = s -                pt_l1linf += best - -        ct_l1linf = 0 -        for context, pcs in edges_context_to_phrase: -            for t in range(num_tags): -                best = -1e500 -                for phrase, count in pcs: -                    li = omega_offset + lamba_index[phrase,context] + t -                    s = expectations[li] -                    if s > best: best = s -                ct_l1linf += best - -        return llh, kl, pt_l1linf, ct_l1linf, llh - kl - delta * pt_l1linf - gamma * ct_l1linf - -    def dual_deriv(ls): -        # d/dl log(z) = E_q[phi] -        deriv = zeros(2 * num_edges * num_tags) -        for p, (phrase, ccs) in enumerate(edges_phrase_to_context): -            for context, count in ccs: -                conditionals = zeros(num_tags) -                for t in range(num_tags): -                    prob = 
tagDist[p][t] -                    for i in range(4): -                        prob *= contextWordDist[i][t][types[context[i]]] -                    conditionals[t] = prob -                cz = sum(conditionals) -                conditionals /= cz - -                scores = zeros(num_tags) -                for t in range(num_tags): -                    li = lamba_index[phrase,context] + t -                    scores[t] = conditionals[t] * exp(-ls[li] - ls[omega_offset + li]) -                local_z = sum(scores) - -                #print 'ddual', phrase, context, count, 'q =', scores / local_z - -                for t in range(num_tags): -                    deriv[lamba_index[phrase,context] + t] -= count * scores[t] / local_z -                    deriv[omega_offset + lamba_index[phrase,context] + t] -= count * scores[t] / local_z - -        #print 'ddual', list(deriv) -        return deriv - -    def constraints(ls): -        cons = zeros(num_phrases * num_tags + num_edges * num_tags) - -        index = 0 -        for phrase, ccs in edges_phrase_to_context: -            for t in range(num_tags): -                if delta > 0: -                    total = delta -                    for cprime, count in ccs: -                        total -= ls[lamba_index[phrase, cprime] + t] -                    cons[index] = total -                index += 1 - -        for context, pcs in edges_context_to_phrase: -            for t in range(num_tags): -                if gamma > 0: -                    total = gamma -                    for pprime, count in pcs: -                        total -= ls[omega_offset + lamba_index[pprime, context] + t] -                    cons[index] = total -                index += 1 - -        #print 'cons', cons -        return cons - -    def constraints_deriv(ls): -        cons = zeros((num_phrases * num_tags + num_edges * num_tags, 2 * num_edges * num_tags)) - -        index = 0 -        for phrase, ccs in edges_phrase_to_context: -            for t in range(num_tags): -                if delta > 0: -                    d = cons[index,:]#zeros(num_edges * num_tags) -                    for cprime, count in ccs: -                        d[lamba_index[phrase, cprime] + t] = -1 -                    #cons[index] = d -                index += 1 - -        for context, pcs in edges_context_to_phrase: -            for t in range(num_tags): -                if gamma > 0: -                    d = cons[index,:]#d = zeros(num_edges * num_tags) -                    for pprime, count in pcs: -                        d[omega_offset + lamba_index[pprime, context] + t] = -1 -                    #cons[index] = d -                index += 1 -        #print 'dcons', cons -        return cons - -    print 'Pre lambda optimisation dual', dual(lamba), 'primal', primal(lamba) -    #print 'lambda', lamba, lamba.shape -    #print 'bounds', [(0, max(delta, gamma))] * (2 * num_edges * num_tags) - -    lamba = scipy.optimize.fmin_slsqp(dual, lamba, -                            bounds=[(0, max(delta, gamma))] * (2 * num_edges * num_tags), -                            f_ieqcons=constraints, -                            fprime=dual_deriv, -                            fprime_ieqcons=constraints_deriv, -                            iprint=0) -    print 'Post lambda optimisation dual', dual(lamba), 'primal', primal(lamba) - -    # E-step -    llh = log_z = 0 -    for p, (phrase, ccs) in enumerate(edges_phrase_to_context): -        for context, count in ccs: -            conditionals = zeros(num_tags) 
-            for t in range(num_tags): -                prob = tagDist[p][t] -                for i in range(4): -                    prob *= contextWordDist[i][t][types[context[i]]] -                conditionals[t] = prob -            cz = sum(conditionals) -            conditionals /= cz -            llh += log(cz) * count - -            q = zeros(num_tags) -            li = lamba_index[phrase, context] -            for t in range(num_tags): -                q[t] = conditionals[t] * exp(-lamba[li + t] - lamba[omega_offset + li + t]) -            qz = sum(q) -            log_z += count * log(qz) - -            for t in range(num_tags): -                tagCounts[p][t] += count * q[t] / qz - -            for i in range(4): -                for t in range(num_tags): -                    contextWordCounts[i][t][types[context[i]]] += count * q[t] / qz - -    print 'iteration', iteration, 'llh', llh, 'logz', log_z - -    # M-step -    for p in range(num_phrases): -        tagDist[p] = normalise(tagCounts[p]) -    for i in range(4): -        for t in range(num_tags): -            contextWordDist[i][t] = normalise(contextWordCounts[i][t]) - -for p, (phrase, ccs) in enumerate(edges_phrase_to_context): -    for context, count in ccs: -        conditionals = zeros(num_tags) -        for t in range(num_tags): -            prob = tagDist[p][t] -            for i in range(4): -                prob *= contextWordDist[i][t][types[context[i]]] -            conditionals[t] = prob -        cz = sum(conditionals) -        conditionals /= cz - -        print '%s\t%s ||| C=%d |||' % (phrase, context, argmax(conditionals)), conditionals diff --git a/gi/posterior-regularisation/train_pr_parallel.py b/gi/posterior-regularisation/train_pr_parallel.py deleted file mode 100644 index 3b9cefed..00000000 --- a/gi/posterior-regularisation/train_pr_parallel.py +++ /dev/null @@ -1,333 +0,0 @@ -import sys -import scipy.optimize -from numpy import * -from numpy.random import random, seed - -# -# Step 1: load the concordance counts -#  - -edges_phrase_to_context = [] -edges_context_to_phrase = [] -types = {} -context_types = {} -num_edges = 0 - -for line in sys.stdin: -    phrase, rest = line.strip().split('\t') -    parts = rest.split('|||') -    edges_phrase_to_context.append((phrase, [])) -    for i in range(0, len(parts), 2): -        context, count = parts[i:i+2] - -        ctx = tuple(filter(lambda x: x != '<PHRASE>', context.split())) -        cnt = int(count.strip()[2:]) -        edges_phrase_to_context[-1][1].append((ctx, cnt)) - -        cid = context_types.get(ctx, len(context_types)) -        if cid == len(context_types): -            context_types[ctx] = cid -            edges_context_to_phrase.append((ctx, [])) -        edges_context_to_phrase[cid][1].append((phrase, cnt)) - -        for token in ctx: -            types.setdefault(token, len(types)) -        for token in phrase.split(): -            types.setdefault(token, len(types)) - -        num_edges += 1 - -# -# Step 2: initialise the model parameters -# - -num_tags = 25 -num_types = len(types) -num_phrases = len(edges_phrase_to_context) -num_contexts = len(edges_context_to_phrase) -delta = float(sys.argv[1]) -assert sys.argv[2] in ('local', 'global') -local = sys.argv[2] == 'local' -if len(sys.argv) >= 4: -     seed(int(sys.argv[3])) - -print 'Read in', num_edges, 'edges', num_phrases, 'phrases', num_contexts, 'contexts and', len(types), 'word types' - -def normalise(a): -    return a / float(sum(a)) - -# Pr(tag | phrase) -tagDist = 
[normalise(random(num_tags)+1) for p in range(num_phrases)] -# Pr(context at pos i = w | tag) indexed by i, tag, word -contextWordDist = [[normalise(random(num_types)+1) for t in range(num_tags)] for i in range(4)] - -# -# Step 3: expectation maximisation -# - -class GlobalDualObjective: -    """ -    Objective, log(z), for all phrases s.t. lambda >= 0, sum_c lambda_pct <= scale  -    """ - -    def __init__(self, scale): -        self.scale = scale -        self.posterior = zeros((num_edges, num_tags)) -        self.q = zeros((num_edges, num_tags)) -        self.llh = 0 - -        index = 0 -        for j, (phrase, edges) in enumerate(edges_phrase_to_context): -            for context, count in edges: -                for t in range(num_tags): -                    prob = tagDist[j][t] -                    for k, token in enumerate(context): -                        prob *= contextWordDist[k][t][types[token]] -                    self.posterior[index,t] = prob -                z = sum(self.posterior[index,:]) -                self.posterior[index,:] /= z -                self.llh += log(z) * count -                index += 1 - -    def objective(self, ls): -        ls = ls.reshape((num_edges, num_tags)) -        logz = 0 - -        index = 0 -        for j, (phrase, edges) in enumerate(edges_phrase_to_context): -            for context, count in edges: -                for t in range(num_tags): -                    self.q[index,t] = self.posterior[index,t] * exp(-ls[index,t]) -                local_z = sum(self.q[index,:]) -                self.q[index,:] /= local_z -                logz += log(local_z) * count -                index += 1 - -        return logz - -    # FIXME: recomputes q many more times than necessary - -    def gradient(self, ls): -        ls = ls.reshape((num_edges, num_tags)) -        gradient = zeros((num_edges, num_tags)) - -        index = 0 -        for j, (phrase, edges) in enumerate(edges_phrase_to_context): -            for context, count in edges: -                for t in range(num_tags): -                    self.q[index,t] = self.posterior[index,t] * exp(-ls[index,t]) -                local_z = sum(self.q[index,:]) -                self.q[index,:] /= local_z -                for t in range(num_tags): -                    gradient[index,t] -= self.q[index,t] * count -                index += 1 - -        return gradient.ravel() - -    def constraints(self, ls): -        ls = ls.reshape((num_edges, num_tags)) -        cons = ones((num_phrases, num_tags)) * self.scale -        index = 0 -        for j, (phrase, edges) in enumerate(edges_phrase_to_context): -            for i, (context, count) in enumerate(edges): -                for t in range(num_tags): -                    cons[j,t] -= ls[index,t] * count -                index += 1 -        return cons.ravel() - -    def constraints_gradient(self, ls): -        ls = ls.reshape((num_edges, num_tags)) -        gradient = zeros((num_phrases, num_tags, num_edges, num_tags)) -        index = 0 -        for j, (phrase, edges) in enumerate(edges_phrase_to_context): -            for i, (context, count) in enumerate(edges): -                for t in range(num_tags): -                    gradient[j,t,index,t] -= count -                index += 1 -        return gradient.reshape((num_phrases*num_tags, num_edges*num_tags)) - -    def optimize(self): -        ls = zeros(num_edges * num_tags) -        #print '\tpre lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba) -        ls = 
scipy.optimize.fmin_slsqp(self.objective, ls, -                                bounds=[(0, self.scale)] * num_edges * num_tags, -                                f_ieqcons=self.constraints, -                                fprime=self.gradient, -                                fprime_ieqcons=self.constraints_gradient, -                                iprint=0) # =2 for verbose -        #print '\tpost lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba) - -        # returns llh, kl and l1lmax contribution -        l1lmax = 0 -        index = 0 -        for j, (phrase, edges) in enumerate(edges_phrase_to_context): -            for t in range(num_tags): -                lmax = None -                for i, (context, count) in enumerate(edges): -                    lmax = max(lmax, self.q[index+i,t]) -                l1lmax += lmax -            index += len(edges) - -        return self.llh, -self.objective(ls) + dot(ls, self.gradient(ls)), l1lmax - -class LocalDualObjective: -    """ -    Local part of objective, log(z) relevant to lambda_p**. -    Optimised subject to lambda >= 0, sum_c lambda_pct <= scale forall t  -    """ - -    def __init__(self, phraseId, scale): -        self.phraseId = phraseId -        self.scale = scale -        edges = edges_phrase_to_context[self.phraseId][1] -        self.posterior = zeros((len(edges), num_tags)) -        self.q = zeros((len(edges), num_tags)) -        self.llh = 0 - -        for i, (context, count) in enumerate(edges): -            for t in range(num_tags): -                prob = tagDist[phraseId][t] -                for j, token in enumerate(context): -                    prob *= contextWordDist[j][t][types[token]] -                self.posterior[i,t] = prob -            z = sum(self.posterior[i,:]) -            self.posterior[i,:] /= z -            self.llh += log(z) * count - -    def objective(self, ls): -        edges = edges_phrase_to_context[self.phraseId][1] -        ls = ls.reshape((len(edges), num_tags)) -        logz = 0 - -        for i, (context, count) in enumerate(edges): -            for t in range(num_tags): -                self.q[i,t] = self.posterior[i,t] * exp(-ls[i,t]) -            local_z = sum(self.q[i,:]) -            self.q[i,:] /= local_z -            logz += log(local_z) * count - -        return logz - -    # FIXME: recomputes q many more times than necessary - -    def gradient(self, ls): -        edges = edges_phrase_to_context[self.phraseId][1] -        ls = ls.reshape((len(edges), num_tags)) -        gradient = zeros((len(edges), num_tags)) - -        for i, (context, count) in enumerate(edges): -            for t in range(num_tags): -                self.q[i,t] = self.posterior[i,t] * exp(-ls[i,t]) -            local_z = sum(self.q[i,:]) -            self.q[i,:] /= local_z -            for t in range(num_tags): -                gradient[i,t] -= self.q[i,t] * count - -        return gradient.ravel() - -    def constraints(self, ls): -        edges = edges_phrase_to_context[self.phraseId][1] -        ls = ls.reshape((len(edges), num_tags)) -        cons = ones(num_tags) * self.scale -        for t in range(num_tags): -            for i, (context, count) in enumerate(edges): -                cons[t] -= ls[i,t] * count -        return cons - -    def constraints_gradient(self, ls): -        edges = edges_phrase_to_context[self.phraseId][1] -        ls = ls.reshape((len(edges), num_tags)) -        gradient = zeros((num_tags, len(edges), num_tags)) -        for t in range(num_tags): -            for 
i, (context, count) in enumerate(edges): -                gradient[t,i,t] -= count -        return gradient.reshape((num_tags, len(edges)*num_tags)) - -    def optimize(self, ls=None): -        edges = edges_phrase_to_context[self.phraseId][1] -        if ls == None: -            ls = zeros(len(edges) * num_tags) -        #print '\tpre lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba) -        ls = scipy.optimize.fmin_slsqp(self.objective, ls, -                                bounds=[(0, self.scale)] * len(edges) * num_tags, -                                f_ieqcons=self.constraints, -                                fprime=self.gradient, -                                fprime_ieqcons=self.constraints_gradient, -                                iprint=0) # =2 for verbose -        #print '\tlambda', list(ls) -        #print '\tpost lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba) - -        # returns llh, kl and l1lmax contribution -        l1lmax = 0 -        for t in range(num_tags): -            lmax = None -            for i, (context, count) in enumerate(edges): -                lmax = max(lmax, self.q[i,t]) -            l1lmax += lmax - -        return self.llh, -self.objective(ls) + dot(ls, self.gradient(ls)), l1lmax, ls - -ls = [None] * num_phrases -for iteration in range(20): -    tagCounts = [zeros(num_tags) for p in range(num_phrases)] -    contextWordCounts = [[zeros(num_types) for t in range(num_tags)] for i in range(4)] - -    # E-step -    llh = kl = l1lmax = 0 -    if local: -        for p in range(num_phrases): -            o = LocalDualObjective(p, delta) -            #print '\toptimising lambda for phrase', p, '=', edges_phrase_to_context[p][0] -            #print '\toptimising lambda for phrase', p, 'ls', ls[p] -            obj = o.optimize(ls[p]) -            #print '\tphrase', p, 'deltas', obj -            llh += obj[0] -            kl += obj[1] -            l1lmax += obj[2] -            ls[p] = obj[3] - -            edges = edges_phrase_to_context[p][1] -            for j, (context, count) in enumerate(edges): -                for t in range(num_tags): -                    tagCounts[p][t] += count * o.q[j,t] -                for i in range(4): -                    for t in range(num_tags): -                        contextWordCounts[i][t][types[context[i]]] += count * o.q[j,t] - -        #print 'iteration', iteration, 'LOCAL objective', (llh + kl + delta * l1lmax), 'llh', llh, 'kl', kl, 'l1lmax', l1lmax -    else: -        o = GlobalDualObjective(delta) -        obj = o.optimize() -        llh, kl, l1lmax = o.optimize() - -        index = 0 -        for p, (phrase, edges) in enumerate(edges_phrase_to_context): -            for context, count in edges: -                for t in range(num_tags): -                    tagCounts[p][t] += count * o.q[index,t] -                for i in range(4): -                    for t in range(num_tags): -                        contextWordCounts[i][t][types[context[i]]] += count * o.q[index,t] -                index += 1 - -    print 'iteration', iteration, 'objective', (llh - kl - delta * l1lmax), 'llh', llh, 'kl', kl, 'l1lmax', l1lmax - -    # M-step -    for p in range(num_phrases): -        tagDist[p] = normalise(tagCounts[p]) -    for i in range(4): -        for t in range(num_tags): -            contextWordDist[i][t] = normalise(contextWordCounts[i][t]) - -for p, (phrase, ccs) in enumerate(edges_phrase_to_context): -    for context, count in ccs: -        conditionals = zeros(num_tags) 
-        for t in range(num_tags): -            prob = tagDist[p][t] -            for i in range(4): -                prob *= contextWordDist[i][t][types[context[i]]] -            conditionals[t] = prob -        cz = sum(conditionals) -        conditionals /= cz - -        print '%s\t%s ||| C=%d |||' % (phrase, context, argmax(conditionals)), conditionals diff --git a/gi/pyp-topics/scripts/contexts2documents.py b/gi/pyp-topics/scripts/contexts2documents.py deleted file mode 100755 index 9be4ebbb..00000000 --- a/gi/pyp-topics/scripts/contexts2documents.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/python - -import sys -from operator import itemgetter - -if len(sys.argv) > 3: -  print "Usage: contexts2documents.py [contexts_index_out] [phrases_index_out]" -  exit(1) - -context_index = {}  -phrase_index = {} -for line in sys.stdin: -  phrase, line_tail = line.split('\t') - -  raw_contexts = line_tail.split('|||') -  contexts = [c.strip() for x,c in enumerate(raw_contexts) if x%2 == 0] -  counts   = [int(c.split('=')[1].strip()) for x,c in enumerate(raw_contexts) if x%2 != 0] -  phrase_index.setdefault(phrase, len(phrase_index)) -  print len(contexts), -  for context,count in zip(contexts,counts):  -    c = context_index.setdefault(context, len(context_index)) -    print "%d:%d" % (c,count), -  print -if 1 < len(sys.argv) < 4: -  contexts_out = open(sys.argv[1],'w') -  contexts = context_index.items() -  contexts.sort(key = itemgetter(1)) -  for context in contexts:  -    print >>contexts_out, context[0] -  contexts_out.close() -if len(sys.argv) == 3: -  phrases_out = open(sys.argv[2],'w') -  phrases = phrase_index.items() -  phrases.sort(key = itemgetter(1)) -  for phrase in phrases:  -    print >>phrases_out, phrase[0] -  phrases_out.close() diff --git a/gi/pyp-topics/scripts/extract_contexts.py b/gi/pyp-topics/scripts/extract_contexts.py deleted file mode 100755 index b2723f2a..00000000 --- a/gi/pyp-topics/scripts/extract_contexts.py +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/bin/python - -import sys,collections - -def extract_backoff(context_list, order): -  assert len(context_list) == (2*order) -  backoffs = [] -  for i in range(1,order+1): -    if i == order: -      backoffs.append(([context_list[i-1]+"|"], ["|"+context_list[i]])) -    else: -      right_limit = 2*order-i -      core = context_list[i:right_limit] -      left = [context_list[i-1]+"|"*(order-i+1)] -      right = ["|"*(order-i+1)+context_list[right_limit]] -      backoffs.append((core, left, right)) -# print context_list, backoffs -  return backoffs - -def tuple_to_str(t): -  s="" -  for i,x in enumerate(t): -    if i > 0: s += "|" -    s += str(x) -  return s - -if len(sys.argv) < 3: -  print "Usage: extract-contexts.py output_filename order cutoff lowercase" -  exit(1) - -output_filename = sys.argv[1] -order = int(sys.argv[2]) -cutoff = 0 -if len(sys.argv) > 3: -  cutoff = int(sys.argv[3]) -lowercase = False -if len(sys.argv) > 4: -  lowercase = bool(sys.argv[4]) - -contexts_dict={} -contexts_list=[] -contexts_freq=collections.defaultdict(int) -contexts_backoff={} - -token_dict={} -token_list=[] -documents_dict=collections.defaultdict(dict) - -contexts_at_order = [i for i in range(order+1)] - -prefix = ["<s%d>|<s>"%i for i in range(order)] -suffix = ["</s%d>|</s>"%i for i in range(order)] - -for line in sys.stdin: -  tokens = list(prefix) -  tokens.extend(line.split()) -  tokens.extend(suffix) -  if lowercase: -    tokens = map(lambda x: x.lower(), tokens) - -  for i in range(order, len(tokens)-order): -    context_list = [] 
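
As an aside, the windowing performed by the loop here (pad with order sentence markers on each side, take the order tokens to the left and right of position i as the context, and the token at i as the term) can be sketched standalone. Everything below is illustrative, using plain markers rather than the script's tagged ones.

    def windows(tokens, order):
        padded = ['<s>'] * order + tokens + ['</s>'] * order
        for i in range(order, len(padded) - order):
            # context = left window + right window, skipping the focus token itself
            ctx = padded[i - order:i] + padded[i + 1:i + order + 1]
            yield padded[i], ctx

    for term, ctx in windows(['a', 'b', 'c'], 1):
        print("%s -> %s" % (term, ctx))
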
-    term="" -    for j in range(i-order, i+order+1): -      token,tag = tokens[j].rsplit('|',2) -      if j != i: -        context_list.append(token) -      else: -        if token not in token_dict:  -          token_dict[token] = len(token_dict) -          token_list.append(token) -        term = token_dict[token]  - -    context = tuple_to_str(tuple(context_list)) - -    if context not in contexts_dict:  -      context_index = len(contexts_dict) -      contexts_dict[context] = context_index -      contexts_list.append(context) -      contexts_at_order[0] += 1 - -      # handle backoff -      backoff_contexts = extract_backoff(context_list, order) -      bo_indexes=[(context_index,)] -#     bo_indexes=[(context,)] -      for i,bo in enumerate(backoff_contexts): -        factor_indexes=[] -        for factor in bo: -          bo_tuple = tuple_to_str(tuple(factor)) -          if bo_tuple not in contexts_dict: -            contexts_dict[bo_tuple] = len(contexts_dict) -            contexts_list.append(bo_tuple) -            contexts_at_order[i+1] += 1 -#         factor_indexes.append(bo_tuple) -          factor_indexes.append(contexts_dict[bo_tuple]) -        bo_indexes.append(tuple(factor_indexes)) -       -      for i in range(len(bo_indexes)-1): -        contexts_backoff[bo_indexes[i][0]] = bo_indexes[i+1] - -    context_index = contexts_dict[context] -    contexts_freq[context_index] += 1 - -    if context_index not in documents_dict[term]: -      documents_dict[term][context_index] = 1 -    else: -      documents_dict[term][context_index] += 1 - -term_file = open(output_filename+".terms",'w') -for t in token_list: print >>term_file, t -term_file.close() - -contexts_file = open(output_filename+".contexts",'w') -for c in contexts_list:  -  print >>contexts_file, c -contexts_file.close() - -data_file = open(output_filename+".data",'w') -for t in range(len(token_list)):  -  line="" -  num_active=0 -  for c in documents_dict[t]: -    count = documents_dict[t][c] -    if contexts_freq[c] >= cutoff: -      line += (' ' + str(c) + ':' + str(count)) -      num_active += 1 -  if num_active > 0: -    print >>data_file, "%d%s" % (num_active,line) -data_file.close() - -contexts_backoff_file = open(output_filename+".contexts_backoff",'w') -print >>contexts_backoff_file, len(contexts_list), order, -#for x in contexts_at_order:  -#  print >>contexts_backoff_file, x, -#print >>contexts_backoff_file -for x in range(order-1): -  print >>contexts_backoff_file, 3, -print >>contexts_backoff_file, 2 - -for x in contexts_backoff:  -  print >>contexts_backoff_file, x,  -  for y in contexts_backoff[x]: print >>contexts_backoff_file, y, -  print >>contexts_backoff_file  -contexts_backoff_file.close() diff --git a/gi/pyp-topics/scripts/extract_contexts_test.py b/gi/pyp-topics/scripts/extract_contexts_test.py deleted file mode 100755 index 693b6e0b..00000000 --- a/gi/pyp-topics/scripts/extract_contexts_test.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/python - -import sys,collections - -def tuple_to_str(t): -  s="" -  for i,x in enumerate(t): -    if i > 0: s += "|" -    s += str(x) -  return s - -if len(sys.argv) < 5: -  print "Usage: extract-contexts_test.py output_filename vocab contexts order lowercase" -  exit(1) - -output_filename = sys.argv[1] -output = open(output_filename+".test_data",'w') - -unk_term="-UNK-" -vocab_dict={} -for i,x in enumerate(file(sys.argv[2], 'r').readlines()):  -  vocab_dict[x.strip()]=i - -contexts_dict={} -contexts_list=[] -for i,x in enumerate(file(sys.argv[3], 'r').readlines()): 
 -  contexts_dict[x.strip()]=i -  contexts_list.append(x.strip()) - -order = int(sys.argv[4]) - -lowercase = False -if len(sys.argv) > 5: -  lowercase = bool(sys.argv[5]) -if lowercase: unk_term = unk_term.lower() - -prefix = ["<s%d>|<s>"%i for i in range(order)] -suffix = ["</s%d>|</s>"%i for i in range(order)] - -assert unk_term in vocab_dict -for line in sys.stdin: -  tokens = list(prefix) -  tokens.extend(line.split()) -  tokens.extend(suffix) -  if lowercase: -    tokens = map(lambda x: x.lower(), tokens) - -  for i in range(order, len(tokens)-order): -    context_list=[] -    term="" -    for j in range(i-order, i+order+1): -      token,tag = tokens[j].rsplit('|',2) -      if j != i: -        context_list.append(token) -      else: -        if token not in vocab_dict:  -          term = vocab_dict[unk_term]  -        else: -          term = vocab_dict[token]  -    context = tuple_to_str(context_list) -    if context not in contexts_dict:  -      contexts_dict[context] = len(contexts_dict) -      contexts_list.append(context) -    context_index = contexts_dict[context] -    print >>output, "%d:%d" % (term,context_index), -  print >>output -output.close() - -contexts_file = open(output_filename+".test_contexts",'w') -for c in contexts_list:  -  print >>contexts_file, c -contexts_file.close() diff --git a/gi/pyp-topics/scripts/extract_leaves.py b/gi/pyp-topics/scripts/extract_leaves.py deleted file mode 100755 index 14783b36..00000000 --- a/gi/pyp-topics/scripts/extract_leaves.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/python - -import nltk -import nltk.probability -import sys -import getopt  - -lexicalise=False -rm_traces=False -cutoff=100 -length_cutoff=10000 -try:                                 -  opts, args = getopt.getopt(sys.argv[1:], "hs:c:l", ["help", "lexicalise", "cutoff","sentence-length","remove-traces"]) -except getopt.GetoptError:           -  print "Usage: extract_leaves.py [-lsc]"                         -  sys.exit(2)                      -for opt, arg in opts:                 -  if opt in ("-h", "--help"):       -    print "Usage: extract_leaves.py [-lsc]"                         -    sys.exit()                   -  elif opt in ("-l", "--lexicalise"):                 -    lexicalise = True                  -  elif opt in ("-c", "--cutoff"):                 -    cutoff = int(arg)  -  elif opt in ("-s", "--sentence-length"):                 -    length_cutoff = int(arg)  -  elif opt in ("--remove-traces"):                 -    rm_traces = True                  - -token_freq = nltk.probability.FreqDist() -lines = [] -for line in sys.stdin: -  t = nltk.Tree.parse(line) -  pos = t.pos() -  if len(pos) <= length_cutoff: -    lines.append(pos) -    for token, tag in pos: -      token_freq.inc(token)   - -for line in lines: -  for token,tag in line: -    if not (rm_traces and tag == "-NONE-"): -      if lexicalise: -        if token_freq[token] < cutoff: -          token = '-UNK-' -        print '%s|%s' % (token,tag), -      else: -        print '%s' % tag, -  print diff --git a/gi/pyp-topics/scripts/map-documents.py b/gi/pyp-topics/scripts/map-documents.py deleted file mode 100755 index 703de312..00000000 --- a/gi/pyp-topics/scripts/map-documents.py +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/python - -import sys - -if len(sys.argv) != 2: -  print "Usage: map-documents.py vocab-file" -  exit(1) - -vocab = file(sys.argv[1], 'r').readlines() -term_dict = map(lambda x: x.strip(), vocab) - -for line in sys.stdin: -  tokens = line.split() -  for token in tokens: -    elements = 
token.split(':') -    if len(elements) == 1: -      print "%s" % (term_dict[int(elements[0])]), -    else: -      print "%s:%s" % (term_dict[int(elements[0])], elements[1]), -  print diff --git a/gi/pyp-topics/scripts/map-terms.py b/gi/pyp-topics/scripts/map-terms.py deleted file mode 100755 index eb0298d7..00000000 --- a/gi/pyp-topics/scripts/map-terms.py +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/python - -import sys - -if len(sys.argv) != 2: -  print "Usage: map-terms.py vocab-file" -  exit(1) - -vocab = file(sys.argv[1], 'r').readlines() -term_dict = map(lambda x: x.strip().replace(' ','_'), vocab) - -for line in sys.stdin: -  tokens = line.split() -  for token in tokens: -    elements = token.split(':') -    if len(elements) == 1: -      print "%s" % (term_dict[int(elements[0])]), -    else: -      print "%s:%s" % (term_dict[int(elements[0])], elements[1]), -  print diff --git a/gi/pyp-topics/scripts/run.sh b/gi/pyp-topics/scripts/run.sh deleted file mode 100644 index 19e625b1..00000000 --- a/gi/pyp-topics/scripts/run.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/sh - - -./simple-extract-context.sh ~/workspace/clsp2010/jhuws2010/data/btec/split.zh-en.al 1 | ~/workspace/pyp-topics/scripts/contexts2documents.py > split.zh-en.data - -~/workspace/pyp-topics/bin/pyp-topics-train -d split.zh-en.data -t 50 -s 100 -o split.zh-en.documents.gz -w split.zh-en.topics.gz -gunzip split.zh-en.documents.gz - -~/workspace/cdec/extools/extractor -i ../jhuws2010/data/btec/split.zh-en.al -S 1 -c 500000 -L 12 --base_phrase_spans | ~/workspace/pyp-topics/scripts/spans2labels.py split.zh-en.phrases split.zh-en.contexts split.zh-en.documents > corpus.zh-en.labelled_spans - -paste -d " " ~/workspace/clsp2010/jhuws2010/data/btec/split.zh-en.al corpus.labelled_spans > split.zh-en.labelled_spans - -./simple-extract.sh ~/workspace/clsp2010/scratch/split.zh-en.labelled_spans diff --git a/gi/pyp-topics/scripts/score-mkcls.py b/gi/pyp-topics/scripts/score-mkcls.py deleted file mode 100755 index 6bd33fc5..00000000 --- a/gi/pyp-topics/scripts/score-mkcls.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/python - -import sys -from collections import defaultdict - -def dict_max(d): -  max_val=-1 -  max_key=None -  for k in d: -    if d[k] > max_val:  -      max_val = d[k] -      max_key = k -  assert max_key -  return max_key - -if len(sys.argv) != 3: -  print "Usage: score-mkcls.py gold classes" -  exit(1) - -gold_file=open(sys.argv[1],'r') - -term_to_topics = {} -for line in open(sys.argv[2],'r'): -  term,cls = line.split() -  term_to_topics[term] = cls - -gold_to_topics = defaultdict(dict) -topics_to_gold = defaultdict(dict) - -for gold_line in gold_file: -  gold_tokens = gold_line.split() -  for gold_token in gold_tokens: -    gold_term,gold_tag = gold_token.rsplit('|',1) -    pred_token = term_to_topics[gold_term] -    gold_to_topics[gold_tag][pred_token] \ -      = gold_to_topics[gold_tag].get(pred_token, 0) + 1 -    topics_to_gold[pred_token][gold_tag] \ -      = topics_to_gold[pred_token].get(gold_tag, 0) + 1 - -pred=0 -correct=0 -gold_file=open(sys.argv[1],'r') -for gold_line in gold_file: -  gold_tokens = gold_line.split() - -  for gold_token in gold_tokens: -    gold_term,gold_tag = gold_token.rsplit('|',1) -    pred_token = term_to_topics[gold_term] -    print "%s|%s|%s" % (gold_token, pred_token, dict_max(topics_to_gold[pred_token])), -    pred += 1 -    if gold_tag == dict_max(topics_to_gold[pred_token]): -      correct += 1 -  print -print >>sys.stderr, "Many-to-One Accuracy = %f" % (float(correct) / pred) -#for 
x in gold_to_topics:  -#  print x,dict_max(gold_to_topics[x]) -#print "###################################################" -#for x in range(len(topics_to_gold)):  -#  print x,dict_max(topics_to_gold[str(x)]) -#  print x,topics_to_gold[str(x)] -#print term_to_topics diff --git a/gi/pyp-topics/scripts/score-topics.py b/gi/pyp-topics/scripts/score-topics.py deleted file mode 100755 index 1d8a1fcd..00000000 --- a/gi/pyp-topics/scripts/score-topics.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/python - -import sys -from collections import defaultdict - -def dict_max(d): -  max_val=-1 -  max_key=None -  for k in d: -    if d[k] > max_val:  -      max_val = d[k] -      max_key = k -  assert max_key -  return max_key - -if len(sys.argv) != 3: -  print "Usage: score-topics.py gold pred" -  exit(1) - -gold_file=open(sys.argv[1],'r') -pred_file=open(sys.argv[2],'r') - -gold_to_topics = defaultdict(dict) -topics_to_gold = defaultdict(dict) -term_to_topics = defaultdict(dict) - -for gold_line,pred_line in zip(gold_file,pred_file): -  gold_tokens = gold_line.split() -  pred_tokens = pred_line.split() -  assert len(gold_tokens) == len(pred_tokens) - -  for gold_token,pred_token in zip(gold_tokens,pred_tokens): -    gold_term,gold_tag = gold_token.rsplit('|',1) -    gold_to_topics[gold_tag][pred_token] \ -      = gold_to_topics[gold_tag].get(pred_token, 0) + 1 -    term_to_topics[gold_term][pred_token] \ -      = term_to_topics[gold_term].get(pred_token, 0) + 1 -    topics_to_gold[pred_token][gold_tag] \ -      = topics_to_gold[pred_token].get(gold_tag, 0) + 1 - -pred=0 -correct=0 -gold_file=open(sys.argv[1],'r') -pred_file=open(sys.argv[2],'r') -for gold_line,pred_line in zip(gold_file,pred_file): -  gold_tokens = gold_line.split() -  pred_tokens = pred_line.split() - -  for gold_token,pred_token in zip(gold_tokens,pred_tokens): -    gold_term,gold_tag = gold_token.rsplit('|',1) -#   print "%s|%s" % (gold_token, dict_max(gold_to_topics[gold_tag])), -    print "%s|%s|%s" % (gold_token, pred_token, dict_max(topics_to_gold[pred_token])), -    pred += 1 -    if gold_tag == dict_max(topics_to_gold[pred_token]): -      correct += 1 -  print -print >>sys.stderr, "Many-to-One Accuracy = %f" % (float(correct) / pred) -#for x in gold_to_topics:  -#  print x,dict_max(gold_to_topics[x]) -#print "###################################################" -#for x in range(len(topics_to_gold)):  -#  print x,dict_max(topics_to_gold[str(x)]) -#  print x,topics_to_gold[str(x)] -#print term_to_topics diff --git a/gi/pyp-topics/scripts/spans2labels.py b/gi/pyp-topics/scripts/spans2labels.py deleted file mode 100755 index 50fa8106..00000000 --- a/gi/pyp-topics/scripts/spans2labels.py +++ /dev/null @@ -1,137 +0,0 @@ -#!/usr/bin/python - -import sys -from operator import itemgetter - -if len(sys.argv) <= 2: -  print "Usage: spans2labels.py phrase_context_index [order] [threshold] [languages={s,t,b}{s,t,b}] [type={tag,tok,both},{tag,tok,both}]" -  exit(1) - -order=1 -threshold = 0 -cutoff_cat = "<UNK>" -if len(sys.argv) > 2: -  order = int(sys.argv[2]) -if len(sys.argv) > 3: -  threshold = float(sys.argv[3]) -phr=ctx='t' -if len(sys.argv) > 4: -  phr, ctx = sys.argv[4] -  assert phr in 'stb' -  assert ctx in 'stb' -phr_typ = ctx_typ = 'both' -if len(sys.argv) > 5: -  phr_typ, ctx_typ = sys.argv[5].split(',') -  assert phr_typ in ('tag', 'tok', 'both') -  assert ctx_typ in ('tag', 'tok', 'both') - -#print >>sys.stderr, "Loading phrase index" -phrase_context_index = {} -for line in file(sys.argv[1], 'r'): -  phrase,tail= 
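spans2labels.py (beginning above) first loads a phrase-context index in which each context is followed by a feature string such as C=12 P=0.85; the C value is the category label, and entries whose P falls below the threshold are demoted to the cutoff category. A condensed Python 3 sketch of that parse, assuming the same phrase<TAB>ctx ||| feats ||| ctx ||| feats layout and skipping the script's special case for a leading integer pair:

def load_phrase_context_index(path, threshold=0.0, cutoff_cat="<UNK>"):
    index = {}
    with open(path) as f:
        for line in f:
            phrase, tail = line.rstrip("\n").split("\t")
            fields = tail.split(" ||| ")
            if len(fields) < 2:
                continue
            # Fields alternate: a context string, then its feature string.
            for ctx, feats in zip(fields[0::2], fields[1::2]):
                kv = dict(p.split("=", 1) for p in feats.split())
                category = kv["C"]
                if "P" in kv and float(kv["P"]) < threshold:
                    category = cutoff_cat   # such spans are left unlabelled later
                index[(phrase, ctx)] = category
    return index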
line.split('\t') -  contexts = tail.split(" ||| ") -  try: # remove Phil's bizarre integer pair -       x,y = contexts[0].split() -       x=int(x); y=int(y) -       contexts = contexts[1:] -  except: -       pass -  if len(contexts) == 1: continue -  assert len(contexts) % 2 == 0 -  for i in range(0, len(contexts), 2): -    #parse contexts[i+1] = " C=1 P=0.8 ... " -    features=dict([ keyval.split('=') for keyval in contexts[i+1].split()]) -    category = features['C']     -    if features.has_key('P') and float(features['P']) < threshold: -	category = cutoff_cat -     -    phrase_context_index[(phrase,contexts[i])] = category  -    #print (phrase,contexts[i]), category - -#print >>sys.stderr, "Labelling spans" -for line in sys.stdin: -  #print >>sys.stderr, "line", line.strip() -  line_segments = line.split(' ||| ') -  assert len(line_segments) >= 3 -  source = ['<s>' for x in range(order)] + line_segments[0].split() + ['</s>' for x in range(order)] -  target = ['<s>' for x in range(order)] + line_segments[1].split() + ['</s>' for x in range(order)] -  phrases = [ [int(i) for i in x.split('-')] for x in line_segments[2].split()] - -  if phr_typ != 'both' or ctx_typ != 'both': -    if phr in 'tb' or ctx in 'tb': -        target_toks = ['<s>' for x in range(order)] + map(lambda x: x.rsplit('_', 1)[0], line_segments[1].split()) + ['</s>' for x in range(order)] -        target_tags = ['<s>' for x in range(order)] + map(lambda x: x.rsplit('_', 1)[-1], line_segments[1].split()) + ['</s>' for x in range(order)] - -        if phr in 'tb': -            if phr_typ == 'tok': -                targetP = target_toks -            elif phr_typ == 'tag': -                targetP = target_tags -        if ctx in 'tb': -            if ctx_typ == 'tok': -                targetC = target_toks -            elif ctx_typ == 'tag': -                targetC = target_tags - -    if phr in 'sb' or ctx in 'sb': -        source_toks = ['<s>' for x in range(order)] + map(lambda x: x.rsplit('_', 1)[0], line_segments[0].split()) + ['</s>' for x in range(order)] -        source_tags = ['<s>' for x in range(order)] + map(lambda x: x.rsplit('_', 1)[-1], line_segments[0].split()) + ['</s>' for x in range(order)] - -        if phr in 'sb': -            if phr_typ == 'tok': -                sourceP = source_toks -            elif phr_typ == 'tag': -                sourceP = source_tags -        if ctx in 'sb': -            if ctx_typ == 'tok': -                sourceC = source_toks -            elif ctx_typ == 'tag': -                sourceC = source_tags -  else: -    sourceP = sourceC = source -    targetP = targetC = target - -  #print >>sys.stderr, "line", source, '---', target, 'phrases', phrases - -  print "|||", - -  for s1,s2,t1,t2 in phrases: -    s1 += order -    s2 += order -    t1 += order -    t2 += order - -    phraset = phrases = contextt = contexts = '' -    if phr in 'tb': -        phraset = reduce(lambda x, y: x+y+" ", targetP[t1:t2], "").strip() -    if phr in 'sb': -        phrases = reduce(lambda x, y: x+y+" ", sourceP[s1:s2], "").strip() - -    if ctx in 'tb': -        left_context = reduce(lambda x, y: x+y+" ", targetC[t1-order:t1], "") -        right_context = reduce(lambda x, y: x+y+" ", targetC[t2:t2+order], "").strip() -        contextt = "%s<PHRASE> %s" % (left_context, right_context) -    if ctx in 'sb': -        left_context = reduce(lambda x, y: x+y+" ", sourceC[s1-order:s1], "") -        right_context = reduce(lambda x, y: x+y+" ", sourceC[s2:s2+order], "").strip() -        contexts = 
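The span-labelling loop pads each sentence with order copies of <s> and </s>, then rebuilds a context string of the form "left <PHRASE> right" around every phrase span. A sketch of that window construction under the same conventions, with plain list slicing in place of the reduce chains:

def phrase_and_context(tokens, start, end, order=1):
    # tokens: one sentence; [start, end) is a phrase span in unpadded indices.
    padded = ["<s>"] * order + tokens + ["</s>"] * order
    s, e = start + order, end + order            # shift into padded coordinates
    phrase = " ".join(padded[s:e])
    left = " ".join(padded[s - order:s])
    right = " ".join(padded[e:e + order])
    return phrase, f"{left} <PHRASE> {right}".strip()

# ('b c', 'a <PHRASE> d')
print(phrase_and_context(["a", "b", "c", "d"], 1, 3))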
"%s<PHRASE> %s" % (left_context, right_context) - -    if phr == 'b': -        phrase = phraset + ' <SPLIT> ' + phrases -    elif phr == 's': -        phrase = phrases -    else: -        phrase = phraset - -    if ctx == 'b': -        context = contextt + ' <SPLIT> ' + contexts -    elif ctx == 's': -        context = contexts -    else: -        context = contextt - -    #print "%d-%d-%d-%d looking up" % (s1-order,s2-order,t1-order,t2-order), (phrase, context) -    label = phrase_context_index.get((phrase,context), cutoff_cat) -    if label != cutoff_cat: #cutoff'd spans are left unlabelled -      print "%d-%d-%d-%d:X%s" % (s1-order,s2-order,t1-order,t2-order,label), -  print diff --git a/gi/pyp-topics/scripts/tokens2classes.py b/gi/pyp-topics/scripts/tokens2classes.py deleted file mode 100755 index 33df255f..00000000 --- a/gi/pyp-topics/scripts/tokens2classes.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/python - -import sys - -if len(sys.argv) != 3: -  print "Usage: tokens2classes.py source_classes target_classes" -  exit(1) - -source_to_topics = {} -for line in open(sys.argv[1],'r'): -  term,cls = line.split() -  source_to_topics[term] = cls - -target_to_topics = {} -for line in open(sys.argv[2],'r'): -  term,cls = line.split() -  target_to_topics[term] = cls - -for line in sys.stdin: -  source, target, tail = line.split(" ||| ") - -  for token in source.split(): -    print source_to_topics[token], -  print "|||", -  for token in target.split(): -    print target_to_topics[token], -  print "|||", tail, diff --git a/gi/pyp-topics/scripts/topics.py b/gi/pyp-topics/scripts/topics.py deleted file mode 100755 index 0db1af71..00000000 --- a/gi/pyp-topics/scripts/topics.py +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/python - -import sys - -if len(sys.argv) != 2: -  print "Usage: topics.py words-per-topic" -  exit(1) - -for t,line in enumerate(sys.stdin): -  tokens = line.split() -  terms = [] -  for token in tokens: -    elements = token.rsplit(':',1) -    terms.append((int(elements[1]),elements[0])) -  terms.sort() -  terms.reverse() - -  print "Topic %d:" % t -  map(lambda (x,y) : sys.stdout.write("   %s:%s\n" % (y,x)), terms[:int(sys.argv[1])]) -  print diff --git a/gi/pyp-topics/src/Makefile.am b/gi/pyp-topics/src/Makefile.am deleted file mode 100644 index d3f95d0b..00000000 --- a/gi/pyp-topics/src/Makefile.am +++ /dev/null @@ -1,16 +0,0 @@ -bin_PROGRAMS = pyp-topics-train pyp-contexts-train #mpi-pyp-contexts-train - -contexts_lexer.cc: contexts_lexer.l -	$(LEX) -s -CF -8 -o$@ $< - -pyp_topics_train_SOURCES = mt19937ar.c corpus.cc gzstream.cc pyp-topics.cc train.cc contexts_lexer.cc contexts_corpus.cc -pyp_topics_train_LDADD = $(top_srcdir)/utils/libutils.a -lz - -pyp_contexts_train_SOURCES = mt19937ar.c corpus.cc gzstream.cc pyp-topics.cc contexts_lexer.cc contexts_corpus.cc train-contexts.cc -pyp_contexts_train_LDADD = $(top_srcdir)/utils/libutils.a -lz - -#mpi_pyp_contexts_train_SOURCES = mt19937ar.c corpus.cc gzstream.cc mpi-pyp-topics.cc contexts_lexer.cc contexts_corpus.cc mpi-train-contexts.cc -#mpi_pyp_contexts_train_LDADD = $(top_srcdir)/utils/libutils.a -lz - -AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I../../../utils - diff --git a/gi/pyp-topics/src/Makefile.mpi b/gi/pyp-topics/src/Makefile.mpi deleted file mode 100644 index b7b8a290..00000000 --- a/gi/pyp-topics/src/Makefile.mpi +++ /dev/null @@ -1,26 +0,0 @@ -BLD_ARCH=$(shell uname -s) --include macros.${BLD_ARCH} - -local_objs = mt19937ar.o corpus.o gzstream.o mpi-pyp-topics.o contexts_lexer.o contexts_corpus.o 
mpi-train-contexts.o - -all: mpi-pyp-contexts-train - --include makefile.depend - -#-----------------------# -# Local stuff -#-----------------------# - -mpi-pyp-contexts-train: mpi-train-contexts.o $(local_objs) -	$(CXX) -o $@ $^ $(LDFLAGS) - -.PHONY: depend echo -depend: -#$(CXX) -MM $(CXXFLAGS) *.cc *.c | sed 's/^\(.*\.o:\)/obj\/\1/' > makefile.depend -	$(CXX) -MM $(CXXFLAGS) *.cc *.c > makefile.depend - -clean: -	rm -f *.o - -#clobber: clean -#	rm makefile.depend ../bin/${ARCH}/* diff --git a/gi/pyp-topics/src/clock_gettime_stub.c b/gi/pyp-topics/src/clock_gettime_stub.c deleted file mode 100644 index 4883b7c1..00000000 --- a/gi/pyp-topics/src/clock_gettime_stub.c +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c), MM Weiss - * All rights reserved. - *  - * Redistribution and use in source and binary forms, with or without modification,  - * are permitted provided that the following conditions are met: - *  - *     1. Redistributions of source code must retain the above copyright notice,  - *     this list of conditions and the following disclaimer. - *      - *     2. Redistributions in binary form must reproduce the above copyright notice,  - *     this list of conditions and the following disclaimer in the documentation  - *     and/or other materials provided with the distribution. - *      - *     3. Neither the name of the MM Weiss nor the names of its contributors  - *     may be used to endorse or promote products derived from this software without  - *     specific prior written permission. - *  - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY  - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT  - * SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  - * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)  - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR  - * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,  - * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - *  clock_gettime_stub.c - *  gcc -Wall -c clock_gettime_stub.c - *  posix realtime functions; MacOS user space glue - */ -  -/*  @comment - *  other possible implementation using intel builtin rdtsc - *  rdtsc-workaround: http://www.mcs.anl.gov/~kazutomo/rdtsc.html - *   - *  we could get the ticks by doing this - *  - *  __asm __volatile("mov %%ebx, %%esi\n\t" - *  		"cpuid\n\t" - *  		"xchg %%esi, %%ebx\n\t" - *  		"rdtsc" - *  		: "=a" (a), - *  		  "=d" (d) - * 	); -  - *  we could even replace our tricky sched_yield call by assembly code to get a better accurency, - *  anyway the following C stub will satisfy 99% of apps using posix clock_gettime call,  - *  moreover, the setter version (clock_settime) could be easly written using mach primitives: - *  http://www.opensource.apple.com/source/xnu/xnu-${VERSION}/osfmk/man/ (clock_[set|get]_time) - *   - *  hackers don't be crackers, don't you use a flush toilet? 
- *  - * - *  @see draft: ./posix-realtime-stub/posix-realtime-stub.c - * - */ -  - -#ifdef __APPLE__ - -#pragma weak clock_gettime - -#include <sys/time.h> -#include <sys/resource.h> -#include <mach/mach.h> -#include <mach/clock.h> -#include <mach/mach_time.h> -#include <errno.h> -#include <unistd.h> -#include <sched.h> - -typedef enum { -	CLOCK_REALTIME, -	CLOCK_MONOTONIC, -	CLOCK_PROCESS_CPUTIME_ID, -	CLOCK_THREAD_CPUTIME_ID -} clockid_t; - -static mach_timebase_info_data_t __clock_gettime_inf; - -static int clock_gettime(clockid_t clk_id, struct timespec *tp) { -	kern_return_t   ret; -	clock_serv_t    clk; -	clock_id_t clk_serv_id; -	mach_timespec_t tm; -	 -	uint64_t start, end, delta, nano; -	 -	//task_basic_info_data_t tinfo; -	//task_thread_times_info_data_t ttinfo; -	//mach_msg_type_number_t tflag; -	 -	int retval = -1; -	switch (clk_id) { -		case CLOCK_REALTIME: -		case CLOCK_MONOTONIC: -			clk_serv_id = clk_id == CLOCK_REALTIME ? CALENDAR_CLOCK : SYSTEM_CLOCK; -			if (KERN_SUCCESS == (ret = host_get_clock_service(mach_host_self(), clk_serv_id, &clk))) { -				if (KERN_SUCCESS == (ret = clock_get_time(clk, &tm))) { -					tp->tv_sec  = tm.tv_sec; -					tp->tv_nsec = tm.tv_nsec; -					retval = 0; -				} -			} -			if (KERN_SUCCESS != ret) { -				errno = EINVAL; -				retval = -1; -			} -		break; -		case CLOCK_PROCESS_CPUTIME_ID: -		case CLOCK_THREAD_CPUTIME_ID: -			start = mach_absolute_time(); -			if (clk_id == CLOCK_PROCESS_CPUTIME_ID) { -				getpid(); -			} else { -				sched_yield(); -			} -			end = mach_absolute_time(); -			delta = end - start;	 -			if (0 == __clock_gettime_inf.denom) { -				mach_timebase_info(&__clock_gettime_inf); -			} -			nano = delta * __clock_gettime_inf.numer / __clock_gettime_inf.denom; -			tp->tv_sec = nano * 1e-9;   -			tp->tv_nsec = nano - (tp->tv_sec * 1e9); -			retval = 0; -		break; -		default: -			errno = EINVAL; -			retval = -1; -	} -	return retval; -} - -#endif // __APPLE__ - -/* EOF */ diff --git a/gi/pyp-topics/src/contexts_corpus.cc b/gi/pyp-topics/src/contexts_corpus.cc deleted file mode 100644 index 92b1b34c..00000000 --- a/gi/pyp-topics/src/contexts_corpus.cc +++ /dev/null @@ -1,164 +0,0 @@ -#include <sstream> -#include <iostream> -#include <set> - -#include "contexts_corpus.hh" -#include "gzstream.hh" -#include "contexts_lexer.h" - -#include <boost/tuple/tuple.hpp> - - -using namespace std; - -////////////////////////////////////////////////// -// ContextsCorpus -////////////////////////////////////////////////// - -bool read_callback_binary_contexts = false; - -void read_callback(const ContextsLexer::PhraseContextsType& new_contexts, void* extra) { -  assert(new_contexts.contexts.size() == new_contexts.counts.size()); - -  boost::tuple<ContextsCorpus*, BackoffGenerator*, map<string,int>* >* extra_pair -    = static_cast< boost::tuple<ContextsCorpus*, BackoffGenerator*, map<string,int>* >* >(extra); - -  ContextsCorpus* corpus_ptr = extra_pair->get<0>(); -  BackoffGenerator* backoff_gen = extra_pair->get<1>(); -  //map<string,int>* counts = extra_pair->get<2>(); - -  Document* doc(new Document()); - -  //cout << "READ: " << new_contexts.phrase << "\t"; -  for (int i=0; i < (int)new_contexts.counts.size(); ++i) { -    int cache_word_count = corpus_ptr->m_dict.max(); - -    //string context_str = corpus_ptr->m_dict.toString(new_contexts.contexts[i]); -    int context_index = new_contexts.counts.at(i).first; -    string context_str = corpus_ptr->m_dict.toString(new_contexts.contexts[context_index]); - -    // filter out singleton contexts -    
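For the CPU-time clocks, the clock_gettime stub above differences mach_absolute_time(), scales the tick delta by the timebase fraction numer/denom to get nanoseconds, and splits the result into the timespec fields. A sketch of that arithmetic (the 125/3 timebase is illustrative; a real one comes from mach_timebase_info, and the C code does the final split in floating point rather than integer math):

def ticks_to_timespec(delta_ticks, numer=125, denom=3):
    nanos = delta_ticks * numer // denom                    # ticks -> nanoseconds
    return nanos // 1_000_000_000, nanos % 1_000_000_000    # (tv_sec, tv_nsec)

print(ticks_to_timespec(24_000_000))   # (1, 0): 24e6 ticks * 125/3 = 1e9 ns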
//if (!counts->empty()) { -    //  map<string,int>::const_iterator find_it = counts->find(context_str); -    //  if (find_it == counts->end() || find_it->second < 2) -    //    continue; -    //} - -    WordID id = corpus_ptr->m_dict.Convert(context_str); -    if (cache_word_count != corpus_ptr->m_dict.max()) { -      corpus_ptr->m_backoff->terms_at_level(0)++; -      corpus_ptr->m_num_types++; -    } - -    //int count = new_contexts.counts[i]; -    int count = new_contexts.counts.at(i).second; -    if (read_callback_binary_contexts) { -      doc->push_back(id); -      corpus_ptr->m_num_terms++; -    } -    else { -      for (int j=0; j<count; ++j) -        doc->push_back(id); -      corpus_ptr->m_num_terms += count; -    } - -    // generate the backoff map -    if (backoff_gen) { -      int order = 1; -      WordID backoff_id = id; -      //ContextsLexer::Context backedoff_context = new_contexts.contexts[i]; -      ContextsLexer::Context backedoff_context = new_contexts.contexts[context_index]; -      while (true) { -        if (!corpus_ptr->m_backoff->has_backoff(backoff_id)) { -          //cerr << "Backing off from " << corpus_ptr->m_dict.Convert(backoff_id) << " to "; -          backedoff_context = (*backoff_gen)(backedoff_context); - -          if (backedoff_context.empty()) { -            //cerr << "Nothing." << endl; -            (*corpus_ptr->m_backoff)[backoff_id] = -1; -            break; -          } - -          if (++order > corpus_ptr->m_backoff->order()) -            corpus_ptr->m_backoff->order(order); - -          int cache_word_count = corpus_ptr->m_dict.max(); -          int new_backoff_id = corpus_ptr->m_dict.Convert(backedoff_context); -          if (cache_word_count != corpus_ptr->m_dict.max()) -            corpus_ptr->m_backoff->terms_at_level(order-1)++; - -          //cerr << corpus_ptr->m_dict.Convert(new_backoff_id) << " ." 
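read_callback above turns each phrase's context list into a "document": every context string is interned as an integer id, and the id is repeated count times, or just once when binary contexts are enabled. A condensed sketch with a plain dict standing in for m_dict, and parallel lists in place of the (index, count) pairs:

def contexts_to_document(contexts, counts, vocab, binary=False):
    doc = []
    for ctx, count in zip(contexts, counts):
        cid = vocab.setdefault(ctx, len(vocab))   # intern, extending on first sight
        doc.extend([cid] if binary else [cid] * count)
    return doc

vocab = {}
print(contexts_to_document(["a <PHRASE> b", "c <PHRASE> d"], [3, 1], vocab))
# [0, 0, 0, 1]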
<< endl; - -          backoff_id = ((*corpus_ptr->m_backoff)[backoff_id] = new_backoff_id); -        } -        else break; -      } -    } -    //cout << context_str << " (" << id << ") ||| C=" << count << " ||| "; -  } -  //cout << endl; - -  //if (!doc->empty()) { -    corpus_ptr->m_documents.push_back(doc); -    corpus_ptr->m_keys.push_back(new_contexts.phrase); -  //} -} - -void filter_callback(const ContextsLexer::PhraseContextsType& new_contexts, void* extra) { -  assert(new_contexts.contexts.size() == new_contexts.counts.size()); - -  map<string,int>* context_counts = (static_cast<map<string,int>*>(extra)); - -  for (int i=0; i < (int)new_contexts.counts.size(); ++i) { -    int context_index = new_contexts.counts.at(i).first; -    int count = new_contexts.counts.at(i).second; -    //if (read_callback_binary_contexts) count = 1; -    //int count = new_contexts.counts[i]; -    pair<map<string,int>::iterator,bool> result  -      = context_counts->insert(make_pair(Dict::toString(new_contexts.contexts[context_index]),count)); -      //= context_counts->insert(make_pair(Dict::toString(new_contexts.contexts[i]),count)); -    if (!result.second) -      result.first->second += count; -  } -} - - -unsigned ContextsCorpus::read_contexts(const string &filename,  -                                       BackoffGenerator* backoff_gen_ptr, -                                       bool /*filter_singeltons*/, -                                       bool binary_contexts) { -  read_callback_binary_contexts = binary_contexts; - -  map<string,int> counts; -  //if (filter_singeltons)  -  { -  //  cerr << "--- Filtering singleton contexts ---" << endl; - -    igzstream in(filename.c_str()); -    ContextsLexer::ReadContexts(&in, filter_callback, &counts); -  } - -  m_num_terms = 0; -  m_num_types = 0; - -  igzstream in(filename.c_str()); -  boost::tuple<ContextsCorpus*, BackoffGenerator*, map<string,int>* > extra_pair(this,backoff_gen_ptr,&counts); -  ContextsLexer::ReadContexts(&in, read_callback, &extra_pair); - -  //m_num_types = m_dict.max(); - -  cerr << "Read backoff with order " << m_backoff->order() << "\n"; -  for (int o=0; o<m_backoff->order(); o++) -    cerr << "  Terms at " << o << " = " << m_backoff->terms_at_level(o) << endl; -  //cerr << endl; - -  int i=0; double av_freq=0; -  for (map<string,int>::const_iterator it=counts.begin(); it != counts.end(); ++it, ++i) { -    WordID id = m_dict.Convert(it->first); -    m_context_counts[id] = it->second; -    av_freq += it->second; -  } -  cerr << "  Average term frequency = " << av_freq / (double) i << endl; - -  return m_documents.size(); -} diff --git a/gi/pyp-topics/src/contexts_corpus.hh b/gi/pyp-topics/src/contexts_corpus.hh deleted file mode 100644 index 2527f655..00000000 --- a/gi/pyp-topics/src/contexts_corpus.hh +++ /dev/null @@ -1,90 +0,0 @@ -#ifndef _CONTEXTS_CORPUS_HH -#define _CONTEXTS_CORPUS_HH - -#include <vector> -#include <string> -#include <map> -#include <tr1/unordered_map> - -#include <boost/ptr_container/ptr_vector.hpp> - -#include "corpus.hh" -#include "contexts_lexer.h" -#include "dict.h" - - -class BackoffGenerator { -public: -  virtual ContextsLexer::Context -    operator()(const ContextsLexer::Context& c) = 0; - -protected: -  ContextsLexer::Context strip_edges(const ContextsLexer::Context& c) { -    if (c.size() <= 1) return ContextsLexer::Context(); -    assert(c.size() % 2 == 1); -    return ContextsLexer::Context(c.begin() + 1, c.end() - 1); -  } -}; - -class NullBackoffGenerator : public BackoffGenerator { -  virtual 
ContextsLexer::Context -    operator()(const ContextsLexer::Context&)  -    { return ContextsLexer::Context(); } -}; - -class SimpleBackoffGenerator : public BackoffGenerator { -  virtual ContextsLexer::Context -    operator()(const ContextsLexer::Context& c) {  -      if (c.size() <= 3) -        return ContextsLexer::Context(); -      return strip_edges(c);  -    } -}; - - -//////////////////////////////////////////////////////////////// -// ContextsCorpus -//////////////////////////////////////////////////////////////// - -class ContextsCorpus : public Corpus { -  friend void read_callback(const ContextsLexer::PhraseContextsType&, void*); - -public: -    ContextsCorpus() : m_backoff(new TermBackoff) {} -    virtual ~ContextsCorpus() {} - -    virtual unsigned read_contexts(const std::string &filename,  -                                   BackoffGenerator* backoff_gen=0, -                                   bool filter_singeltons=false, -                                   bool binary_contexts=false); - -    TermBackoffPtr backoff_index() { -      return m_backoff; -    } - -    std::vector<std::string> context2string(const WordID& id) const { -      std::vector<std::string> res; -      assert (id >= 0); -      m_dict.AsVector(id, &res); -      return res; -    } - -    virtual int context_count(const WordID& id) const { -      return m_context_counts.find(id)->second; -    } - - -    const std::string& key(const int& i) const { -      return m_keys.at(i); -    } - -    const Dict& dict() const { return m_dict; } - -protected: -    TermBackoffPtr m_backoff; -    Dict m_dict; -    std::vector<std::string> m_keys; -    std::tr1::unordered_map<int,int> m_context_counts; -}; - -#endif // _CONTEXTS_CORPUS_HH diff --git a/gi/pyp-topics/src/contexts_lexer.h b/gi/pyp-topics/src/contexts_lexer.h deleted file mode 100644 index 66004990..00000000 --- a/gi/pyp-topics/src/contexts_lexer.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _CONTEXTS_LEXER_H_ -#define _CONTEXTS_LEXER_H_  - -#include <iostream> -#include <vector> -#include <string> - -#include "dict.h"  - -struct ContextsLexer { -  typedef std::vector<std::string> Context; -  struct PhraseContextsType { -    std::string          phrase; -    std::vector<Context> contexts; -    std::vector< std::pair<int,int> >     counts; -  }; - -  typedef void (*ContextsCallback)(const PhraseContextsType& new_contexts, void* extra); -  static void ReadContexts(std::istream* in, ContextsCallback func, void* extra); -}; - -#endif diff --git a/gi/pyp-topics/src/contexts_lexer.l b/gi/pyp-topics/src/contexts_lexer.l deleted file mode 100644 index 64cd7ca3..00000000 --- a/gi/pyp-topics/src/contexts_lexer.l +++ /dev/null @@ -1,113 +0,0 @@ -%{ -#include "contexts_lexer.h" - -#include <string> -#include <iostream> -#include <sstream> -#include <cstring> -#include <cassert> -#include <algorithm> - -int lex_line = 0; -std::istream* contextslex_stream = NULL; -ContextsLexer::ContextsCallback contexts_callback = NULL; -void* contexts_callback_extra = NULL; - -#undef YY_INPUT -#define YY_INPUT(buf, result, max_size) (result = contextslex_stream->read(buf, max_size).gcount()) - -#define YY_SKIP_YYWRAP 1 -int num_phrases = 0; -int yywrap() { return 1; } - -#define MAX_TOKEN_SIZE 255 -std::string contextslex_tmp_token(MAX_TOKEN_SIZE, '\0'); -ContextsLexer::PhraseContextsType current_contexts; - -#define MAX_CONTEXT_SIZE 255 -//std::string tmp_context[MAX_CONTEXT_SIZE]; -ContextsLexer::Context tmp_context; - - -void contextslex_reset() { -  current_contexts.phrase.clear(); -  
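SimpleBackoffGenerator above backs a context off by stripping one word from each edge (strip_edges) until three or fewer elements remain, and read_callback chains the resulting ids into the TermBackoff map. A sketch of the chain this produces, assuming odd-length contexts with the phrase gap in the middle:

def backoff_chain(context):
    # Strip the outermost word pair until <= 3 elements remain,
    # mirroring SimpleBackoffGenerator applied repeatedly via strip_edges.
    while len(context) > 3:
        context = context[1:-1]
        yield context

for c in backoff_chain(("the", "big", "<PHRASE>", "red", "dog")):
    print(c)   # ('big', '<PHRASE>', 'red')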
current_contexts.contexts.clear(); -  current_contexts.counts.clear(); -  tmp_context.clear(); -} - -%} - -INT [\-+]?[0-9]+|inf|[\-+]inf - -%x CONTEXT COUNT COUNT_END -%% - -<INITIAL>[^\t]+	{  -    contextslex_reset(); -    current_contexts.phrase.assign(yytext, yyleng); -    BEGIN(CONTEXT); -  } -<INITIAL>\t	{  -    ;  -  } - -<INITIAL,CONTEXT,COUNT>\n	{ -    std::cerr << "ERROR: contexts_lexer.l: unexpected newline while trying to read phrase|context|count." << std::endl; -    abort(); -  } - -<CONTEXT>\|\|\|	{ -    current_contexts.contexts.push_back(tmp_context); -    tmp_context.clear(); -		BEGIN(COUNT); -	} -<CONTEXT>[^ \t]+	{  -		contextslex_tmp_token.assign(yytext, yyleng); -    tmp_context.push_back(contextslex_tmp_token); -  } -<CONTEXT>[ \t]+	{ ; } - -<COUNT>[ \t]+	{ ; } -<COUNT>C={INT} {  -		current_contexts.counts.push_back(std::make_pair(current_contexts.counts.size(), atoi(yytext+2))); -    BEGIN(COUNT_END); -  } -<COUNT>.	{  -    std::cerr << "ERROR: contexts_lexer.l: unexpected content while reading count." << std::endl; -    abort(); -  } - -<COUNT_END>[ \t]+  { ; } -<COUNT_END>\|\|\|	{ -		BEGIN(CONTEXT); -  } -<COUNT_END>\n { -    //std::cerr << "READ:" << current_contexts.phrase << " with " << current_contexts.contexts.size()  -    //  << " contexts, and " << current_contexts.counts.size() << " counts." << std::endl; -    std::sort(current_contexts.counts.rbegin(), current_contexts.counts.rend());  - -		contexts_callback(current_contexts, contexts_callback_extra); -    current_contexts.phrase.clear(); -    current_contexts.contexts.clear(); -    current_contexts.counts.clear(); -		BEGIN(INITIAL); -  } -<COUNT_END>.  {  -		contextslex_tmp_token.assign(yytext, yyleng); -    std::cerr << "ERROR: contexts_lexer.l: unexpected content while looking for ||| closing count." 
<< std::endl; -    abort(); -  } - -%% - -#include "filelib.h"  - -void ContextsLexer::ReadContexts(std::istream* in, ContextsLexer::ContextsCallback func, void* extra) { -  lex_line = 1; -  contextslex_stream = in; -  contexts_callback_extra = extra, -  contexts_callback = func; -  yylex(); -} - diff --git a/gi/pyp-topics/src/corpus.cc b/gi/pyp-topics/src/corpus.cc deleted file mode 100644 index f182381f..00000000 --- a/gi/pyp-topics/src/corpus.cc +++ /dev/null @@ -1,104 +0,0 @@ -#include <sstream> -#include <iostream> -#include <set> - -#include "corpus.hh" -#include "gzstream.hh" - -using namespace std; - -////////////////////////////////////////////////// -// Corpus -////////////////////////////////////////////////// - -Corpus::Corpus() : m_num_terms(0), m_num_types(0) {} - -unsigned Corpus::read(const std::string &filename) { -  m_num_terms = 0; -  m_num_types = 0; -  std::set<int> seen_types; - -  igzstream in(filename.c_str()); - -  string buf; -  int token; -  unsigned doc_count=0; -  while (getline(in, buf)) { -    Document* doc(new Document()); -    istringstream ss(buf); - -    ss >> token; // the number of unique terms - -    char delimeter; -    int count; -    while(ss >> token >> delimeter >> count) { -      for (int i=0; i<count; ++i) -        doc->push_back(token); -      m_num_terms += count; -      seen_types.insert(token); -    } - -    m_documents.push_back(doc); -    doc_count++; -  } - -  m_num_types = seen_types.size(); - -  return doc_count; -} - - -////////////////////////////////////////////////// -// TestCorpus -////////////////////////////////////////////////// - -TestCorpus::TestCorpus() {} - -void TestCorpus::read(const std::string &filename) { -  igzstream in(filename.c_str()); - -  string buf; -  Term term; -  DocumentId doc; -  char delimeter; -  while (getline(in, buf)) { -    DocumentTerms* line(new DocumentTerms()); -    istringstream ss(buf); - -    while(ss >> doc >> delimeter >> term) -      line->push_back(DocumentTerm(doc, term)); - -    m_lines.push_back(line); -  } -} - -////////////////////////////////////////////////// -// TermBackoff -////////////////////////////////////////////////// - -void TermBackoff::read(const std::string &filename) { -  igzstream in(filename.c_str()); - -  string buf; -  int num_terms; -  getline(in, buf); -  istringstream ss(buf);  -  ss >> num_terms >> m_backoff_order; - -  m_dict.resize(num_terms, -1); -  for (int i=0; i<m_backoff_order; ++i) { -    int count; ss >> count; -    m_terms_at_order.push_back(count); -  } - -  Term term, backoff; -  while (getline(in, buf)) { -    istringstream ss(buf); -    ss >> term >> backoff; - -    assert(term < num_terms); -    assert(term >= 0); - -    m_dict[term] = backoff; -  } -} diff --git a/gi/pyp-topics/src/corpus.hh b/gi/pyp-topics/src/corpus.hh deleted file mode 100644 index 2aa03527..00000000 --- a/gi/pyp-topics/src/corpus.hh +++ /dev/null @@ -1,133 +0,0 @@ -#ifndef _CORPUS_HH -#define _CORPUS_HH - -#include <vector> -#include <string> -#include <map> -#include <limits> - -#include <boost/shared_ptr.hpp> -#include <boost/ptr_container/ptr_vector.hpp> - -//////////////////////////////////////////////////////////////// -// Corpus -//////////////////////////////////////////////////////////////// -typedef int Term; - -typedef std::vector<Term> Document; -typedef std::vector<Term> Terms; - -class Corpus { -public: -    typedef boost::ptr_vector<Document>::const_iterator const_iterator; - -public: -    Corpus(); -    virtual ~Corpus() {} - -    virtual unsigned read(const 
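Corpus::read above expects one document per line: a leading count of unique terms followed by term:count pairs, each expanded into count copies of the term id. A Python 3 sketch of the same reader (gzip text mode plays the role of igzstream; the format is inferred from the parse):

import gzip

def read_corpus(path):
    documents = []
    with gzip.open(path, "rt") as f:
        for line in f:
            fields = line.split()
            doc = []
            for pair in fields[1:]:            # fields[0] = number of unique terms
                term, count = pair.split(":")
                doc.extend([int(term)] * int(count))
            documents.append(doc)
    return documents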
std::string &filename); - -    const_iterator begin() const { return m_documents.begin(); } -    const_iterator end() const { return m_documents.end(); } - -    const Document& at(size_t i) const { return m_documents.at(i); } - -    int num_documents() const { return m_documents.size(); } -    int num_terms() const { return m_num_terms; } -    int num_types() const { return m_num_types; } - -    virtual int context_count(const int&) const { -      return std::numeric_limits<int>::max(); -    } - -protected: -    int m_num_terms, m_num_types; -    boost::ptr_vector<Document> m_documents;  -}; - -typedef int DocumentId; -struct DocumentTerm { -  DocumentTerm(DocumentId d, Term t) : term(t), doc(d) {} -  Term term; -  DocumentId doc; -}; -typedef std::vector<DocumentTerm> DocumentTerms; - -class TestCorpus { -public: -    typedef boost::ptr_vector<DocumentTerms>::const_iterator const_iterator; - -public: -    TestCorpus(); -    ~TestCorpus() {} - -    void read(const std::string &filename); - -    const_iterator begin() const { return m_lines.begin(); } -    const_iterator end() const { return m_lines.end(); } - -    int num_instances() const { return m_lines.size(); } - -protected: -    boost::ptr_vector<DocumentTerms> m_lines;  -}; - -class TermBackoff { -public: -    typedef std::vector<Term> dictionary_type; -    typedef dictionary_type::const_iterator const_iterator; -    const static int NullBackoff=-1; - -public: -    TermBackoff() { order(1); } -    ~TermBackoff() {} - -    void read(const std::string &filename); - -    const_iterator begin() const { return m_dict.begin(); } -    const_iterator end() const { return m_dict.end(); } - -    const Term& operator[](const Term& t) const { -      assert(t < static_cast<int>(m_dict.size())); -      return m_dict[t]; -    } - -    Term& operator[](const Term& t) { -      if (t >= static_cast<int>(m_dict.size())) -        m_dict.resize(t+1, -1); -      return m_dict[t]; -    } - -    bool has_backoff(const Term& t) { -      return t >= 0 && t < static_cast<int>(m_dict.size()) && m_dict[t] >= 0; -    } - -    int order() const { return m_backoff_order; } -    void order(int o) {  -      if (o >= (int)m_terms_at_order.size()) -        m_terms_at_order.resize(o, 0); -      m_backoff_order = o;  -    } - -//    int levels() const { return m_terms_at_order.size(); } -    bool is_null(const Term& term) const { return term < 0; } -    int terms_at_level(int level) const {  -      assert (level < (int)m_terms_at_order.size()); -      return m_terms_at_order.at(level); -    } - -    int& terms_at_level(int level) {  -      assert (level < (int)m_terms_at_order.size()); -      return m_terms_at_order.at(level); -    } - -    int size() const { return m_dict.size(); } - -protected: -    dictionary_type m_dict;  -    int m_backoff_order; -    std::vector<int> m_terms_at_order; -}; -typedef boost::shared_ptr<TermBackoff> TermBackoffPtr; - -#endif // _CORPUS_HH diff --git a/gi/pyp-topics/src/gammadist.c b/gi/pyp-topics/src/gammadist.c deleted file mode 100644 index 4e260db8..00000000 --- a/gi/pyp-topics/src/gammadist.c +++ /dev/null @@ -1,247 +0,0 @@ -/* gammadist.c -- computes probability of samples under / produces samples from a Gamma distribution - * - * Mark Johnson, 22nd March 2008 - * - * WARNING: you need to set the flag -std=c99 to compile - * - * gammavariate() was translated from random.py in Python library - * - * The Gamma distribution is: - * - *   Gamma(x | alpha, beta) = pow(x/beta, alpha-1) * exp(-x/beta) / (gamma(alpha)*beta) - * - * shape 
parameter alpha > 0 (also called c), scale parameter beta > 0 (also called s);  - * mean is alpha*beta, variance is alpha*beta**2 - * - * Note that many parameterizations of the Gamma function are in terms of an _inverse_ - * scale parameter beta, which is the inverse of the beta given here. - * - * To define a main() that tests the routines, uncomment the following #define: - */ -/* #define GAMMATEST */ - -#include <assert.h> -#include <math.h>  - -#include "gammadist.h" -#include "mt19937ar.h" - -/* gammadist() returns the probability density of x under a Gamma(alpha,beta)  - * distribution - */ - -long double gammadist(long double x, long double alpha, long double beta) { -  assert(alpha > 0); -  assert(beta > 0); -  return  pow(x/beta, alpha-1) * exp(-x/beta) / (tgamma(alpha)*beta); -} - -/* lgammadist() returns the log probability density of x under a Gamma(alpha,beta) - * distribution - */ - -long double lgammadist(long double x, long double alpha, long double beta) { -  assert(alpha > 0); -  assert(beta > 0); -  return (alpha-1)*log(x) - alpha*log(beta) - x/beta - lgamma(alpha); -} - -/* This definition of gammavariate is from Python code in - * the Python random module. - */ - -long double gammavariate(long double alpha, long double beta) { - -  assert(alpha > 0); -  assert(beta > 0); - -  if (alpha > 1.0) { -     -    /* Uses R.C.H. Cheng, "The generation of Gamma variables with -       non-integral shape parameters", Applied Statistics, (1977), 26, -       No. 1, p71-74 */ - -    long double ainv = sqrt(2.0 * alpha - 1.0); -    long double bbb = alpha - log(4.0); -    long double ccc = alpha + ainv; -     -    while (1) { -      long double u1 = mt_genrand_real3(); -      if (u1 > 1e-7  || u1 < 0.9999999) { -	long double u2 = 1.0 - mt_genrand_real3(); -	long double v = log(u1/(1.0-u1))/ainv; -	long double x = alpha*exp(v); -	long double z = u1*u1*u2; -	long double r = bbb+ccc*v-x; -	if (r + (1.0+log(4.5)) - 4.5*z >= 0.0 || r >= log(z)) -	  return x * beta; -      } -    } -  } -  else if (alpha == 1.0) { -    long double u = mt_genrand_real3(); -    while (u <= 1e-7) -      u = mt_genrand_real3(); -    return -log(u) * beta; -  } -  else {  -    /* alpha is between 0 and 1 (exclusive)  -       Uses ALGORITHM GS of Statistical Computing - Kennedy & Gentle */ -     -    while (1) { -      long double u = mt_genrand_real3(); -      long double b = (exp(1) + alpha)/exp(1); -      long double p = b*u; -      long double x = (p <= 1.0) ? pow(p, 1.0/alpha) : -log((b-p)/alpha); -      long double u1 = mt_genrand_real3(); -      if (! (((p <= 1.0) && (u1 > exp(-x))) || -	     ((p > 1.0)  &&  (u1 > pow(x, alpha - 1.0))))) -	return x * beta; -    } -  } -} - -/* betadist() returns the probability density of x under a Beta(alpha,beta) - * distribution. - */ - -long double betadist(long double x, long double alpha, long double beta) { -  assert(x >= 0); -  assert(x <= 1); -  assert(alpha > 0); -  assert(beta > 0); -  return pow(x,alpha-1)*pow(1-x,beta-1)*tgamma(alpha+beta)/(tgamma(alpha)*tgamma(beta)); -} - -/* lbetadist() returns the log probability density of x under a Beta(alpha,beta) - * distribution. - */ - -long double lbetadist(long double x, long double alpha, long double beta) { -  assert(x > 0); -  assert(x < 1); -  assert(alpha > 0); -  assert(beta > 0); -  return (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta); -} - -/* betavariate() generates a sample from a Beta distribution with - * parameters alpha and beta. 
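gammavariate above is, per the file header, a translation of Python's random module, but the guard (u1 > 1e-7 || u1 < 0.9999999) is vacuous: every u1 satisfies at least one disjunct, so the intended re-draw for extreme u1 never happens (CPython's original tests the conjunction 1e-7 < u1 < 0.9999999). Under the scale parameterization used here (mean alpha*beta, variance alpha*beta**2), Python's random.gammavariate matches, which makes a quick moment check easy:

import random
random.seed(0)

alpha, beta = 3.0, 2.0
samples = [random.gammavariate(alpha, beta) for _ in range(200_000)]
mean = sum(samples) / len(samples)
var = sum((x - mean) ** 2 for x in samples) / len(samples)
print(mean, alpha * beta)       # both ~ 6.0
print(var, alpha * beta ** 2)   # both ~ 12.0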
- * - * 0 < alpha < 1, 0 < beta < 1, mean is alpha/(alpha+beta) - */ - -long double betavariate(long double alpha, long double beta) { -  long double x = gammavariate(alpha, 1); -  long double y = gammavariate(beta, 1); -  return x/(x+y); -} - -#ifdef GAMMATEST -#include <stdio.h> - -int main(int argc, char **argv) { -  int iteration, niterations = 1000; - -  for (iteration = 0; iteration < niterations; ++iteration) { -    long double alpha = 100*mt_genrand_real3(); -    long double gv = gammavariate(alpha, 1); -    long double pgv = gammadist(gv, alpha, 1); -    long double pgvl = exp(lgammadist(gv, alpha, 1)); -    fprintf(stderr, "iteration = %d, gammavariate(%lg,1) = %lg, gammadist(%lg,%lg,1) = %lg, exp(lgammadist(%lg,%lg,1) = %lg\n", -	    iteration, alpha, gv, gv, alpha, pgv, gv, alpha, pgvl); -  } -  return 0; -} - -#endif /* GAMMATEST */ - - -/* Other routines I tried, but which weren't as good as the ones above */ - -#if 0 - -/*! gammavariate() returns samples from a Gamma distribution - *! where alpha is the shape parameter and beta is the scale  - *! parameter, using the algorithm described on p. 94 of  - *! Gentle (1998) Random Number Generation and Monte Carlo Methods,  - *! Springer. - */ - -long double gammavariate(long double alpha) { - -  assert(alpha > 0);  -   -  if (alpha > 1.0) { -    while (1) { -      long double u1 = mt_genrand_real3(); -      long double u2 = mt_genrand_real3(); -      long double v = (alpha - 1/(6*alpha))*u1/(alpha-1)*u2; -      if (2*(u2-1)/(alpha-1) + v + 1/v <= 2  -         || 2*log(u2)/(alpha-1) - log(v) + v <= 1) -	return (alpha-1)*v; -    } -  } else if (alpha < 1.0) {   -    while (1) { -      long double t = 0.07 + 0.75*sqrt(1-alpha); -      long double b = alpha + exp(-t)*alpha/t; -      long double u1 = mt_genrand_real3(); -      long double u2 = mt_genrand_real3(); -      long double v = b*u1; -      if (v <= 1) { -	long double x = t*pow(v, 1/alpha); -	if (u2 <= (2 - x)/(2 + x)) -	  return x; -	if (u2 <= exp(-x)) -	  return x; -      } -      else { -	long double x = log(t*(b-v)/alpha); -	long double y = x/t; -	if (u2*(alpha + y*(1-alpha)) <= 1) -	  return x; -	if (u2 <= pow(y,alpha-1)) -	  return x; -      } -    } -  } -  else   -    return -log(mt_genrand_real3()); -}  - - -/*! gammavariate() returns a deviate distributed as a gamma - *! distribution of order alpha, beta, i.e., a waiting time to the alpha'th - *! event in a Poisson process of unit mean. - *! - *! 
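betavariate above draws X ~ Gamma(alpha, 1) and Y ~ Gamma(beta, 1) and returns X/(X+Y), which is Beta(alpha, beta) distributed for any alpha, beta > 0; the comment's "0 < alpha < 1, 0 < beta < 1" restriction looks like a documentation slip rather than a real requirement. A sketch of the same construction:

import random

def betavariate(alpha, beta):
    # X ~ Gamma(alpha, 1), Y ~ Gamma(beta, 1)  =>  X/(X+Y) ~ Beta(alpha, beta)
    x = random.gammavariate(alpha, 1.0)
    y = random.gammavariate(beta, 1.0)
    return x / (x + y)

random.seed(1)
draws = [betavariate(2.0, 5.0) for _ in range(100_000)]
print(sum(draws) / len(draws))   # ~ 2/(2+5) = 0.286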
Code from Numerical Recipes - */ - -long double nr_gammavariate(long double ia) { -  int j; -  long double am,e,s,v1,v2,x,y; -  assert(ia > 0); -  if (ia < 10) {  -    x=1.0;  -    for (j=1;j<=ia;j++)  -      x *= mt_genrand_real3(); -    x = -log(x); -  } else {  -    do { -      do { -	do {  -	  v1=mt_genrand_real3(); -	  v2=2.0*mt_genrand_real3()-1.0; -	} while (v1*v1+v2*v2 > 1.0);  -	y=v2/v1; -	am=ia-1; -	s=sqrt(2.0*am+1.0); -	x=s*y+am; -      } while (x <= 0.0); -      e=(1.0+y*y)*exp(am*log(x/am)-s*y); -    } while (mt_genrand_real3() > e); -  } -  return x; -}  - -#endif diff --git a/gi/pyp-topics/src/gammadist.h b/gi/pyp-topics/src/gammadist.h deleted file mode 100644 index b6ad6c40..00000000 --- a/gi/pyp-topics/src/gammadist.h +++ /dev/null @@ -1,72 +0,0 @@ -/* gammadist.h -- computes probability of samples under / produces samples from a Gamma distribution - * - * Mark Johnson, 22nd March 2008 - * - * gammavariate() was translated from random.py in Python library - * - * The Gamma distribution is: - * - *   Gamma(x | alpha, beta) = pow(x/beta, alpha-1) * exp(-x/beta) / (gamma(alpha)*beta) - * - * shape parameter alpha > 0 (also called c), scale parameter beta > 0 (also called s);  - * mean is alpha*beta, variance is alpha*beta**2 - * - * Note that many parameterizations of the Gamma function are in terms of an _inverse_ - * scale parameter beta, which is the inverse of the beta given here. - */ - -#ifndef GAMMADIST_H -#define GAMMADIST_H - -#ifdef __cplusplus -extern "C" { -#endif -   -  /* gammadist() returns the probability density of x under a Gamma(alpha,beta)  -   * distribution -   */ - -  long double gammadist(long double x, long double alpha, long double beta); - -  /* lgammadist() returns the log probability density of x under a Gamma(alpha,beta) -   * distribution -   */ - -  long double lgammadist(long double x, long double alpha, long double beta); - -  /* gammavariate() generates samples from a Gamma distribution -   * conditioned on the parameters alpha and beta. -   *  -   * alpha > 0, beta > 0, mean is alpha*beta, variance is alpha*beta**2 -   * -   * Warning: a few older sources define the gamma distribution in terms -   * of alpha > -1.0 -   */ - -  long double gammavariate(long double alpha, long double beta); - -  /* betadist() returns the probability density of x under a Beta(alpha,beta) -   * distribution. -   */ - -  long double betadist(long double x, long double alpha, long double beta); - -  /* lbetadist() returns the log probability density of x under a Beta(alpha,beta) -   * distribution. -   */ -   -  long double lbetadist(long double x, long double alpha, long double beta); - -  /* betavariate() generates a sample from a Beta distribution with -   * parameters alpha and beta. -   * -   * 0 < alpha < 1, 0 < beta < 1, mean is alpha/(alpha+beta) -   */ - -  long double betavariate(long double alpha, long double beta); - -#ifdef __cplusplus -}; -#endif - -#endif /* GAMMADIST_H */ diff --git a/gi/pyp-topics/src/gzstream.cc b/gi/pyp-topics/src/gzstream.cc deleted file mode 100644 index 7c4d3a12..00000000 --- a/gi/pyp-topics/src/gzstream.cc +++ /dev/null @@ -1,165 +0,0 @@ -// ============================================================================ -// gzstream, C++ iostream classes wrapping the zlib compression library. 
-// Copyright (C) 2001  Deepak Bandyopadhyay, Lutz Kettner -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA -// ============================================================================ -// -// File          : gzstream.C -// Revision      : $Revision: 1.1 $ -// Revision_date : $Date: 2006/03/30 04:05:52 $ -// Author(s)     : Deepak Bandyopadhyay, Lutz Kettner -//  -// Standard streambuf implementation following Nicolai Josuttis, "The  -// Standard C++ Library". -// ============================================================================ - -#include "gzstream.hh" -#include <iostream> -#include <string.h>  // for memcpy - -#ifdef GZSTREAM_NAMESPACE -namespace GZSTREAM_NAMESPACE { -#endif - -// ---------------------------------------------------------------------------- -// Internal classes to implement gzstream. See header file for user classes. -// ---------------------------------------------------------------------------- - -// -------------------------------------- -// class gzstreambuf: -// -------------------------------------- - -gzstreambuf* gzstreambuf::open( const char* name, int open_mode) { -    if ( is_open()) -        return (gzstreambuf*)0; -    mode = open_mode; -    // no append nor read/write mode -    if ((mode & std::ios::ate) || (mode & std::ios::app) -        || ((mode & std::ios::in) && (mode & std::ios::out))) -        return (gzstreambuf*)0; -    char  fmode[10]; -    char* fmodeptr = fmode; -    if ( mode & std::ios::in) -        *fmodeptr++ = 'r'; -    else if ( mode & std::ios::out) -        *fmodeptr++ = 'w'; -    *fmodeptr++ = 'b'; -    *fmodeptr = '\0'; -    file = gzopen( name, fmode); -    if (file == 0) -        return (gzstreambuf*)0; -    opened = 1; -    return this; -} - -gzstreambuf * gzstreambuf::close() { -    if ( is_open()) { -        sync(); -        opened = 0; -        if ( gzclose( file) == Z_OK) -            return this; -    } -    return (gzstreambuf*)0; -} - -int gzstreambuf::underflow() { // used for input buffer only -    if ( gptr() && ( gptr() < egptr())) -        return * reinterpret_cast<unsigned char *>( gptr()); - -    if ( ! (mode & std::ios::in) || ! 
opened) -        return EOF; -    // Josuttis' implementation of inbuf -    int n_putback = gptr() - eback(); -    if ( n_putback > 4) -        n_putback = 4; -    memcpy( buffer + (4 - n_putback), gptr() - n_putback, n_putback); - -    int num = gzread( file, buffer+4, bufferSize-4); -    if (num <= 0) // ERROR or EOF -        return EOF; - -    // reset buffer pointers -    setg( buffer + (4 - n_putback),   // beginning of putback area -          buffer + 4,                 // read position -          buffer + 4 + num);          // end of buffer - -    // return next character -    return * reinterpret_cast<unsigned char *>( gptr());     -} - -int gzstreambuf::flush_buffer() { -    // Separate the writing of the buffer from overflow() and -    // sync() operation. -    int w = pptr() - pbase(); -    if ( gzwrite( file, pbase(), w) != w) -        return EOF; -    pbump( -w); -    return w; -} - -int gzstreambuf::overflow( int c) { // used for output buffer only -    if ( ! ( mode & std::ios::out) || ! opened) -        return EOF; -    if (c != EOF) { -        *pptr() = c; -        pbump(1); -    } -    if ( flush_buffer() == EOF) -        return EOF; -    return c; -} - -int gzstreambuf::sync() { -    // Changed to use flush_buffer() instead of overflow( EOF) -    // which caused improper behavior with std::endl and flush(), -    // bug reported by Vincent Ricard. -    if ( pptr() && pptr() > pbase()) { -        if ( flush_buffer() == EOF) -            return -1; -    } -    return 0; -} - -// -------------------------------------- -// class gzstreambase: -// -------------------------------------- - -gzstreambase::gzstreambase( const char* name, int mode) { -    init( &buf); -    open( name, mode); -} - -gzstreambase::~gzstreambase() { -    buf.close(); -} - -void gzstreambase::open( const char* name, int open_mode) { -    if ( ! buf.open( name, open_mode)) -        clear( rdstate() | std::ios::badbit); -} - -void gzstreambase::close() { -    if ( buf.is_open()) -        if ( ! buf.close()) -            clear( rdstate() | std::ios::badbit); -} - -#ifdef GZSTREAM_NAMESPACE -} // namespace GZSTREAM_NAMESPACE -#endif - -// ============================================================================ -// EOF // diff --git a/gi/pyp-topics/src/gzstream.hh b/gi/pyp-topics/src/gzstream.hh deleted file mode 100644 index ad9785fd..00000000 --- a/gi/pyp-topics/src/gzstream.hh +++ /dev/null @@ -1,121 +0,0 @@ -// ============================================================================ -// gzstream, C++ iostream classes wrapping the zlib compression library. -// Copyright (C) 2001  Deepak Bandyopadhyay, Lutz Kettner -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU -// Lesser General Public License for more details. 
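gzstreambuf above adapts zlib's gzread/gzwrite to a std::streambuf with a four-byte putback area, which is what lets igzstream and ogzstream be used exactly like ifstream and ofstream; the corpus readers in these sources rely on that pattern (igzstream in(...) followed by getline). The equivalent round trip in Python, for reference (filename illustrative):

import gzip

with gzip.open("corpus.gz", "wt") as f:          # ogzstream analogue
    f.write("3 0:2 5:1 9:4\n")

with gzip.open("corpus.gz", "rt") as f:          # igzstream + getline analogue
    for line in f:
        print(line.rstrip("\n"))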
-// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA -// ============================================================================ -// -// File          : gzstream.h -// Revision      : $Revision: 1.1 $ -// Revision_date : $Date: 2006/03/30 04:05:52 $ -// Author(s)     : Deepak Bandyopadhyay, Lutz Kettner -//  -// Standard streambuf implementation following Nicolai Josuttis, "The  -// Standard C++ Library". -// ============================================================================ - -#ifndef GZSTREAM_H -#define GZSTREAM_H 1 - -// standard C++ with new header file names and std:: namespace -#include <iostream> -#include <fstream> -#include <zlib.h> - -#ifdef GZSTREAM_NAMESPACE -namespace GZSTREAM_NAMESPACE { -#endif - -// ---------------------------------------------------------------------------- -// Internal classes to implement gzstream. See below for user classes. -// ---------------------------------------------------------------------------- - -class gzstreambuf : public std::streambuf { -private: -    static const int bufferSize = 47+256;    // size of data buff -    // totals 512 bytes under g++ for igzstream at the end. - -    gzFile           file;               // file handle for compressed file -    char             buffer[bufferSize]; // data buffer -    char             opened;             // open/close state of stream -    int              mode;               // I/O mode - -    int flush_buffer(); -public: -    gzstreambuf() : opened(0) { -        setp( buffer, buffer + (bufferSize-1)); -        setg( buffer + 4,     // beginning of putback area -              buffer + 4,     // read position -              buffer + 4);    // end position       -        // ASSERT: both input & output capabilities will not be used together -    } -    int is_open() { return opened; } -    gzstreambuf* open( const char* name, int open_mode); -    gzstreambuf* close(); -    ~gzstreambuf() { close(); } -     -    virtual int     overflow( int c = EOF); -    virtual int     underflow(); -    virtual int     sync(); -}; - -class gzstreambase : virtual public std::ios { -protected: -    gzstreambuf buf; -public: -    gzstreambase() { init(&buf); } -    gzstreambase( const char* name, int open_mode); -    ~gzstreambase(); -    void open( const char* name, int open_mode); -    void close(); -    gzstreambuf* rdbuf() { return &buf; } -}; - -// ---------------------------------------------------------------------------- -// User classes. Use igzstream and ogzstream analogously to ifstream and -// ofstream respectively. They read and write files based on the gz*  -// function interface of the zlib. Files are compatible with gzip compression. 
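Skipping ahead to log_add.h below: it implements log-space accumulation with the usual stability trick, log(exp(l1) + exp(l2)) = l1 + log(1 + exp(l2 - l1)) with l1 the larger argument, so the exponent is never positive and cannot overflow. A sketch of the same zero/add/subtract trio (log1p is a minor refinement over the header's log(1 + ...)):

import math

LOG_ZERO = float("-inf")

def log_add(l1, l2):
    if l1 == LOG_ZERO:
        return l2
    if l1 < l2:
        l1, l2 = l2, l1                    # keep the exponent non-positive
    return l1 + math.log1p(math.exp(l2 - l1))

def log_subtract(l1, l2):                  # requires l1 >= l2
    return l1 + math.log1p(-math.exp(l2 - l1))

print(log_add(math.log(0.2), math.log(0.3)), math.log(0.5))   # equal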
-// ---------------------------------------------------------------------------- - -class igzstream : public gzstreambase, public std::istream { -public: -    igzstream() : std::istream( &buf) {}  -    igzstream( const char* name, int open_mode = std::ios::in) -        : gzstreambase( name, open_mode), std::istream( &buf) {}   -    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); } -    void open( const char* name, int open_mode = std::ios::in) { -        gzstreambase::open( name, open_mode); -    } -}; - -class ogzstream : public gzstreambase, public std::ostream { -public: -    ogzstream() : std::ostream( &buf) {} -    ogzstream( const char* name, int mode = std::ios::out) -        : gzstreambase( name, mode), std::ostream( &buf) {}   -    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); } -    void open( const char* name, int open_mode = std::ios::out) { -        gzstreambase::open( name, open_mode); -    } -}; - -#ifdef GZSTREAM_NAMESPACE -} // namespace GZSTREAM_NAMESPACE -#endif - -#endif // GZSTREAM_H -// ============================================================================ -// EOF // - diff --git a/gi/pyp-topics/src/log_add.h b/gi/pyp-topics/src/log_add.h deleted file mode 100644 index e0620c5a..00000000 --- a/gi/pyp-topics/src/log_add.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef log_add_hh -#define log_add_hh - -#include <limits> -#include <iostream> -#include <cassert> -#include <cmath> - -template <typename T> -struct Log -{ -    static T zero() { return -std::numeric_limits<T>::infinity(); }  - -    static T add(T l1, T l2) -    { -        if (l1 == zero()) return l2; -        if (l1 > l2)  -            return l1 + std::log(1 + exp(l2 - l1)); -        else -            return l2 + std::log(1 + exp(l1 - l2)); -    } - -    static T subtract(T l1, T l2) -    { -        //std::assert(l1 >= l2); -        return l1 + log(1 - exp(l2 - l1)); -    } -}; - -#endif diff --git a/gi/pyp-topics/src/macros.Linux b/gi/pyp-topics/src/macros.Linux deleted file mode 100644 index 7c6e7fa7..00000000 --- a/gi/pyp-topics/src/macros.Linux +++ /dev/null @@ -1,18 +0,0 @@ -CC = /home/pblunsom/software/bin/mpicc -CXX = /home/pblunsom/software/bin/mpicxx -LD = /home/pblunsom/software/bin/mpicxx -FC = /home/pblunsom/software/bin/mpif77 - -SOFTWARE_DIR=/export/ws10smt/software - -CXXFLAGS = -Wall -I${SOFTWARE_DIR}/include -CFLAGS = -Wall -I${SOFTWARE_DIR}/include -FFLAGS = -Wall -LDFLAGS = -lm -lz -L${SOFTWARE_DIR}/lib \ -	-lboost_program_options -lboost_mpi -lboost_serialization \ -  -lboost_regex -L../../../decoder -lcdec - -FFLAGS += -g -O6  -march=native -CFLAGS += -g -O6  -march=native -CXXFLAGS += -g -O6  -march=native -LDFLAGS += -g -O6  -march=native diff --git a/gi/pyp-topics/src/makefile.darwin b/gi/pyp-topics/src/makefile.darwin deleted file mode 100644 index af608fd8..00000000 --- a/gi/pyp-topics/src/makefile.darwin +++ /dev/null @@ -1,15 +0,0 @@ -CC = /usr/bin/gcc -CXX = /usr/bin/g++ -LD = /usr/bin/g++ -FC=/usr/bin/g77 - -ARCH=i686-m64 -CXXFLAGS = -m64 -Wall -I/Users/pblunsom/packages/include  -CFLAGS = -m64 -Wall -I/Users/pblunsom/packages/include  -FFLAGS = -m64 -Wall -LDFLAGS = -L/Users/pblunsom/packages/lib -lboost_program_options -lm -lz - -FFLAGS += -g -O3 -funroll-loops #-pg -CFLAGS += -g -O3 -funroll-loops #-pg -CXXFLAGS += -g -O3 -funroll-loops #-pg -LDFLAGS += -g -O3 -funroll-loops #-pg diff --git a/gi/pyp-topics/src/makefile.depend b/gi/pyp-topics/src/makefile.depend deleted file mode 100644 index 9b8e306c..00000000 --- a/gi/pyp-topics/src/makefile.depend +++ /dev/null @@ 
-1,4042 +0,0 @@ -contexts_corpus.o: contexts_corpus.cc contexts_corpus.hh \ - /home/pblunsom/packages/include/boost/ptr_container/ptr_vector.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/ptr_sequence_adapter.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/reversible_ptr_container.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/throw_exception.hpp \ - /home/pblunsom/packages/include/boost/assert.hpp \ - /home/pblunsom/packages/include/boost/config.hpp \ - /home/pblunsom/packages/include/boost/config/user.hpp \ - /home/pblunsom/packages/include/boost/config/select_compiler_config.hpp \ - /home/pblunsom/packages/include/boost/config/compiler/gcc.hpp \ - /home/pblunsom/packages/include/boost/config/select_stdlib_config.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/utility.hpp \ - /home/pblunsom/packages/include/boost/config/stdlib/libstdcpp3.hpp \ - /home/pblunsom/packages/include/boost/config/select_platform_config.hpp \ - /home/pblunsom/packages/include/boost/config/platform/linux.hpp \ - /home/pblunsom/packages/include/boost/config/posix_features.hpp \ - /home/pblunsom/packages/include/boost/config/suffix.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/scoped_deleter.hpp \ - /home/pblunsom/packages/include/boost/scoped_array.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/scoped_array.hpp \ - /home/pblunsom/packages/include/boost/checked_delete.hpp \ - /home/pblunsom/packages/include/boost/detail/workaround.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/operator_bool.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/static_move_ptr.hpp \ - /home/pblunsom/packages/include/boost/compressed_pair.hpp \ - /home/pblunsom/packages/include/boost/detail/compressed_pair.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_cv.hpp \ - /home/pblunsom/packages/include/boost/type_traits/broken_compiler_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/lambda_support.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/ttp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/gcc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/workaround.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/ctps.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/cv_traits_impl.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_def.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/template_arity_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/int.hpp \ - /home/pblunsom/packages/include/boost/mpl/int_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/adl_barrier.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/adl.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/intel.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/nttp_decl.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/nttp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/integral_wrapper.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c_tag.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/static_constant.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/static_cast.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/config/config.hpp \ - 
/home/pblunsom/packages/include/boost/mpl/aux_/template_arity_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/params.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/preprocessor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comma_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/iif.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bool.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/empty.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repeat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/error.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/auto_rec.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/eat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/inc.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/inc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/overload_resolution.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_empty.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_convertible.hpp \ - /home/pblunsom/packages/include/boost/type_traits/intrinsics.hpp \ - /home/pblunsom/packages/include/boost/type_traits/config.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_same.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_def.hpp \ - /home/pblunsom/packages/include/boost/type_traits/integral_constant.hpp \ - /home/pblunsom/packages/include/boost/mpl/bool.hpp \ - /home/pblunsom/packages/include/boost/mpl/bool_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c_fwd.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_volatile.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/yes_no_type.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_array.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/ice.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_or.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_and.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_not.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_eq.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_arithmetic.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_integral.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_float.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_void.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_abstract.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_class.hpp \ - /home/pblunsom/packages/include/boost/call_traits.hpp \ - /home/pblunsom/packages/include/boost/detail/call_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_pointer.hpp \ - 
/home/pblunsom/packages/include/boost/type_traits/is_member_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_member_function_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/default_deleter.hpp \ - /home/pblunsom/packages/include/boost/mpl/if.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/value_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/integral.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/eti.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/lambda_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/void_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/lambda_arity_param.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/dtp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/enum.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/def_params_tail.hpp \ - /home/pblunsom/packages/include/boost/mpl/limits/arity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/and.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitand.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/identity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/identity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/empty.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/add.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/dec.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/while.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/expr_iif.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/adt.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/is_binary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/check.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/compl.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/detail/while.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/sub.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_bounds.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/is_convertible.hpp \ - /home/pblunsom/packages/include/boost/mpl/and.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/use_preprocessed.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/nested_type_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/include_preprocessed.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/compiler.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/stringize.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/and.hpp \ - 
/home/pblunsom/packages/include/boost/mpl/identity.hpp \ - /home/pblunsom/packages/include/boost/utility/enable_if.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/move.hpp \ - /home/pblunsom/packages/include/boost/static_assert.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/exception.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/clone_allocator.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/nullable.hpp \ - /home/pblunsom/packages/include/boost/mpl/eval_if.hpp \ - /home/pblunsom/packages/include/boost/range/functions.hpp \ - /home/pblunsom/packages/include/boost/range/begin.hpp \ - /home/pblunsom/packages/include/boost/range/config.hpp \ - /home/pblunsom/packages/include/boost/range/iterator.hpp \ - /home/pblunsom/packages/include/boost/range/mutable_iterator.hpp \ - /home/pblunsom/packages/include/boost/range/detail/extract_optional_type.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_traits.hpp \ - /home/pblunsom/packages/include/boost/detail/iterator.hpp \ - /home/pblunsom/packages/include/boost/range/const_iterator.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_const.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_const.hpp \ - /home/pblunsom/packages/include/boost/range/end.hpp \ - /home/pblunsom/packages/include/boost/range/detail/implementation_help.hpp \ - /home/pblunsom/packages/include/boost/range/detail/common.hpp \ - /home/pblunsom/packages/include/boost/range/detail/sfinae.hpp \ - /home/pblunsom/packages/include/boost/range/size.hpp \ - /home/pblunsom/packages/include/boost/range/difference_type.hpp \ - /home/pblunsom/packages/include/boost/range/distance.hpp \ - /home/pblunsom/packages/include/boost/range/empty.hpp \ - /home/pblunsom/packages/include/boost/range/rbegin.hpp \ - /home/pblunsom/packages/include/boost/range/reverse_iterator.hpp \ - /home/pblunsom/packages/include/boost/iterator/reverse_iterator.hpp \ - /home/pblunsom/packages/include/boost/iterator.hpp \ - /home/pblunsom/packages/include/boost/utility.hpp \ - /home/pblunsom/packages/include/boost/utility/addressof.hpp \ - /home/pblunsom/packages/include/boost/utility/base_from_member.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/rem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat_from_to.hpp \ - /home/pblunsom/packages/include/boost/utility/binary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/deduce_d.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/seq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/transform.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mod.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/detail/div_base.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/less_equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/not.hpp \ - /home/pblunsom/packages/include/boost/next_prior.hpp \ - /home/pblunsom/packages/include/boost/noncopyable.hpp \ - 
/home/pblunsom/packages/include/boost/iterator/iterator_adaptor.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_categories.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/config_def.hpp \ - /home/pblunsom/packages/include/boost/mpl/placeholders.hpp \ - /home/pblunsom/packages/include/boost/mpl/arg.hpp \ - /home/pblunsom/packages/include/boost/mpl/arg_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_assert.hpp \ - /home/pblunsom/packages/include/boost/mpl/assert.hpp \ - /home/pblunsom/packages/include/boost/mpl/not.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/yes_no.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/arrays.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/pp_counter.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arity_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arg_typedef.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/arg.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/config_undef.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_facade.hpp \ - /home/pblunsom/packages/include/boost/iterator/interoperable.hpp \ - /home/pblunsom/packages/include/boost/mpl/or.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/or.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/facade_iterator_category.hpp \ - /home/pblunsom/packages/include/boost/detail/indirect_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_function.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/false_result.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/is_function_ptr_helper.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_pointer.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/enable_if.hpp \ - /home/pblunsom/packages/include/boost/implicit_cast.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_const.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_pod.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_scalar.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_enum.hpp \ - /home/pblunsom/packages/include/boost/mpl/always.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply_wrap.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/has_apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/has_xxx.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/type_wrapper.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/has_xxx.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc_typename.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/has_apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/msvc_never_true.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp \ - /home/pblunsom/packages/include/boost/mpl/lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/bind_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/bind.hpp \ - 
/home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/next.hpp \ - /home/pblunsom/packages/include/boost/mpl/next_prior.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/common_name_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/protect.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/full_lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/quote.hpp \ - /home/pblunsom/packages/include/boost/mpl/void.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/has_type.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/bcc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/quote.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/template_arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply.hpp \ - /home/pblunsom/packages/include/boost/range/rend.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/indirect_fun.hpp \ - /home/pblunsom/packages/include/boost/utility/result_of.hpp \ - /home/pblunsom/packages/include/boost/type.hpp \ - /home/pblunsom/packages/include/boost/preprocessor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/library.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/div.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mul.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/data.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/insert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/push_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/not_equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/pop_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_z.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/pop_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/push_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/remove.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/replace.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/greater.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/less.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/greater_equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/config/limits.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/expr_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor/debug/assert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/line.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/iterate.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/slot.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/detail/def.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/apply.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/is_unary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/expand.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/intercept.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/local.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/self.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/append.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/at.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/rest_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each_i.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/for.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/detail/for.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/filter.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/first_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each_product.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/to_tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/to_list.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/transform.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitnor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitxor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/nor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/or.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/xor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_r.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_a_default.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_defaults.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_params.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection/max.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection/min.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/filter.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/first_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/detail/split.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_i.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_product.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/insert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/rest_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/pop_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/pop_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/push_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/push_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/remove.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/replace.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/subseq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/to_array.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/to_tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/to_seq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/iter/forward1.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/lower1.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/detail/shared.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/upper1.hpp \ - /home/pblunsom/packages/include/boost/utility/detail/result_of_iterate.hpp \ - /home/pblunsom/packages/include/boost/pointee.hpp \ - /home/pblunsom/packages/include/boost/detail/is_incrementable.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/void_ptr_iterator.hpp \ - corpus.hh /home/pblunsom/packages/include/boost/shared_ptr.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/shared_ptr.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/memory.hpp \ - /home/pblunsom/packages/include/boost/throw_exception.hpp \ - /home/pblunsom/packages/include/boost/exception/detail/attribute_noreturn.hpp \ - /home/pblunsom/packages/include/boost/exception/exception.hpp \ - /home/pblunsom/packages/include/boost/current_function.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/shared_count.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/bad_weak_ptr.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base.hpp \ - 
/home/pblunsom/packages/include/boost/smart_ptr/detail/sp_has_sync.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \ - /home/pblunsom/packages/include/boost/detail/sp_typeinfo.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_impl.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_convertible.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_pool.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_sync.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/yield_k.hpp \ - /home/pblunsom/packages/include/boost/memory_order.hpp contexts_lexer.h \ - ../../../decoder/dict.h \ - /home/pblunsom/packages/include/boost/functional/hash.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/hash.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/hash_fwd.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/detail/float_functions.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/cmath.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/detail/limits.hpp \ - /home/pblunsom/packages/include/boost/limits.hpp \ - /home/pblunsom/packages/include/boost/integer/static_log2.hpp \ - /home/pblunsom/packages/include/boost/integer_fwd.hpp \ - /home/pblunsom/packages/include/boost/cstdint.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float_generic.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/extensions.hpp \ - /home/pblunsom/packages/include/boost/detail/container_fwd.hpp \ - ../../../decoder/wordid.h gzstream.hh \ - /home/pblunsom/packages/include/boost/tuple/tuple.hpp \ - /home/pblunsom/packages/include/boost/ref.hpp \ - /home/pblunsom/packages/include/boost/tuple/detail/tuple_basic.hpp \ - /home/pblunsom/packages/include/boost/type_traits/cv_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_volatile.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_cv.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_volatile.hpp \ - /home/pblunsom/packages/include/boost/type_traits/function_traits.hpp -contexts_lexer.o: contexts_lexer.cc contexts_lexer.h \ - ../../../decoder/dict.h \ - /home/pblunsom/packages/include/boost/functional/hash.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/hash.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/hash_fwd.hpp \ - /home/pblunsom/packages/include/boost/config.hpp \ - /home/pblunsom/packages/include/boost/config/user.hpp \ - /home/pblunsom/packages/include/boost/config/select_compiler_config.hpp \ - /home/pblunsom/packages/include/boost/config/compiler/gcc.hpp \ - /home/pblunsom/packages/include/boost/config/select_stdlib_config.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/utility.hpp \ - /home/pblunsom/packages/include/boost/config/stdlib/libstdcpp3.hpp \ - /home/pblunsom/packages/include/boost/config/select_platform_config.hpp \ - /home/pblunsom/packages/include/boost/config/platform/linux.hpp \ - /home/pblunsom/packages/include/boost/config/posix_features.hpp \ - /home/pblunsom/packages/include/boost/config/suffix.hpp \ - /home/pblunsom/packages/include/boost/detail/workaround.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float.hpp \ - 
/home/pblunsom/packages/include/boost/functional/hash/detail/float_functions.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/cmath.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/detail/limits.hpp \ - /home/pblunsom/packages/include/boost/limits.hpp \ - /home/pblunsom/packages/include/boost/integer/static_log2.hpp \ - /home/pblunsom/packages/include/boost/integer_fwd.hpp \ - /home/pblunsom/packages/include/boost/cstdint.hpp \ - /home/pblunsom/packages/include/boost/assert.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float_generic.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/extensions.hpp \ - /home/pblunsom/packages/include/boost/detail/container_fwd.hpp \ - ../../../decoder/wordid.h ../../../decoder/filelib.h \ - ../../../decoder/gzstream.h -corpus.o: corpus.cc corpus.hh \ - /home/pblunsom/packages/include/boost/shared_ptr.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/shared_ptr.hpp \ - /home/pblunsom/packages/include/boost/config.hpp \ - /home/pblunsom/packages/include/boost/config/user.hpp \ - /home/pblunsom/packages/include/boost/config/select_compiler_config.hpp \ - /home/pblunsom/packages/include/boost/config/compiler/gcc.hpp \ - /home/pblunsom/packages/include/boost/config/select_stdlib_config.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/utility.hpp \ - /home/pblunsom/packages/include/boost/config/stdlib/libstdcpp3.hpp \ - /home/pblunsom/packages/include/boost/config/select_platform_config.hpp \ - /home/pblunsom/packages/include/boost/config/platform/linux.hpp \ - /home/pblunsom/packages/include/boost/config/posix_features.hpp \ - /home/pblunsom/packages/include/boost/config/suffix.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/memory.hpp \ - /home/pblunsom/packages/include/boost/assert.hpp \ - /home/pblunsom/packages/include/boost/checked_delete.hpp \ - /home/pblunsom/packages/include/boost/throw_exception.hpp \ - /home/pblunsom/packages/include/boost/exception/detail/attribute_noreturn.hpp \ - /home/pblunsom/packages/include/boost/detail/workaround.hpp \ - /home/pblunsom/packages/include/boost/exception/exception.hpp \ - /home/pblunsom/packages/include/boost/current_function.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/shared_count.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/bad_weak_ptr.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_has_sync.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \ - /home/pblunsom/packages/include/boost/detail/sp_typeinfo.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_impl.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_convertible.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_pool.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_sync.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/yield_k.hpp \ - /home/pblunsom/packages/include/boost/memory_order.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/operator_bool.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/ptr_vector.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/ptr_sequence_adapter.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/reversible_ptr_container.hpp \ - 
/home/pblunsom/packages/include/boost/ptr_container/detail/throw_exception.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/scoped_deleter.hpp \ - /home/pblunsom/packages/include/boost/scoped_array.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/scoped_array.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/static_move_ptr.hpp \ - /home/pblunsom/packages/include/boost/compressed_pair.hpp \ - /home/pblunsom/packages/include/boost/detail/compressed_pair.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_cv.hpp \ - /home/pblunsom/packages/include/boost/type_traits/broken_compiler_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/lambda_support.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/ttp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/gcc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/workaround.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/ctps.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/cv_traits_impl.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_def.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/template_arity_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/int.hpp \ - /home/pblunsom/packages/include/boost/mpl/int_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/adl_barrier.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/adl.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/intel.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/nttp_decl.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/nttp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/integral_wrapper.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c_tag.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/static_constant.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/static_cast.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/config/config.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/template_arity_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/params.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/preprocessor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comma_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/iif.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bool.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/empty.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repeat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/error.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/auto_rec.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/eat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/inc.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/inc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/overload_resolution.hpp \ - 
/home/pblunsom/packages/include/boost/type_traits/detail/type_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_empty.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_convertible.hpp \ - /home/pblunsom/packages/include/boost/type_traits/intrinsics.hpp \ - /home/pblunsom/packages/include/boost/type_traits/config.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_same.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_def.hpp \ - /home/pblunsom/packages/include/boost/type_traits/integral_constant.hpp \ - /home/pblunsom/packages/include/boost/mpl/bool.hpp \ - /home/pblunsom/packages/include/boost/mpl/bool_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c_fwd.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_volatile.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/yes_no_type.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_array.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/ice.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_or.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_and.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_not.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_eq.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_arithmetic.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_integral.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_float.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_void.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_abstract.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_class.hpp \ - /home/pblunsom/packages/include/boost/call_traits.hpp \ - /home/pblunsom/packages/include/boost/detail/call_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_member_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_member_function_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/default_deleter.hpp \ - /home/pblunsom/packages/include/boost/mpl/if.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/value_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/integral.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/eti.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/lambda_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/void_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/lambda_arity_param.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/dtp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/enum.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/def_params_tail.hpp \ - /home/pblunsom/packages/include/boost/mpl/limits/arity.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor/logical/and.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitand.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/identity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/identity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/empty.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/add.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/dec.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/while.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/expr_iif.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/adt.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/is_binary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/check.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/compl.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/detail/while.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/sub.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_bounds.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/is_convertible.hpp \ - /home/pblunsom/packages/include/boost/mpl/and.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/use_preprocessed.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/nested_type_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/include_preprocessed.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/compiler.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/stringize.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/and.hpp \ - /home/pblunsom/packages/include/boost/mpl/identity.hpp \ - /home/pblunsom/packages/include/boost/utility/enable_if.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/move.hpp \ - /home/pblunsom/packages/include/boost/static_assert.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/exception.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/clone_allocator.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/nullable.hpp \ - /home/pblunsom/packages/include/boost/mpl/eval_if.hpp \ - /home/pblunsom/packages/include/boost/range/functions.hpp \ - /home/pblunsom/packages/include/boost/range/begin.hpp \ - /home/pblunsom/packages/include/boost/range/config.hpp \ - /home/pblunsom/packages/include/boost/range/iterator.hpp \ - /home/pblunsom/packages/include/boost/range/mutable_iterator.hpp \ - /home/pblunsom/packages/include/boost/range/detail/extract_optional_type.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_traits.hpp \ - /home/pblunsom/packages/include/boost/detail/iterator.hpp \ - /home/pblunsom/packages/include/boost/range/const_iterator.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_const.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_const.hpp \ - /home/pblunsom/packages/include/boost/range/end.hpp \ - 
/home/pblunsom/packages/include/boost/range/detail/implementation_help.hpp \ - /home/pblunsom/packages/include/boost/range/detail/common.hpp \ - /home/pblunsom/packages/include/boost/range/detail/sfinae.hpp \ - /home/pblunsom/packages/include/boost/range/size.hpp \ - /home/pblunsom/packages/include/boost/range/difference_type.hpp \ - /home/pblunsom/packages/include/boost/range/distance.hpp \ - /home/pblunsom/packages/include/boost/range/empty.hpp \ - /home/pblunsom/packages/include/boost/range/rbegin.hpp \ - /home/pblunsom/packages/include/boost/range/reverse_iterator.hpp \ - /home/pblunsom/packages/include/boost/iterator/reverse_iterator.hpp \ - /home/pblunsom/packages/include/boost/iterator.hpp \ - /home/pblunsom/packages/include/boost/utility.hpp \ - /home/pblunsom/packages/include/boost/utility/addressof.hpp \ - /home/pblunsom/packages/include/boost/utility/base_from_member.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/rem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat_from_to.hpp \ - /home/pblunsom/packages/include/boost/utility/binary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/deduce_d.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/seq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/transform.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mod.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/detail/div_base.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/less_equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/not.hpp \ - /home/pblunsom/packages/include/boost/next_prior.hpp \ - /home/pblunsom/packages/include/boost/noncopyable.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_adaptor.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_categories.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/config_def.hpp \ - /home/pblunsom/packages/include/boost/mpl/placeholders.hpp \ - /home/pblunsom/packages/include/boost/mpl/arg.hpp \ - /home/pblunsom/packages/include/boost/mpl/arg_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_assert.hpp \ - /home/pblunsom/packages/include/boost/mpl/assert.hpp \ - /home/pblunsom/packages/include/boost/mpl/not.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/yes_no.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/arrays.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/pp_counter.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arity_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arg_typedef.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/arg.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/config_undef.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_facade.hpp \ - /home/pblunsom/packages/include/boost/iterator/interoperable.hpp \ - /home/pblunsom/packages/include/boost/mpl/or.hpp \ - 
/home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/or.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/facade_iterator_category.hpp \ - /home/pblunsom/packages/include/boost/detail/indirect_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_function.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/false_result.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/is_function_ptr_helper.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_pointer.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/enable_if.hpp \ - /home/pblunsom/packages/include/boost/implicit_cast.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_const.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_pod.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_scalar.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_enum.hpp \ - /home/pblunsom/packages/include/boost/mpl/always.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply_wrap.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/has_apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/has_xxx.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/type_wrapper.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/has_xxx.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc_typename.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/has_apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/msvc_never_true.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp \ - /home/pblunsom/packages/include/boost/mpl/lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/bind_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/next.hpp \ - /home/pblunsom/packages/include/boost/mpl/next_prior.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/common_name_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/protect.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/full_lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/quote.hpp \ - /home/pblunsom/packages/include/boost/mpl/void.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/has_type.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/bcc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/quote.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/template_arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply.hpp \ - /home/pblunsom/packages/include/boost/range/rend.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/indirect_fun.hpp \ - /home/pblunsom/packages/include/boost/utility/result_of.hpp \ - /home/pblunsom/packages/include/boost/type.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/library.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/div.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mul.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/data.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/insert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/push_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/not_equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/pop_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_z.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/pop_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/push_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/remove.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/replace.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/greater.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/less.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/greater_equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/config/limits.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/expr_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/assert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/line.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/iterate.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/slot.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/detail/def.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/apply.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/is_unary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/expand.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/intercept.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/local.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/self.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/append.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/at.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/rest_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/enum.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor/list/for_each_i.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/for.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/detail/for.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/filter.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/first_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each_product.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/to_tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/to_list.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/transform.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitnor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitxor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/nor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/or.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/xor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_r.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_a_default.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_defaults.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection/max.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection/min.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/filter.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/first_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/detail/split.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_i.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_product.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/insert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/rest_n.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor/seq/pop_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/pop_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/push_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/push_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/remove.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/replace.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/subseq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/to_array.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/to_tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/to_seq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/iter/forward1.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/lower1.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/detail/shared.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/upper1.hpp \ - /home/pblunsom/packages/include/boost/utility/detail/result_of_iterate.hpp \ - /home/pblunsom/packages/include/boost/pointee.hpp \ - /home/pblunsom/packages/include/boost/detail/is_incrementable.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/void_ptr_iterator.hpp \ - gzstream.hh
-gzstream.o: gzstream.cc gzstream.hh
-mpi-pyp-topics.o: mpi-pyp-topics.cc \
- /home/pblunsom/packages/include/boost/mpi/communicator.hpp \ - /home/pblunsom/packages/include/boost/mpi/config.hpp \ - /home/pblunsom/packages/include/mpi.h \ - /home/pblunsom/packages/include/mpio.h \ - /home/pblunsom/packages/include/mpi.h \ - /home/pblunsom/packages/include/mpicxx.h \ - /home/pblunsom/packages/include/boost/config.hpp \ - /home/pblunsom/packages/include/boost/config/user.hpp \ - /home/pblunsom/packages/include/boost/config/select_compiler_config.hpp \ - /home/pblunsom/packages/include/boost/config/compiler/gcc.hpp \ - /home/pblunsom/packages/include/boost/config/select_stdlib_config.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/utility.hpp \ - /home/pblunsom/packages/include/boost/config/stdlib/libstdcpp3.hpp \ - /home/pblunsom/packages/include/boost/config/select_platform_config.hpp \ - /home/pblunsom/packages/include/boost/config/platform/linux.hpp \ - /home/pblunsom/packages/include/boost/config/posix_features.hpp \ - /home/pblunsom/packages/include/boost/config/suffix.hpp \ - /home/pblunsom/packages/include/boost/config/auto_link.hpp \ - /home/pblunsom/packages/include/boost/mpi/exception.hpp \ - /home/pblunsom/packages/include/boost/throw_exception.hpp \ - /home/pblunsom/packages/include/boost/exception/detail/attribute_noreturn.hpp \ - /home/pblunsom/packages/include/boost/detail/workaround.hpp \ - /home/pblunsom/packages/include/boost/exception/exception.hpp \ - /home/pblunsom/packages/include/boost/current_function.hpp \ - /home/pblunsom/packages/include/boost/optional.hpp \ - /home/pblunsom/packages/include/boost/optional/optional.hpp \ - /home/pblunsom/packages/include/boost/assert.hpp \ - /home/pblunsom/packages/include/boost/type.hpp \ - /home/pblunsom/packages/include/boost/type_traits/alignment_of.hpp \ - /home/pblunsom/packages/include/boost/type_traits/intrinsics.hpp \ - /home/pblunsom/packages/include/boost/type_traits/config.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_same.hpp
\ - /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_def.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/template_arity_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/int.hpp \ - /home/pblunsom/packages/include/boost/mpl/int_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/adl_barrier.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/adl.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/intel.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/gcc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/workaround.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/nttp_decl.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/nttp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/integral_wrapper.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c_tag.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/static_constant.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/static_cast.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/config/config.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/template_arity_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/params.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/preprocessor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comma_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/iif.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bool.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/empty.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repeat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/error.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/auto_rec.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/eat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/inc.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/inc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/ttp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/ctps.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/overload_resolution.hpp \ - /home/pblunsom/packages/include/boost/type_traits/integral_constant.hpp \ - /home/pblunsom/packages/include/boost/mpl/bool.hpp \ - /home/pblunsom/packages/include/boost/mpl/bool_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/lambda_support.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_volatile.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/cv_traits_impl.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_def.hpp \ - 
/home/pblunsom/packages/include/boost/mpl/size_t.hpp \ - /home/pblunsom/packages/include/boost/mpl/size_t_fwd.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/type_traits/type_with_alignment.hpp \ - /home/pblunsom/packages/include/boost/mpl/if.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/value_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/integral.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/eti.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/lambda_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/void_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/lambda_arity_param.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/dtp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/enum.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/def_params_tail.hpp \ - /home/pblunsom/packages/include/boost/mpl/limits/arity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/and.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitand.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/identity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/identity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/empty.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/add.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/dec.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/while.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/expr_iif.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/adt.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/is_binary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/check.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/compl.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/detail/while.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/sub.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each_i.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/for.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/detail/for.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/rem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/to_list.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/transform.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/append.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_pod.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_void.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_scalar.hpp \ - 
/home/pblunsom/packages/include/boost/type_traits/is_arithmetic.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_integral.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_float.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_or.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_enum.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_member_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_member_function_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_cv.hpp \ - /home/pblunsom/packages/include/boost/type_traits/broken_compiler_spec.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_def.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_and.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_not.hpp \ - /home/pblunsom/packages/include/boost/static_assert.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_reference.hpp \ - /home/pblunsom/packages/include/boost/mpl/not.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/nested_type_wknd.hpp \ - /home/pblunsom/packages/include/boost/detail/reference_content.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_nothrow_copy.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_trivial_copy.hpp \ - /home/pblunsom/packages/include/boost/mpl/void.hpp \ - /home/pblunsom/packages/include/boost/none.hpp \ - /home/pblunsom/packages/include/boost/none_t.hpp \ - /home/pblunsom/packages/include/boost/utility/compare_pointees.hpp \ - /home/pblunsom/packages/include/boost/optional/optional_fwd.hpp \ - /home/pblunsom/packages/include/boost/shared_ptr.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/shared_ptr.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/memory.hpp \ - /home/pblunsom/packages/include/boost/checked_delete.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/shared_count.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/bad_weak_ptr.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_has_sync.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \ - /home/pblunsom/packages/include/boost/detail/sp_typeinfo.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_impl.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_convertible.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_pool.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_sync.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/yield_k.hpp \ - /home/pblunsom/packages/include/boost/memory_order.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/operator_bool.hpp \ - /home/pblunsom/packages/include/boost/mpi/datatype.hpp \ - /home/pblunsom/packages/include/boost/mpi/datatype_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/or.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/use_preprocessed.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/include_preprocessed.hpp \ - 
/home/pblunsom/packages/include/boost/mpl/aux_/config/compiler.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/stringize.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/or.hpp \ - /home/pblunsom/packages/include/boost/mpl/and.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/and.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/mpi_datatype_cache.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/mpi_datatype_oarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/oserializer.hpp \ - /home/pblunsom/packages/include/boost/mpl/eval_if.hpp \ - /home/pblunsom/packages/include/boost/mpl/equal_to.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/comparison_op.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/numeric_op.hpp \ - /home/pblunsom/packages/include/boost/mpl/numeric_cast.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply_wrap.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/has_apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/has_xxx.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/type_wrapper.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/yes_no.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/arrays.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/has_xxx.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc_typename.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/has_apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/msvc_never_true.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp \ - /home/pblunsom/packages/include/boost/mpl/tag.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/has_tag.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/numeric_cast_utils.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/forwarding.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/msvc_eti_base.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/is_msvc_eti_arg.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/equal_to.hpp \ - /home/pblunsom/packages/include/boost/mpl/greater_equal.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/greater_equal.hpp \ - /home/pblunsom/packages/include/boost/mpl/identity.hpp \ - /home/pblunsom/packages/include/boost/serialization/extended_type_info_typeid.hpp \ - /home/pblunsom/packages/include/boost/serialization/static_warning.hpp \ - /home/pblunsom/packages/include/boost/mpl/print.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_polymorphic.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_const.hpp \ - /home/pblunsom/packages/include/boost/serialization/singleton.hpp \ - /home/pblunsom/packages/include/boost/noncopyable.hpp \ - /home/pblunsom/packages/include/boost/serialization/force_include.hpp \ - /home/pblunsom/packages/include/boost/serialization/extended_type_info.hpp \ - /home/pblunsom/packages/include/boost/serialization/config.hpp \ - /home/pblunsom/packages/include/boost/config/abi_prefix.hpp \ - /home/pblunsom/packages/include/boost/config/abi_suffix.hpp \ - /home/pblunsom/packages/include/boost/serialization/factory.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/greater.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/less.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/less_equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/not.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor/comparison/not_equal.hpp \ - /home/pblunsom/packages/include/boost/serialization/access.hpp \ - /home/pblunsom/packages/include/boost/serialization/pfto.hpp \ - /home/pblunsom/packages/include/boost/serialization/throw_exception.hpp \ - /home/pblunsom/packages/include/boost/serialization/smart_cast.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_base_and_derived.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_pointer.hpp \ - /home/pblunsom/packages/include/boost/serialization/assume_abstract.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_abstract.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_const.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_extent.hpp \ - /home/pblunsom/packages/include/boost/serialization/serialization.hpp \ - /home/pblunsom/packages/include/boost/serialization/strong_typedef.hpp \ - /home/pblunsom/packages/include/boost/operators.hpp \ - /home/pblunsom/packages/include/boost/iterator.hpp \ - /home/pblunsom/packages/include/boost/serialization/nvp.hpp \ - /home/pblunsom/packages/include/boost/serialization/level.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_fundamental.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_array.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_class.hpp \ - /home/pblunsom/packages/include/boost/serialization/level_enum.hpp \ - /home/pblunsom/packages/include/boost/serialization/tracking.hpp \ - /home/pblunsom/packages/include/boost/mpl/greater.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/greater.hpp \ - /home/pblunsom/packages/include/boost/serialization/tracking_enum.hpp \ - /home/pblunsom/packages/include/boost/serialization/type_info_implementation.hpp \ - /home/pblunsom/packages/include/boost/serialization/traits.hpp \ - /home/pblunsom/packages/include/boost/serialization/split_member.hpp \ - /home/pblunsom/packages/include/boost/serialization/base_object.hpp \ - /home/pblunsom/packages/include/boost/serialization/void_cast_fwd.hpp \ - /home/pblunsom/packages/include/boost/serialization/wrapper.hpp \ - /home/pblunsom/packages/include/boost/serialization/version.hpp \ - /home/pblunsom/packages/include/boost/mpl/assert.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/pp_counter.hpp \ - /home/pblunsom/packages/include/boost/mpl/less.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/less.hpp \ - /home/pblunsom/packages/include/boost/mpl/comparison.hpp \ - /home/pblunsom/packages/include/boost/mpl/not_equal_to.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/not_equal_to.hpp \ - /home/pblunsom/packages/include/boost/mpl/less_equal.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/less_equal.hpp \ - /home/pblunsom/packages/include/boost/serialization/void_cast.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_virtual_base_of.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_base_of.hpp \ - /home/pblunsom/packages/include/boost/serialization/array.hpp \ - /home/pblunsom/packages/include/boost/mpl/always.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arity_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/placeholders.hpp \ - 
/home/pblunsom/packages/include/boost/mpl/arg.hpp \ - /home/pblunsom/packages/include/boost/mpl/arg_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_assert.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arg_typedef.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/arg.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp \ - /home/pblunsom/packages/include/boost/mpl/lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/bind_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/next.hpp \ - /home/pblunsom/packages/include/boost/mpl/next_prior.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/common_name_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/protect.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/full_lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/quote.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/has_type.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/bcc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/quote.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/template_arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply.hpp \ - /home/pblunsom/packages/include/boost/array.hpp \ - /home/pblunsom/packages/include/boost/swap.hpp \ - /home/pblunsom/packages/include/boost/utility/swap.hpp \ - /home/pblunsom/packages/include/boost/detail/iterator.hpp \ - /home/pblunsom/packages/include/boost/serialization/collection_size_type.hpp \ - /home/pblunsom/packages/include/boost/archive/archive_exception.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/decl.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/abi_prefix.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/abi_suffix.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/basic_oarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/basic_archive.hpp \ - /home/pblunsom/packages/include/boost/cstdint.hpp \ - /home/pblunsom/packages/include/boost/integer_traits.hpp \ - /home/pblunsom/packages/include/boost/limits.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/auto_link_archive.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/basic_oserializer.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/basic_serializer.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/basic_pointer_oserializer.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/archive_serializer_map.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/check.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/ignore_skeleton_oarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/common_oarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/interface_oarchive.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/mpi_datatype_primitive.hpp \ - /home/pblunsom/packages/include/boost/serialization/detail/get_data.hpp \ - /home/pblunsom/packages/include/boost/integer.hpp \ - 
/home/pblunsom/packages/include/boost/integer_fwd.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/register_archive.hpp \ - /home/pblunsom/packages/include/boost/utility/enable_if.hpp \ - /home/pblunsom/packages/include/boost/mpi/packed_oarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/basic_binary_oarchive.hpp \ - /home/pblunsom/packages/include/boost/serialization/string.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/packed_oprimitive.hpp \ - /home/pblunsom/packages/include/boost/mpi/allocator.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/binary_buffer_oprimitive.hpp \ - /home/pblunsom/packages/include/boost/serialization/is_bitwise_serializable.hpp \ - /home/pblunsom/packages/include/boost/mpi/packed_iarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/basic_binary_iarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/common_iarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/basic_iarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/basic_pointer_iserializer.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/interface_iarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/iserializer.hpp \ - /home/pblunsom/packages/include/boost/detail/no_exceptions_support.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_new_operator.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/yes_no_type.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/basic_iserializer.hpp \ - /home/pblunsom/packages/include/boost/archive/shared_ptr_helper.hpp \ - /home/pblunsom/packages/include/boost/serialization/shared_ptr_132.hpp \ - /home/pblunsom/packages/include/boost/serialization/split_free.hpp \ - /home/pblunsom/packages/include/boost/serialization/detail/shared_ptr_132.hpp \ - /home/pblunsom/packages/include/boost/serialization/detail/shared_count_132.hpp \ - /home/pblunsom/packages/include/boost/detail/lightweight_mutex.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/lightweight_mutex.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/lwm_pthreads.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/packed_iprimitive.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/binary_buffer_iprimitive.hpp \ - /home/pblunsom/packages/include/boost/mpi/skeleton_and_content_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/point_to_point.hpp \ - /home/pblunsom/packages/include/boost/mpi/status.hpp \ - /home/pblunsom/packages/include/boost/mpi/request.hpp timing.h \ - clock_gettime_stub.c mpi-pyp-topics.hh \ - /home/pblunsom/packages/include/boost/ptr_container/ptr_vector.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/ptr_sequence_adapter.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/reversible_ptr_container.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/throw_exception.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/scoped_deleter.hpp \ - /home/pblunsom/packages/include/boost/scoped_array.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/scoped_array.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/static_move_ptr.hpp \ - /home/pblunsom/packages/include/boost/compressed_pair.hpp \ - /home/pblunsom/packages/include/boost/detail/compressed_pair.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_empty.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_convertible.hpp \ - 
/home/pblunsom/packages/include/boost/type_traits/add_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/ice.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_eq.hpp \ - /home/pblunsom/packages/include/boost/call_traits.hpp \ - /home/pblunsom/packages/include/boost/detail/call_traits.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/default_deleter.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_bounds.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/is_convertible.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/move.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/exception.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/clone_allocator.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/nullable.hpp \ - /home/pblunsom/packages/include/boost/range/functions.hpp \ - /home/pblunsom/packages/include/boost/range/begin.hpp \ - /home/pblunsom/packages/include/boost/range/config.hpp \ - /home/pblunsom/packages/include/boost/range/iterator.hpp \ - /home/pblunsom/packages/include/boost/range/mutable_iterator.hpp \ - /home/pblunsom/packages/include/boost/range/detail/extract_optional_type.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_traits.hpp \ - /home/pblunsom/packages/include/boost/range/const_iterator.hpp \ - /home/pblunsom/packages/include/boost/range/end.hpp \ - /home/pblunsom/packages/include/boost/range/detail/implementation_help.hpp \ - /home/pblunsom/packages/include/boost/range/detail/common.hpp \ - /home/pblunsom/packages/include/boost/range/detail/sfinae.hpp \ - /home/pblunsom/packages/include/boost/range/size.hpp \ - /home/pblunsom/packages/include/boost/range/difference_type.hpp \ - /home/pblunsom/packages/include/boost/range/distance.hpp \ - /home/pblunsom/packages/include/boost/range/empty.hpp \ - /home/pblunsom/packages/include/boost/range/rbegin.hpp \ - /home/pblunsom/packages/include/boost/range/reverse_iterator.hpp \ - /home/pblunsom/packages/include/boost/iterator/reverse_iterator.hpp \ - /home/pblunsom/packages/include/boost/utility.hpp \ - /home/pblunsom/packages/include/boost/utility/addressof.hpp \ - /home/pblunsom/packages/include/boost/utility/base_from_member.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat_from_to.hpp \ - /home/pblunsom/packages/include/boost/utility/binary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/deduce_d.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/seq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/transform.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mod.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/detail/div_base.hpp \ - /home/pblunsom/packages/include/boost/next_prior.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_adaptor.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_categories.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/config_def.hpp \ - 
/home/pblunsom/packages/include/boost/iterator/detail/config_undef.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_facade.hpp \ - /home/pblunsom/packages/include/boost/iterator/interoperable.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/facade_iterator_category.hpp \ - /home/pblunsom/packages/include/boost/detail/indirect_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_function.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/false_result.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/is_function_ptr_helper.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/enable_if.hpp \ - /home/pblunsom/packages/include/boost/implicit_cast.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_const.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_pointer.hpp \ - /home/pblunsom/packages/include/boost/range/rend.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/indirect_fun.hpp \ - /home/pblunsom/packages/include/boost/utility/result_of.hpp \ - /home/pblunsom/packages/include/boost/preprocessor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/library.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/div.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mul.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/data.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/insert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/push_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/pop_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_z.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/pop_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/push_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/remove.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/replace.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/greater_equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/config/limits.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/expr_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/assert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/line.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/iterate.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/slot.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/detail/def.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/apply.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor/detail/is_unary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/expand.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/intercept.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/local.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/self.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/at.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/rest_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/filter.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/first_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each_product.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/to_tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitnor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitxor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/nor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/or.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/xor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_r.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_a_default.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_defaults.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection/max.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection/min.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/filter.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/first_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/detail/split.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/reverse.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor/seq/for_each.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_i.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_product.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/insert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/rest_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/pop_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/pop_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/push_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/push_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/remove.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/replace.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/subseq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/to_array.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/to_tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/to_seq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/iter/forward1.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/lower1.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/detail/shared.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/upper1.hpp \ - /home/pblunsom/packages/include/boost/utility/detail/result_of_iterate.hpp \ - /home/pblunsom/packages/include/boost/pointee.hpp \ - /home/pblunsom/packages/include/boost/detail/is_incrementable.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/void_ptr_iterator.hpp \ - /home/pblunsom/packages/include/boost/random/uniform_real.hpp \ - /home/pblunsom/packages/include/boost/random/detail/config.hpp \ - /home/pblunsom/packages/include/boost/random/variate_generator.hpp \ - /home/pblunsom/packages/include/boost/random/uniform_01.hpp \ - /home/pblunsom/packages/include/boost/random/detail/pass_through_engine.hpp \ - /home/pblunsom/packages/include/boost/random/detail/ptr_helper.hpp \ - /home/pblunsom/packages/include/boost/random/detail/disable_warnings.hpp \ - /home/pblunsom/packages/include/boost/random/detail/enable_warnings.hpp \ - /home/pblunsom/packages/include/boost/random/detail/uniform_int_float.hpp \ - /home/pblunsom/packages/include/boost/random/mersenne_twister.hpp \ - /home/pblunsom/packages/include/boost/random/linear_congruential.hpp \ - /home/pblunsom/packages/include/boost/random/detail/const_mod.hpp \ - /home/pblunsom/packages/include/boost/random/detail/seed.hpp \ - /home/pblunsom/packages/include/boost/random/inversive_congruential.hpp \ - /home/pblunsom/packages/include/boost/random/lagged_fibonacci.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/cmath.hpp \ - /home/pblunsom/packages/include/boost/mpi/environment.hpp mpi-pyp.hh \ - /home/pblunsom/packages/include/boost/tuple/tuple.hpp \ - /home/pblunsom/packages/include/boost/ref.hpp \ - /home/pblunsom/packages/include/boost/tuple/detail/tuple_basic.hpp \ - /home/pblunsom/packages/include/boost/type_traits/cv_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_volatile.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_cv.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_volatile.hpp \ - 
/home/pblunsom/packages/include/boost/type_traits/function_traits.hpp \ - /home/pblunsom/packages/include/boost/serialization/map.hpp \ - /home/pblunsom/packages/include/boost/serialization/utility.hpp \ - /home/pblunsom/packages/include/boost/serialization/collections_save_imp.hpp \ - /home/pblunsom/packages/include/boost/serialization/collections_load_imp.hpp \ - /home/pblunsom/packages/include/boost/serialization/detail/stack_constructor.hpp \ - /home/pblunsom/packages/include/boost/aligned_storage.hpp \ - /home/pblunsom/packages/include/boost/mpi.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives/all_gather.hpp \ - /home/pblunsom/packages/include/boost/serialization/vector.hpp \ - /home/pblunsom/packages/include/boost/serialization/collection_traits.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives/broadcast.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives/gather.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives/all_reduce.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives/reduce.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/computation_tree.hpp \ - /home/pblunsom/packages/include/boost/mpi/operations.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives/all_to_all.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives/scatter.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives/scan.hpp \ - /home/pblunsom/packages/include/boost/mpi/graph_communicator.hpp \ - /home/pblunsom/packages/include/boost/graph/graph_traits.hpp \ - /home/pblunsom/packages/include/boost/pending/property.hpp \ - /home/pblunsom/packages/include/boost/pending/detail/property.hpp \ - /home/pblunsom/packages/include/boost/type_traits/same_traits.hpp \ - /home/pblunsom/packages/include/boost/graph/properties.hpp \ - /home/pblunsom/packages/include/boost/property_map/property_map.hpp \ - /home/pblunsom/packages/include/boost/pending/cstddef.hpp \ - /home/pblunsom/packages/include/boost/concept_check.hpp \ - /home/pblunsom/packages/include/boost/concept/assert.hpp \ - /home/pblunsom/packages/include/boost/concept/detail/general.hpp \ - /home/pblunsom/packages/include/boost/concept/detail/has_constraints.hpp \ - /home/pblunsom/packages/include/boost/type_traits/conversion_traits.hpp \ - /home/pblunsom/packages/include/boost/concept/usage.hpp \ - /home/pblunsom/packages/include/boost/concept/detail/concept_def.hpp \ - /home/pblunsom/packages/include/boost/concept/detail/concept_undef.hpp \ - /home/pblunsom/packages/include/boost/concept_archetype.hpp \ - /home/pblunsom/packages/include/boost/property_map/vector_property_map.hpp \ - /home/pblunsom/packages/include/boost/graph/property_maps/constant_property_map.hpp \ - /home/pblunsom/packages/include/boost/graph/property_maps/null_property_map.hpp \ - /home/pblunsom/packages/include/boost/iterator/counting_iterator.hpp \ - /home/pblunsom/packages/include/boost/detail/numeric_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_nothrow_assign.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_trivial_assign.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_nothrow_constructor.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_trivial_constructor.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_nothrow_destructor.hpp \ - 
/home/pblunsom/packages/include/boost/type_traits/has_trivial_destructor.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_virtual_destructor.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_signed.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_unsigned.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_compound.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_floating_point.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_member_object_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_object.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_stateless.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_union.hpp \ - /home/pblunsom/packages/include/boost/type_traits/rank.hpp \ - /home/pblunsom/packages/include/boost/type_traits/extent.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_all_extents.hpp \ - /home/pblunsom/packages/include/boost/type_traits/aligned_storage.hpp \ - /home/pblunsom/packages/include/boost/type_traits/floating_point_promotion.hpp \ - /home/pblunsom/packages/include/boost/type_traits/integral_promotion.hpp \ - /home/pblunsom/packages/include/boost/type_traits/promote.hpp \ - /home/pblunsom/packages/include/boost/type_traits/make_unsigned.hpp \ - /home/pblunsom/packages/include/boost/type_traits/make_signed.hpp \ - /home/pblunsom/packages/include/boost/type_traits/decay.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_complex.hpp \ - /home/pblunsom/packages/include/boost/detail/select_type.hpp \ - /home/pblunsom/packages/include/boost/graph/iteration_macros.hpp \ - /home/pblunsom/packages/include/boost/shared_array.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/shared_array.hpp \ - /home/pblunsom/packages/include/boost/mpi/group.hpp \ - /home/pblunsom/packages/include/boost/mpi/intercommunicator.hpp \ - /home/pblunsom/packages/include/boost/mpi/nonblocking.hpp \ - /home/pblunsom/packages/include/boost/mpi/skeleton_and_content.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/forward_skeleton_iarchive.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/forward_skeleton_oarchive.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/ignore_iprimitive.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/ignore_oprimitive.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/content_oarchive.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/broadcast_sc.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/communicator_sc.hpp \ - /home/pblunsom/packages/include/boost/mpi/timer.hpp pyp.hh \ - slice-sampler.h log_add.h mt19937ar.h corpus.hh
-mpi-train-contexts.o: mpi-train-contexts.cc \
- /home/pblunsom/packages/include/boost/program_options/parsers.hpp \ - /home/pblunsom/packages/include/boost/program_options/config.hpp \ - /home/pblunsom/packages/include/boost/config.hpp \ - /home/pblunsom/packages/include/boost/config/user.hpp \ - /home/pblunsom/packages/include/boost/config/select_compiler_config.hpp \ - /home/pblunsom/packages/include/boost/config/compiler/gcc.hpp \ - /home/pblunsom/packages/include/boost/config/select_stdlib_config.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/utility.hpp \ - /home/pblunsom/packages/include/boost/config/stdlib/libstdcpp3.hpp \ - /home/pblunsom/packages/include/boost/config/select_platform_config.hpp \ - /home/pblunsom/packages/include/boost/config/platform/linux.hpp \ - /home/pblunsom/packages/include/boost/config/posix_features.hpp \ -
/home/pblunsom/packages/include/boost/config/suffix.hpp \ - /home/pblunsom/packages/include/boost/version.hpp \ - /home/pblunsom/packages/include/boost/config/auto_link.hpp \ - /home/pblunsom/packages/include/boost/program_options/option.hpp \ - /home/pblunsom/packages/include/boost/program_options/detail/cmdline.hpp \ - /home/pblunsom/packages/include/boost/program_options/errors.hpp \ - /home/pblunsom/packages/include/boost/program_options/cmdline.hpp \ - /home/pblunsom/packages/include/boost/program_options/options_description.hpp \ - /home/pblunsom/packages/include/boost/program_options/value_semantic.hpp \ - /home/pblunsom/packages/include/boost/any.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/broken_compiler_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/lambda_support.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/ttp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/gcc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/workaround.hpp \ - /home/pblunsom/packages/include/boost/detail/workaround.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/ctps.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_def.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/template_arity_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/int.hpp \ - /home/pblunsom/packages/include/boost/mpl/int_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/adl_barrier.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/adl.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/intel.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/nttp_decl.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/nttp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/integral_wrapper.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c_tag.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/static_constant.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/static_cast.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/config/config.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/template_arity_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/params.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/preprocessor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comma_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/iif.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bool.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/empty.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repeat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/error.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/auto_rec.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/eat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/inc.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor/arithmetic/inc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/overload_resolution.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/config.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_def.hpp \ - /home/pblunsom/packages/include/boost/type_traits/integral_constant.hpp \ - /home/pblunsom/packages/include/boost/mpl/bool.hpp \ - /home/pblunsom/packages/include/boost/mpl/bool_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c_fwd.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/throw_exception.hpp \ - /home/pblunsom/packages/include/boost/exception/detail/attribute_noreturn.hpp \ - /home/pblunsom/packages/include/boost/exception/exception.hpp \ - /home/pblunsom/packages/include/boost/current_function.hpp \ - /home/pblunsom/packages/include/boost/static_assert.hpp \ - /home/pblunsom/packages/include/boost/function/function1.hpp \ - /home/pblunsom/packages/include/boost/function/detail/maybe_include.hpp \ - /home/pblunsom/packages/include/boost/function/function_template.hpp \ - /home/pblunsom/packages/include/boost/function/detail/prologue.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/functional.hpp \ - /home/pblunsom/packages/include/boost/function/function_base.hpp \ - /home/pblunsom/packages/include/boost/detail/sp_typeinfo.hpp \ - /home/pblunsom/packages/include/boost/assert.hpp \ - /home/pblunsom/packages/include/boost/integer.hpp \ - /home/pblunsom/packages/include/boost/integer_fwd.hpp \ - /home/pblunsom/packages/include/boost/limits.hpp \ - /home/pblunsom/packages/include/boost/cstdint.hpp \ - /home/pblunsom/packages/include/boost/integer_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_trivial_copy.hpp \ - /home/pblunsom/packages/include/boost/type_traits/intrinsics.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_same.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_volatile.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/cv_traits_impl.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_pod.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_void.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_scalar.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_arithmetic.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_integral.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_float.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_or.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_enum.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_member_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_member_function_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_cv.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_and.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_not.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_trivial_destructor.hpp \ - 
/home/pblunsom/packages/include/boost/type_traits/is_const.hpp \ - /home/pblunsom/packages/include/boost/type_traits/composite_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_array.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_union.hpp \ - /home/pblunsom/packages/include/boost/type_traits/ice.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/yes_no_type.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_eq.hpp \ - /home/pblunsom/packages/include/boost/ref.hpp \ - /home/pblunsom/packages/include/boost/utility/addressof.hpp \ - /home/pblunsom/packages/include/boost/mpl/if.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/value_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/integral.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/eti.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/lambda_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/void_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/lambda_arity_param.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/dtp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/enum.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/def_params_tail.hpp \ - /home/pblunsom/packages/include/boost/mpl/limits/arity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/and.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitand.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/identity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/identity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/empty.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/add.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/dec.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/while.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/expr_iif.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/adt.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/is_binary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/check.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/compl.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/detail/while.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/sub.hpp \ - /home/pblunsom/packages/include/boost/type_traits/alignment_of.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_def.hpp \ - /home/pblunsom/packages/include/boost/mpl/size_t.hpp \ - /home/pblunsom/packages/include/boost/mpl/size_t_fwd.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/utility/enable_if.hpp \ - 
/home/pblunsom/packages/include/boost/function_equal.hpp \ - /home/pblunsom/packages/include/boost/function/function_fwd.hpp \ - /home/pblunsom/packages/include/boost/mem_fn.hpp \ - /home/pblunsom/packages/include/boost/bind/mem_fn.hpp \ - /home/pblunsom/packages/include/boost/get_pointer.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/memory.hpp \ - /home/pblunsom/packages/include/boost/bind/mem_fn_template.hpp \ - /home/pblunsom/packages/include/boost/bind/mem_fn_cc.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/rem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/enum_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params.hpp \ - /home/pblunsom/packages/include/boost/detail/no_exceptions_support.hpp \ - /home/pblunsom/packages/include/boost/lexical_cast.hpp \ - /home/pblunsom/packages/include/boost/type_traits/make_unsigned.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_signed.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_unsigned.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_const.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_volatile.hpp \ - /home/pblunsom/packages/include/boost/call_traits.hpp \ - /home/pblunsom/packages/include/boost/detail/call_traits.hpp \ - /home/pblunsom/packages/include/boost/detail/lcast_precision.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_abstract.hpp \ - /home/pblunsom/packages/include/boost/program_options/detail/value_semantic.hpp \ - /home/pblunsom/packages/include/boost/function.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iterate.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/iterate.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/data.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/slot.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/detail/def.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/iter/forward1.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/lower1.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/detail/shared.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/upper1.hpp \ - /home/pblunsom/packages/include/boost/function/detail/function_iterate.hpp \ - /home/pblunsom/packages/include/boost/shared_ptr.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/shared_ptr.hpp \ - /home/pblunsom/packages/include/boost/checked_delete.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/shared_count.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/bad_weak_ptr.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_has_sync.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_impl.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_convertible.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_pool.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock.hpp \ - 
/home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_sync.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/yield_k.hpp \ - /home/pblunsom/packages/include/boost/memory_order.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/operator_bool.hpp \ - /home/pblunsom/packages/include/boost/program_options/positional_options.hpp \ - /home/pblunsom/packages/include/boost/program_options/detail/parsers.hpp \ - /home/pblunsom/packages/include/boost/program_options/detail/convert.hpp \ - /home/pblunsom/packages/include/boost/program_options/variables_map.hpp \ - /home/pblunsom/packages/include/boost/scoped_ptr.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/scoped_ptr.hpp \ - /home/pblunsom/packages/include/boost/mpi/environment.hpp \ - /home/pblunsom/packages/include/boost/mpi/config.hpp \ - /home/pblunsom/packages/include/mpi.h \ - /home/pblunsom/packages/include/mpio.h \ - /home/pblunsom/packages/include/mpi.h \ - /home/pblunsom/packages/include/mpicxx.h \ - /home/pblunsom/packages/include/boost/noncopyable.hpp \ - /home/pblunsom/packages/include/boost/optional.hpp \ - /home/pblunsom/packages/include/boost/optional/optional.hpp \ - /home/pblunsom/packages/include/boost/type.hpp \ - /home/pblunsom/packages/include/boost/type_traits/type_with_alignment.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each_i.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/for.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/detail/for.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/to_list.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/transform.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/append.hpp \ - /home/pblunsom/packages/include/boost/mpl/not.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/nested_type_wknd.hpp \ - /home/pblunsom/packages/include/boost/detail/reference_content.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_nothrow_copy.hpp \ - /home/pblunsom/packages/include/boost/mpl/void.hpp \ - /home/pblunsom/packages/include/boost/none.hpp \ - /home/pblunsom/packages/include/boost/none_t.hpp \ - /home/pblunsom/packages/include/boost/utility/compare_pointees.hpp \ - /home/pblunsom/packages/include/boost/optional/optional_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpi/communicator.hpp \ - /home/pblunsom/packages/include/boost/mpi/exception.hpp \ - /home/pblunsom/packages/include/boost/mpi/datatype.hpp \ - /home/pblunsom/packages/include/boost/mpi/datatype_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/or.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/use_preprocessed.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/include_preprocessed.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/compiler.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/stringize.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/or.hpp \ - /home/pblunsom/packages/include/boost/mpl/and.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/and.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/mpi_datatype_cache.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/mpi_datatype_oarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/oserializer.hpp \ - /home/pblunsom/packages/include/boost/mpl/eval_if.hpp \ - /home/pblunsom/packages/include/boost/mpl/equal_to.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/comparison_op.hpp \ - 
/home/pblunsom/packages/include/boost/mpl/aux_/numeric_op.hpp \ - /home/pblunsom/packages/include/boost/mpl/numeric_cast.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply_wrap.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/has_apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/has_xxx.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/type_wrapper.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/yes_no.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/arrays.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/has_xxx.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc_typename.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/has_apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/msvc_never_true.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp \ - /home/pblunsom/packages/include/boost/mpl/tag.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/has_tag.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/numeric_cast_utils.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/forwarding.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/msvc_eti_base.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/is_msvc_eti_arg.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/equal_to.hpp \ - /home/pblunsom/packages/include/boost/mpl/greater_equal.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/greater_equal.hpp \ - /home/pblunsom/packages/include/boost/mpl/identity.hpp \ - /home/pblunsom/packages/include/boost/serialization/extended_type_info_typeid.hpp \ - /home/pblunsom/packages/include/boost/serialization/static_warning.hpp \ - /home/pblunsom/packages/include/boost/mpl/print.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_polymorphic.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_const.hpp \ - /home/pblunsom/packages/include/boost/serialization/singleton.hpp \ - /home/pblunsom/packages/include/boost/serialization/force_include.hpp \ - /home/pblunsom/packages/include/boost/serialization/extended_type_info.hpp \ - /home/pblunsom/packages/include/boost/serialization/config.hpp \ - /home/pblunsom/packages/include/boost/config/abi_prefix.hpp \ - /home/pblunsom/packages/include/boost/config/abi_suffix.hpp \ - /home/pblunsom/packages/include/boost/serialization/factory.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/greater.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/less.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/less_equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/not.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/not_equal.hpp \ - /home/pblunsom/packages/include/boost/serialization/access.hpp \ - /home/pblunsom/packages/include/boost/serialization/pfto.hpp \ - /home/pblunsom/packages/include/boost/serialization/throw_exception.hpp \ - /home/pblunsom/packages/include/boost/serialization/smart_cast.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_base_and_derived.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_pointer.hpp \ - /home/pblunsom/packages/include/boost/serialization/assume_abstract.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_extent.hpp \ - /home/pblunsom/packages/include/boost/serialization/serialization.hpp \ - /home/pblunsom/packages/include/boost/serialization/strong_typedef.hpp \ - 
/home/pblunsom/packages/include/boost/operators.hpp \ - /home/pblunsom/packages/include/boost/iterator.hpp \ - /home/pblunsom/packages/include/boost/serialization/nvp.hpp \ - /home/pblunsom/packages/include/boost/serialization/level.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_fundamental.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_class.hpp \ - /home/pblunsom/packages/include/boost/serialization/level_enum.hpp \ - /home/pblunsom/packages/include/boost/serialization/tracking.hpp \ - /home/pblunsom/packages/include/boost/mpl/greater.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/greater.hpp \ - /home/pblunsom/packages/include/boost/serialization/tracking_enum.hpp \ - /home/pblunsom/packages/include/boost/serialization/type_info_implementation.hpp \ - /home/pblunsom/packages/include/boost/serialization/traits.hpp \ - /home/pblunsom/packages/include/boost/serialization/split_member.hpp \ - /home/pblunsom/packages/include/boost/serialization/base_object.hpp \ - /home/pblunsom/packages/include/boost/serialization/void_cast_fwd.hpp \ - /home/pblunsom/packages/include/boost/serialization/wrapper.hpp \ - /home/pblunsom/packages/include/boost/serialization/version.hpp \ - /home/pblunsom/packages/include/boost/mpl/assert.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/pp_counter.hpp \ - /home/pblunsom/packages/include/boost/mpl/less.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/less.hpp \ - /home/pblunsom/packages/include/boost/mpl/comparison.hpp \ - /home/pblunsom/packages/include/boost/mpl/not_equal_to.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/not_equal_to.hpp \ - /home/pblunsom/packages/include/boost/mpl/less_equal.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/less_equal.hpp \ - /home/pblunsom/packages/include/boost/serialization/void_cast.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_virtual_base_of.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_base_of.hpp \ - /home/pblunsom/packages/include/boost/serialization/array.hpp \ - /home/pblunsom/packages/include/boost/mpl/always.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arity_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/placeholders.hpp \ - /home/pblunsom/packages/include/boost/mpl/arg.hpp \ - /home/pblunsom/packages/include/boost/mpl/arg_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_assert.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arg_typedef.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/arg.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp \ - /home/pblunsom/packages/include/boost/mpl/lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/bind_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/next.hpp \ - /home/pblunsom/packages/include/boost/mpl/next_prior.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/common_name_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/protect.hpp \ - 
/home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/full_lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/quote.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/has_type.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/bcc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/quote.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/template_arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply.hpp \ - /home/pblunsom/packages/include/boost/array.hpp \ - /home/pblunsom/packages/include/boost/swap.hpp \ - /home/pblunsom/packages/include/boost/utility/swap.hpp \ - /home/pblunsom/packages/include/boost/detail/iterator.hpp \ - /home/pblunsom/packages/include/boost/serialization/collection_size_type.hpp \ - /home/pblunsom/packages/include/boost/archive/archive_exception.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/decl.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/abi_prefix.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/abi_suffix.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/basic_oarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/basic_archive.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/auto_link_archive.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/basic_oserializer.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/basic_serializer.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/basic_pointer_oserializer.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/archive_serializer_map.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/check.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/ignore_skeleton_oarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/common_oarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/interface_oarchive.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/mpi_datatype_primitive.hpp \ - /home/pblunsom/packages/include/boost/serialization/detail/get_data.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/register_archive.hpp \ - /home/pblunsom/packages/include/boost/mpi/packed_oarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/basic_binary_oarchive.hpp \ - /home/pblunsom/packages/include/boost/serialization/string.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/packed_oprimitive.hpp \ - /home/pblunsom/packages/include/boost/mpi/allocator.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/binary_buffer_oprimitive.hpp \ - /home/pblunsom/packages/include/boost/serialization/is_bitwise_serializable.hpp \ - /home/pblunsom/packages/include/boost/mpi/packed_iarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/basic_binary_iarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/common_iarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/basic_iarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/basic_pointer_iserializer.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/interface_iarchive.hpp \ - /home/pblunsom/packages/include/boost/archive/detail/iserializer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_new_operator.hpp \ - 
/home/pblunsom/packages/include/boost/archive/detail/basic_iserializer.hpp \ - /home/pblunsom/packages/include/boost/archive/shared_ptr_helper.hpp \ - /home/pblunsom/packages/include/boost/serialization/shared_ptr_132.hpp \ - /home/pblunsom/packages/include/boost/serialization/split_free.hpp \ - /home/pblunsom/packages/include/boost/serialization/detail/shared_ptr_132.hpp \ - /home/pblunsom/packages/include/boost/serialization/detail/shared_count_132.hpp \ - /home/pblunsom/packages/include/boost/detail/lightweight_mutex.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/lightweight_mutex.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/lwm_pthreads.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/packed_iprimitive.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/binary_buffer_iprimitive.hpp \ - /home/pblunsom/packages/include/boost/mpi/skeleton_and_content_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/point_to_point.hpp \ - /home/pblunsom/packages/include/boost/mpi/status.hpp \ - /home/pblunsom/packages/include/boost/mpi/request.hpp mpi-pyp-topics.hh \ - /home/pblunsom/packages/include/boost/ptr_container/ptr_vector.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/ptr_sequence_adapter.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/reversible_ptr_container.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/throw_exception.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/scoped_deleter.hpp \ - /home/pblunsom/packages/include/boost/scoped_array.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/scoped_array.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/static_move_ptr.hpp \ - /home/pblunsom/packages/include/boost/compressed_pair.hpp \ - /home/pblunsom/packages/include/boost/detail/compressed_pair.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_empty.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_convertible.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_reference.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/default_deleter.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_bounds.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/is_convertible.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/move.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/exception.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/clone_allocator.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/nullable.hpp \ - /home/pblunsom/packages/include/boost/range/functions.hpp \ - /home/pblunsom/packages/include/boost/range/begin.hpp \ - /home/pblunsom/packages/include/boost/range/config.hpp \ - /home/pblunsom/packages/include/boost/range/iterator.hpp \ - /home/pblunsom/packages/include/boost/range/mutable_iterator.hpp \ - /home/pblunsom/packages/include/boost/range/detail/extract_optional_type.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_traits.hpp \ - /home/pblunsom/packages/include/boost/range/const_iterator.hpp \ - /home/pblunsom/packages/include/boost/range/end.hpp \ - /home/pblunsom/packages/include/boost/range/detail/implementation_help.hpp \ - /home/pblunsom/packages/include/boost/range/detail/common.hpp \ - /home/pblunsom/packages/include/boost/range/detail/sfinae.hpp \ - /home/pblunsom/packages/include/boost/range/size.hpp \ - /home/pblunsom/packages/include/boost/range/difference_type.hpp \ - 
/home/pblunsom/packages/include/boost/range/distance.hpp \ - /home/pblunsom/packages/include/boost/range/empty.hpp \ - /home/pblunsom/packages/include/boost/range/rbegin.hpp \ - /home/pblunsom/packages/include/boost/range/reverse_iterator.hpp \ - /home/pblunsom/packages/include/boost/iterator/reverse_iterator.hpp \ - /home/pblunsom/packages/include/boost/utility.hpp \ - /home/pblunsom/packages/include/boost/utility/base_from_member.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat_from_to.hpp \ - /home/pblunsom/packages/include/boost/utility/binary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/deduce_d.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/seq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/transform.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mod.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/detail/div_base.hpp \ - /home/pblunsom/packages/include/boost/next_prior.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_adaptor.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_categories.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/config_def.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/config_undef.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_facade.hpp \ - /home/pblunsom/packages/include/boost/iterator/interoperable.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/facade_iterator_category.hpp \ - /home/pblunsom/packages/include/boost/detail/indirect_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_function.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/false_result.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/is_function_ptr_helper.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/enable_if.hpp \ - /home/pblunsom/packages/include/boost/implicit_cast.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_pointer.hpp \ - /home/pblunsom/packages/include/boost/range/rend.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/indirect_fun.hpp \ - /home/pblunsom/packages/include/boost/utility/result_of.hpp \ - /home/pblunsom/packages/include/boost/preprocessor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/library.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/div.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mul.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/insert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/push_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/pop_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_z.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/pop_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/push_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/remove.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor/array/replace.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/greater_equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/config/limits.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/expr_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/assert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/line.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/apply.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/is_unary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/expand.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/intercept.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/local.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/self.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/at.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/rest_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/filter.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/first_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each_product.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/to_tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitnor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitxor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/nor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/or.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/xor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_r.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_a_default.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_defaults.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_binary_params.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection/max.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection/min.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/filter.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/first_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/detail/split.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_i.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_product.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/insert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/rest_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/pop_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/pop_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/push_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/push_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/remove.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/replace.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/subseq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/to_array.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/to_tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/to_seq.hpp \ - /home/pblunsom/packages/include/boost/utility/detail/result_of_iterate.hpp \ - /home/pblunsom/packages/include/boost/pointee.hpp \ - /home/pblunsom/packages/include/boost/detail/is_incrementable.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/void_ptr_iterator.hpp \ - /home/pblunsom/packages/include/boost/random/uniform_real.hpp \ - /home/pblunsom/packages/include/boost/random/detail/config.hpp \ - /home/pblunsom/packages/include/boost/random/variate_generator.hpp \ - /home/pblunsom/packages/include/boost/random/uniform_01.hpp \ - /home/pblunsom/packages/include/boost/random/detail/pass_through_engine.hpp \ - /home/pblunsom/packages/include/boost/random/detail/ptr_helper.hpp \ - /home/pblunsom/packages/include/boost/random/detail/disable_warnings.hpp \ - /home/pblunsom/packages/include/boost/random/detail/enable_warnings.hpp \ - /home/pblunsom/packages/include/boost/random/detail/uniform_int_float.hpp \ - /home/pblunsom/packages/include/boost/random/mersenne_twister.hpp \ - /home/pblunsom/packages/include/boost/random/linear_congruential.hpp \ - /home/pblunsom/packages/include/boost/random/detail/const_mod.hpp \ - /home/pblunsom/packages/include/boost/random/detail/seed.hpp \ - 
/home/pblunsom/packages/include/boost/random/inversive_congruential.hpp \ - /home/pblunsom/packages/include/boost/random/lagged_fibonacci.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/cmath.hpp mpi-pyp.hh \ - /home/pblunsom/packages/include/boost/tuple/tuple.hpp \ - /home/pblunsom/packages/include/boost/tuple/detail/tuple_basic.hpp \ - /home/pblunsom/packages/include/boost/type_traits/cv_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_cv.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_volatile.hpp \ - /home/pblunsom/packages/include/boost/type_traits/function_traits.hpp \ - /home/pblunsom/packages/include/boost/serialization/map.hpp \ - /home/pblunsom/packages/include/boost/serialization/utility.hpp \ - /home/pblunsom/packages/include/boost/serialization/collections_save_imp.hpp \ - /home/pblunsom/packages/include/boost/serialization/collections_load_imp.hpp \ - /home/pblunsom/packages/include/boost/serialization/detail/stack_constructor.hpp \ - /home/pblunsom/packages/include/boost/aligned_storage.hpp \ - /home/pblunsom/packages/include/boost/mpi.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives/all_gather.hpp \ - /home/pblunsom/packages/include/boost/serialization/vector.hpp \ - /home/pblunsom/packages/include/boost/serialization/collection_traits.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives/broadcast.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives/gather.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives/all_reduce.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives/reduce.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/computation_tree.hpp \ - /home/pblunsom/packages/include/boost/mpi/operations.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives/all_to_all.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives/scatter.hpp \ - /home/pblunsom/packages/include/boost/mpi/collectives/scan.hpp \ - /home/pblunsom/packages/include/boost/mpi/graph_communicator.hpp \ - /home/pblunsom/packages/include/boost/graph/graph_traits.hpp \ - /home/pblunsom/packages/include/boost/pending/property.hpp \ - /home/pblunsom/packages/include/boost/pending/detail/property.hpp \ - /home/pblunsom/packages/include/boost/type_traits/same_traits.hpp \ - /home/pblunsom/packages/include/boost/graph/properties.hpp \ - /home/pblunsom/packages/include/boost/property_map/property_map.hpp \ - /home/pblunsom/packages/include/boost/pending/cstddef.hpp \ - /home/pblunsom/packages/include/boost/concept_check.hpp \ - /home/pblunsom/packages/include/boost/concept/assert.hpp \ - /home/pblunsom/packages/include/boost/concept/detail/general.hpp \ - /home/pblunsom/packages/include/boost/concept/detail/has_constraints.hpp \ - /home/pblunsom/packages/include/boost/type_traits/conversion_traits.hpp \ - /home/pblunsom/packages/include/boost/concept/usage.hpp \ - /home/pblunsom/packages/include/boost/concept/detail/concept_def.hpp \ - /home/pblunsom/packages/include/boost/concept/detail/concept_undef.hpp \ - /home/pblunsom/packages/include/boost/concept_archetype.hpp \ - /home/pblunsom/packages/include/boost/property_map/vector_property_map.hpp \ - /home/pblunsom/packages/include/boost/graph/property_maps/constant_property_map.hpp \ - /home/pblunsom/packages/include/boost/graph/property_maps/null_property_map.hpp \ - 
/home/pblunsom/packages/include/boost/iterator/counting_iterator.hpp \ - /home/pblunsom/packages/include/boost/detail/numeric_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_nothrow_assign.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_trivial_assign.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_nothrow_constructor.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_trivial_constructor.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_nothrow_destructor.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_virtual_destructor.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_compound.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_floating_point.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_member_object_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_object.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_stateless.hpp \ - /home/pblunsom/packages/include/boost/type_traits/rank.hpp \ - /home/pblunsom/packages/include/boost/type_traits/extent.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_all_extents.hpp \ - /home/pblunsom/packages/include/boost/type_traits/aligned_storage.hpp \ - /home/pblunsom/packages/include/boost/type_traits/floating_point_promotion.hpp \ - /home/pblunsom/packages/include/boost/type_traits/integral_promotion.hpp \ - /home/pblunsom/packages/include/boost/type_traits/promote.hpp \ - /home/pblunsom/packages/include/boost/type_traits/make_signed.hpp \ - /home/pblunsom/packages/include/boost/type_traits/decay.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_complex.hpp \ - /home/pblunsom/packages/include/boost/detail/select_type.hpp \ - /home/pblunsom/packages/include/boost/graph/iteration_macros.hpp \ - /home/pblunsom/packages/include/boost/shared_array.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/shared_array.hpp \ - /home/pblunsom/packages/include/boost/mpi/group.hpp \ - /home/pblunsom/packages/include/boost/mpi/intercommunicator.hpp \ - /home/pblunsom/packages/include/boost/mpi/nonblocking.hpp \ - /home/pblunsom/packages/include/boost/mpi/skeleton_and_content.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/forward_skeleton_iarchive.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/forward_skeleton_oarchive.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/ignore_iprimitive.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/ignore_oprimitive.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/content_oarchive.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/broadcast_sc.hpp \ - /home/pblunsom/packages/include/boost/mpi/detail/communicator_sc.hpp \ - /home/pblunsom/packages/include/boost/mpi/timer.hpp pyp.hh \ - slice-sampler.h log_add.h mt19937ar.h corpus.hh mpi-corpus.hh \ - contexts_corpus.hh contexts_lexer.h ../../../decoder/dict.h \ - /home/pblunsom/packages/include/boost/functional/hash.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/hash.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/hash_fwd.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/detail/float_functions.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/detail/limits.hpp \ - /home/pblunsom/packages/include/boost/integer/static_log2.hpp \ - 
/home/pblunsom/packages/include/boost/functional/hash/detail/hash_float_generic.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/extensions.hpp \ - /home/pblunsom/packages/include/boost/detail/container_fwd.hpp \ - ../../../decoder/wordid.h gzstream.hh -pyp-topics.o: pyp-topics.cc timing.h clock_gettime_stub.c pyp-topics.hh \ - /home/pblunsom/packages/include/boost/ptr_container/ptr_vector.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/ptr_sequence_adapter.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/reversible_ptr_container.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/throw_exception.hpp \ - /home/pblunsom/packages/include/boost/assert.hpp \ - /home/pblunsom/packages/include/boost/config.hpp \ - /home/pblunsom/packages/include/boost/config/user.hpp \ - /home/pblunsom/packages/include/boost/config/select_compiler_config.hpp \ - /home/pblunsom/packages/include/boost/config/compiler/gcc.hpp \ - /home/pblunsom/packages/include/boost/config/select_stdlib_config.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/utility.hpp \ - /home/pblunsom/packages/include/boost/config/stdlib/libstdcpp3.hpp \ - /home/pblunsom/packages/include/boost/config/select_platform_config.hpp \ - /home/pblunsom/packages/include/boost/config/platform/linux.hpp \ - /home/pblunsom/packages/include/boost/config/posix_features.hpp \ - /home/pblunsom/packages/include/boost/config/suffix.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/scoped_deleter.hpp \ - /home/pblunsom/packages/include/boost/scoped_array.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/scoped_array.hpp \ - /home/pblunsom/packages/include/boost/checked_delete.hpp \ - /home/pblunsom/packages/include/boost/detail/workaround.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/operator_bool.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/static_move_ptr.hpp \ - /home/pblunsom/packages/include/boost/compressed_pair.hpp \ - /home/pblunsom/packages/include/boost/detail/compressed_pair.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_cv.hpp \ - /home/pblunsom/packages/include/boost/type_traits/broken_compiler_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/lambda_support.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/ttp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/gcc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/workaround.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/ctps.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/cv_traits_impl.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_def.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/template_arity_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/int.hpp \ - /home/pblunsom/packages/include/boost/mpl/int_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/adl_barrier.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/adl.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/intel.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/nttp_decl.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/nttp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/integral_wrapper.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c_tag.hpp \ - 
/home/pblunsom/packages/include/boost/mpl/aux_/config/static_constant.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/static_cast.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/config/config.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/template_arity_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/params.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/preprocessor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comma_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/iif.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bool.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/empty.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repeat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/error.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/auto_rec.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/eat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/inc.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/inc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/overload_resolution.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_empty.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_convertible.hpp \ - /home/pblunsom/packages/include/boost/type_traits/intrinsics.hpp \ - /home/pblunsom/packages/include/boost/type_traits/config.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_same.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_def.hpp \ - /home/pblunsom/packages/include/boost/type_traits/integral_constant.hpp \ - /home/pblunsom/packages/include/boost/mpl/bool.hpp \ - /home/pblunsom/packages/include/boost/mpl/bool_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c_fwd.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_volatile.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/yes_no_type.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_array.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/ice.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_or.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_and.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_not.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_eq.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_arithmetic.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_integral.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_float.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_void.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_abstract.hpp \ - 
/home/pblunsom/packages/include/boost/type_traits/is_class.hpp \ - /home/pblunsom/packages/include/boost/call_traits.hpp \ - /home/pblunsom/packages/include/boost/detail/call_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_member_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_member_function_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/default_deleter.hpp \ - /home/pblunsom/packages/include/boost/mpl/if.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/value_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/integral.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/eti.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/lambda_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/void_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/lambda_arity_param.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/dtp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/enum.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/def_params_tail.hpp \ - /home/pblunsom/packages/include/boost/mpl/limits/arity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/and.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitand.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/identity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/identity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/empty.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/add.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/dec.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/while.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/expr_iif.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/adt.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/is_binary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/check.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/compl.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/detail/while.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/sub.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_bounds.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/is_convertible.hpp \ - /home/pblunsom/packages/include/boost/mpl/and.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/use_preprocessed.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/nested_type_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/include_preprocessed.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/compiler.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/stringize.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/and.hpp \ - /home/pblunsom/packages/include/boost/mpl/identity.hpp \ - /home/pblunsom/packages/include/boost/utility/enable_if.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/move.hpp \ - /home/pblunsom/packages/include/boost/static_assert.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/exception.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/clone_allocator.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/nullable.hpp \ - /home/pblunsom/packages/include/boost/mpl/eval_if.hpp \ - /home/pblunsom/packages/include/boost/range/functions.hpp \ - /home/pblunsom/packages/include/boost/range/begin.hpp \ - /home/pblunsom/packages/include/boost/range/config.hpp \ - /home/pblunsom/packages/include/boost/range/iterator.hpp \ - /home/pblunsom/packages/include/boost/range/mutable_iterator.hpp \ - /home/pblunsom/packages/include/boost/range/detail/extract_optional_type.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_traits.hpp \ - /home/pblunsom/packages/include/boost/detail/iterator.hpp \ - /home/pblunsom/packages/include/boost/range/const_iterator.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_const.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_const.hpp \ - /home/pblunsom/packages/include/boost/range/end.hpp \ - /home/pblunsom/packages/include/boost/range/detail/implementation_help.hpp \ - /home/pblunsom/packages/include/boost/range/detail/common.hpp \ - /home/pblunsom/packages/include/boost/range/detail/sfinae.hpp \ - /home/pblunsom/packages/include/boost/range/size.hpp \ - /home/pblunsom/packages/include/boost/range/difference_type.hpp \ - /home/pblunsom/packages/include/boost/range/distance.hpp \ - /home/pblunsom/packages/include/boost/range/empty.hpp \ - /home/pblunsom/packages/include/boost/range/rbegin.hpp \ - /home/pblunsom/packages/include/boost/range/reverse_iterator.hpp \ - /home/pblunsom/packages/include/boost/iterator/reverse_iterator.hpp \ - /home/pblunsom/packages/include/boost/iterator.hpp \ - /home/pblunsom/packages/include/boost/utility.hpp \ - /home/pblunsom/packages/include/boost/utility/addressof.hpp \ - /home/pblunsom/packages/include/boost/utility/base_from_member.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/rem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat_from_to.hpp \ - /home/pblunsom/packages/include/boost/utility/binary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/deduce_d.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/seq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/transform.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mod.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/detail/div_base.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/less_equal.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor/logical/not.hpp \ - /home/pblunsom/packages/include/boost/next_prior.hpp \ - /home/pblunsom/packages/include/boost/noncopyable.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_adaptor.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_categories.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/config_def.hpp \ - /home/pblunsom/packages/include/boost/mpl/placeholders.hpp \ - /home/pblunsom/packages/include/boost/mpl/arg.hpp \ - /home/pblunsom/packages/include/boost/mpl/arg_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_assert.hpp \ - /home/pblunsom/packages/include/boost/mpl/assert.hpp \ - /home/pblunsom/packages/include/boost/mpl/not.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/yes_no.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/arrays.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/pp_counter.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arity_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arg_typedef.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/arg.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/config_undef.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_facade.hpp \ - /home/pblunsom/packages/include/boost/iterator/interoperable.hpp \ - /home/pblunsom/packages/include/boost/mpl/or.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/or.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/facade_iterator_category.hpp \ - /home/pblunsom/packages/include/boost/detail/indirect_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_function.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/false_result.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/is_function_ptr_helper.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_pointer.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/enable_if.hpp \ - /home/pblunsom/packages/include/boost/implicit_cast.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_const.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_pod.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_scalar.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_enum.hpp \ - /home/pblunsom/packages/include/boost/mpl/always.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply_wrap.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/has_apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/has_xxx.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/type_wrapper.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/has_xxx.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc_typename.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/has_apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/msvc_never_true.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp \ - /home/pblunsom/packages/include/boost/mpl/lambda.hpp \ - 
/home/pblunsom/packages/include/boost/mpl/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/bind_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/next.hpp \ - /home/pblunsom/packages/include/boost/mpl/next_prior.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/common_name_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/protect.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/full_lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/quote.hpp \ - /home/pblunsom/packages/include/boost/mpl/void.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/has_type.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/bcc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/quote.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/template_arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply.hpp \ - /home/pblunsom/packages/include/boost/range/rend.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/indirect_fun.hpp \ - /home/pblunsom/packages/include/boost/utility/result_of.hpp \ - /home/pblunsom/packages/include/boost/type.hpp \ - /home/pblunsom/packages/include/boost/preprocessor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/library.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/div.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mul.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/data.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/insert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/push_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/not_equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/pop_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_z.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/pop_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/push_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/remove.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/replace.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/greater.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/less.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/greater_equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/config/limits.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor/control.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/expr_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/assert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/line.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/iterate.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/slot.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/detail/def.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/apply.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/is_unary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/expand.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/intercept.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/local.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/self.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/append.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/at.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/rest_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each_i.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/for.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/detail/for.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/filter.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/first_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each_product.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/to_tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/to_list.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/transform.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitnor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitxor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/nor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/or.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/xor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_r.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_a_default.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_defaults.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection/max.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection/min.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/filter.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/first_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/detail/split.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_i.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_product.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/insert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/rest_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/pop_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/pop_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/push_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/push_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/remove.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/replace.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/subseq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/to_array.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/to_tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/to_seq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/iter/forward1.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/lower1.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/detail/shared.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/upper1.hpp \ - /home/pblunsom/packages/include/boost/utility/detail/result_of_iterate.hpp \ - /home/pblunsom/packages/include/boost/pointee.hpp \ - /home/pblunsom/packages/include/boost/detail/is_incrementable.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/void_ptr_iterator.hpp \ - /home/pblunsom/packages/include/boost/random/uniform_real.hpp \ - /home/pblunsom/packages/include/boost/limits.hpp \ - /home/pblunsom/packages/include/boost/random/detail/config.hpp \ - /home/pblunsom/packages/include/boost/random/variate_generator.hpp \ - /home/pblunsom/packages/include/boost/random/uniform_01.hpp \ - /home/pblunsom/packages/include/boost/random/detail/pass_through_engine.hpp \ - /home/pblunsom/packages/include/boost/random/detail/ptr_helper.hpp \ - 
/home/pblunsom/packages/include/boost/random/detail/disable_warnings.hpp \ - /home/pblunsom/packages/include/boost/random/detail/enable_warnings.hpp \ - /home/pblunsom/packages/include/boost/random/detail/uniform_int_float.hpp \ - /home/pblunsom/packages/include/boost/random/mersenne_twister.hpp \ - /home/pblunsom/packages/include/boost/integer_traits.hpp \ - /home/pblunsom/packages/include/boost/cstdint.hpp \ - /home/pblunsom/packages/include/boost/random/linear_congruential.hpp \ - /home/pblunsom/packages/include/boost/random/detail/const_mod.hpp \ - /home/pblunsom/packages/include/boost/random/detail/seed.hpp pyp.hh \ - slice-sampler.h log_add.h mt19937ar.h corpus.hh \ - /home/pblunsom/packages/include/boost/shared_ptr.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/shared_ptr.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/memory.hpp \ - /home/pblunsom/packages/include/boost/throw_exception.hpp \ - /home/pblunsom/packages/include/boost/exception/detail/attribute_noreturn.hpp \ - /home/pblunsom/packages/include/boost/exception/exception.hpp \ - /home/pblunsom/packages/include/boost/current_function.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/shared_count.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/bad_weak_ptr.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_has_sync.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \ - /home/pblunsom/packages/include/boost/detail/sp_typeinfo.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_impl.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_convertible.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_pool.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_sync.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/yield_k.hpp \ - /home/pblunsom/packages/include/boost/memory_order.hpp workers.hh \ - /home/pblunsom/packages/include/boost/bind.hpp \ - /home/pblunsom/packages/include/boost/bind/bind.hpp \ - /home/pblunsom/packages/include/boost/ref.hpp \ - /home/pblunsom/packages/include/boost/mem_fn.hpp \ - /home/pblunsom/packages/include/boost/bind/mem_fn.hpp \ - /home/pblunsom/packages/include/boost/get_pointer.hpp \ - /home/pblunsom/packages/include/boost/bind/mem_fn_template.hpp \ - /home/pblunsom/packages/include/boost/bind/mem_fn_cc.hpp \ - /home/pblunsom/packages/include/boost/is_placeholder.hpp \ - /home/pblunsom/packages/include/boost/bind/arg.hpp \ - /home/pblunsom/packages/include/boost/visit_each.hpp \ - /home/pblunsom/packages/include/boost/bind/storage.hpp \ - /home/pblunsom/packages/include/boost/bind/bind_template.hpp \ - /home/pblunsom/packages/include/boost/bind/bind_cc.hpp \ - /home/pblunsom/packages/include/boost/bind/bind_mf_cc.hpp \ - /home/pblunsom/packages/include/boost/bind/bind_mf2_cc.hpp \ - /home/pblunsom/packages/include/boost/bind/placeholders.hpp \ - /home/pblunsom/packages/include/boost/function.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iterate.hpp \ - /home/pblunsom/packages/include/boost/function/detail/prologue.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/functional.hpp \ - /home/pblunsom/packages/include/boost/function/function_base.hpp \ - /home/pblunsom/packages/include/boost/integer.hpp \ - 
/home/pblunsom/packages/include/boost/integer_fwd.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_trivial_copy.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_trivial_destructor.hpp \ - /home/pblunsom/packages/include/boost/type_traits/composite_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_union.hpp \ - /home/pblunsom/packages/include/boost/type_traits/alignment_of.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_def.hpp \ - /home/pblunsom/packages/include/boost/mpl/size_t.hpp \ - /home/pblunsom/packages/include/boost/mpl/size_t_fwd.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/function_equal.hpp \ - /home/pblunsom/packages/include/boost/function/function_fwd.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/enum_params.hpp \ - /home/pblunsom/packages/include/boost/function/detail/function_iterate.hpp \ - /home/pblunsom/packages/include/boost/function/detail/maybe_include.hpp \ - /home/pblunsom/packages/include/boost/function/function_template.hpp \ - /home/pblunsom/packages/include/boost/detail/no_exceptions_support.hpp \ - /home/pblunsom/packages/include/boost/thread/thread.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/platform.hpp \ - /home/pblunsom/packages/include/boost/config/requires_threads.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/thread_data.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/config.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/platform.hpp \ - /home/pblunsom/packages/include/boost/config/auto_link.hpp \ - /home/pblunsom/packages/include/boost/thread/exceptions.hpp \ - /home/pblunsom/packages/include/boost/config/abi_prefix.hpp \ - /home/pblunsom/packages/include/boost/config/abi_suffix.hpp \ - /home/pblunsom/packages/include/boost/enable_shared_from_this.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/enable_shared_from_this.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/weak_ptr.hpp \ - /home/pblunsom/packages/include/boost/thread/mutex.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/mutex.hpp \ - /home/pblunsom/packages/include/boost/thread/locks.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/move.hpp \ - /home/pblunsom/packages/include/boost/thread/thread_time.hpp \ - /home/pblunsom/packages/include/boost/date_time/microsec_time_clock.hpp \ - /home/pblunsom/packages/include/boost/date_time/compiler_config.hpp \ - /home/pblunsom/packages/include/boost/date_time/locale_config.hpp \ - /home/pblunsom/packages/include/boost/date_time/c_time.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_clock.hpp \ - /home/pblunsom/packages/include/boost/date_time/filetime_functions.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_types.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/ptime.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_system.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_config.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/cmath.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_duration.hpp \ - /home/pblunsom/packages/include/boost/operators.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_defs.hpp \ - /home/pblunsom/packages/include/boost/date_time/special_defs.hpp \ - 
/home/pblunsom/packages/include/boost/date_time/time_resolution_traits.hpp \ - /home/pblunsom/packages/include/boost/date_time/int_adapter.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/gregorian_types.hpp \ - /home/pblunsom/packages/include/boost/date_time/date.hpp \ - /home/pblunsom/packages/include/boost/date_time/year_month_day.hpp \ - /home/pblunsom/packages/include/boost/date_time/period.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_calendar.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_weekday.hpp \ - /home/pblunsom/packages/include/boost/date_time/constrained_value.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_base_of.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_base_and_derived.hpp \ - /home/pblunsom/packages/include/boost/date_time/date_defs.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_day_of_year.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian_calendar.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian_calendar.ipp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_ymd.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_day.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_year.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_month.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_duration.hpp \ - /home/pblunsom/packages/include/boost/date_time/date_duration.hpp \ - /home/pblunsom/packages/include/boost/date_time/date_duration_types.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_duration_types.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_date.hpp \ - /home/pblunsom/packages/include/boost/date_time/adjust_functors.hpp \ - /home/pblunsom/packages/include/boost/date_time/wrapping_int.hpp \ - /home/pblunsom/packages/include/boost/date_time/date_generators.hpp \ - /home/pblunsom/packages/include/boost/date_time/date_clock_device.hpp \ - /home/pblunsom/packages/include/boost/date_time/date_iterator.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_system_split.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_system_counted.hpp \ - /home/pblunsom/packages/include/boost/date_time/time.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/date_duration_operators.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_duration.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/time_period.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_iterator.hpp \ - /home/pblunsom/packages/include/boost/date_time/dst_rules.hpp \ - /home/pblunsom/packages/include/boost/thread/xtime.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/conversion.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/conversion.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/timespec.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/pthread_mutex_scoped_lock.hpp \ - /home/pblunsom/packages/include/boost/optional.hpp \ - /home/pblunsom/packages/include/boost/optional/optional.hpp \ - /home/pblunsom/packages/include/boost/type_traits/type_with_alignment.hpp \ - /home/pblunsom/packages/include/boost/detail/reference_content.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_nothrow_copy.hpp \ - /home/pblunsom/packages/include/boost/none.hpp \ - 
/home/pblunsom/packages/include/boost/none_t.hpp \ - /home/pblunsom/packages/include/boost/utility/compare_pointees.hpp \ - /home/pblunsom/packages/include/boost/optional/optional_fwd.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/condition_variable_fwd.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/thread.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/thread_heap_alloc.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/thread_heap_alloc.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/thread_interruption.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/thread_group.hpp \ - /home/pblunsom/packages/include/boost/thread/shared_mutex.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/shared_mutex.hpp \ - /home/pblunsom/packages/include/boost/thread/condition_variable.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/condition_variable.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/thread_data.hpp \ - /home/pblunsom/packages/include/boost/thread/future.hpp \ - /home/pblunsom/packages/include/boost/exception_ptr.hpp \ - /home/pblunsom/packages/include/boost/exception/detail/exception_ptr.hpp \ - /home/pblunsom/packages/include/boost/scoped_ptr.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/scoped_ptr.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_fundamental.hpp \ - /home/pblunsom/packages/include/boost/thread/condition.hpp -train-contexts.o: train-contexts.cc \ - /home/pblunsom/packages/include/boost/program_options/parsers.hpp \ - /home/pblunsom/packages/include/boost/program_options/config.hpp \ - /home/pblunsom/packages/include/boost/config.hpp \ - /home/pblunsom/packages/include/boost/config/user.hpp \ - /home/pblunsom/packages/include/boost/config/select_compiler_config.hpp \ - /home/pblunsom/packages/include/boost/config/compiler/gcc.hpp \ - /home/pblunsom/packages/include/boost/config/select_stdlib_config.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/utility.hpp \ - /home/pblunsom/packages/include/boost/config/stdlib/libstdcpp3.hpp \ - /home/pblunsom/packages/include/boost/config/select_platform_config.hpp \ - /home/pblunsom/packages/include/boost/config/platform/linux.hpp \ - /home/pblunsom/packages/include/boost/config/posix_features.hpp \ - /home/pblunsom/packages/include/boost/config/suffix.hpp \ - /home/pblunsom/packages/include/boost/version.hpp \ - /home/pblunsom/packages/include/boost/config/auto_link.hpp \ - /home/pblunsom/packages/include/boost/program_options/option.hpp \ - /home/pblunsom/packages/include/boost/program_options/detail/cmdline.hpp \ - /home/pblunsom/packages/include/boost/program_options/errors.hpp \ - /home/pblunsom/packages/include/boost/program_options/cmdline.hpp \ - /home/pblunsom/packages/include/boost/program_options/options_description.hpp \ - /home/pblunsom/packages/include/boost/program_options/value_semantic.hpp \ - /home/pblunsom/packages/include/boost/any.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/broken_compiler_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/lambda_support.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/ttp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/gcc.hpp \ - 
/home/pblunsom/packages/include/boost/mpl/aux_/config/workaround.hpp \ - /home/pblunsom/packages/include/boost/detail/workaround.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/ctps.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_def.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/template_arity_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/int.hpp \ - /home/pblunsom/packages/include/boost/mpl/int_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/adl_barrier.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/adl.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/intel.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/nttp_decl.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/nttp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/integral_wrapper.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c_tag.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/static_constant.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/static_cast.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/config/config.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/template_arity_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/params.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/preprocessor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comma_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/iif.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bool.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/empty.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repeat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/error.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/auto_rec.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/eat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/inc.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/inc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/overload_resolution.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/config.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_def.hpp \ - /home/pblunsom/packages/include/boost/type_traits/integral_constant.hpp \ - /home/pblunsom/packages/include/boost/mpl/bool.hpp \ - /home/pblunsom/packages/include/boost/mpl/bool_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c_fwd.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/throw_exception.hpp \ - /home/pblunsom/packages/include/boost/exception/detail/attribute_noreturn.hpp \ - /home/pblunsom/packages/include/boost/exception/exception.hpp \ - /home/pblunsom/packages/include/boost/current_function.hpp \ - 
/home/pblunsom/packages/include/boost/static_assert.hpp \ - /home/pblunsom/packages/include/boost/function/function1.hpp \ - /home/pblunsom/packages/include/boost/function/detail/maybe_include.hpp \ - /home/pblunsom/packages/include/boost/function/function_template.hpp \ - /home/pblunsom/packages/include/boost/function/detail/prologue.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/functional.hpp \ - /home/pblunsom/packages/include/boost/function/function_base.hpp \ - /home/pblunsom/packages/include/boost/detail/sp_typeinfo.hpp \ - /home/pblunsom/packages/include/boost/assert.hpp \ - /home/pblunsom/packages/include/boost/integer.hpp \ - /home/pblunsom/packages/include/boost/integer_fwd.hpp \ - /home/pblunsom/packages/include/boost/limits.hpp \ - /home/pblunsom/packages/include/boost/cstdint.hpp \ - /home/pblunsom/packages/include/boost/integer_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_trivial_copy.hpp \ - /home/pblunsom/packages/include/boost/type_traits/intrinsics.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_same.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_volatile.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/cv_traits_impl.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_pod.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_void.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_scalar.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_arithmetic.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_integral.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_float.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_or.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_enum.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_member_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_member_function_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_cv.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_and.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_not.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_trivial_destructor.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_const.hpp \ - /home/pblunsom/packages/include/boost/type_traits/composite_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_array.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_union.hpp \ - /home/pblunsom/packages/include/boost/type_traits/ice.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/yes_no_type.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_eq.hpp \ - /home/pblunsom/packages/include/boost/ref.hpp \ - /home/pblunsom/packages/include/boost/utility/addressof.hpp \ - /home/pblunsom/packages/include/boost/mpl/if.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/value_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/integral.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/eti.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/lambda_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/void_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na.hpp \ - 
/home/pblunsom/packages/include/boost/mpl/aux_/na_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/lambda_arity_param.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/dtp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/enum.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/def_params_tail.hpp \ - /home/pblunsom/packages/include/boost/mpl/limits/arity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/and.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitand.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/identity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/identity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/empty.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/add.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/dec.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/while.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/expr_iif.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/adt.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/is_binary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/check.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/compl.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/detail/while.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/sub.hpp \ - /home/pblunsom/packages/include/boost/type_traits/alignment_of.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_def.hpp \ - /home/pblunsom/packages/include/boost/mpl/size_t.hpp \ - /home/pblunsom/packages/include/boost/mpl/size_t_fwd.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/utility/enable_if.hpp \ - /home/pblunsom/packages/include/boost/function_equal.hpp \ - /home/pblunsom/packages/include/boost/function/function_fwd.hpp \ - /home/pblunsom/packages/include/boost/mem_fn.hpp \ - /home/pblunsom/packages/include/boost/bind/mem_fn.hpp \ - /home/pblunsom/packages/include/boost/get_pointer.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/memory.hpp \ - /home/pblunsom/packages/include/boost/bind/mem_fn_template.hpp \ - /home/pblunsom/packages/include/boost/bind/mem_fn_cc.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/rem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/enum_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params.hpp \ - /home/pblunsom/packages/include/boost/detail/no_exceptions_support.hpp \ - /home/pblunsom/packages/include/boost/lexical_cast.hpp \ - /home/pblunsom/packages/include/boost/type_traits/make_unsigned.hpp \ - 
/home/pblunsom/packages/include/boost/type_traits/is_signed.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_unsigned.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_const.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_volatile.hpp \ - /home/pblunsom/packages/include/boost/call_traits.hpp \ - /home/pblunsom/packages/include/boost/detail/call_traits.hpp \ - /home/pblunsom/packages/include/boost/detail/lcast_precision.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_abstract.hpp \ - /home/pblunsom/packages/include/boost/program_options/detail/value_semantic.hpp \ - /home/pblunsom/packages/include/boost/function.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iterate.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/iterate.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/data.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/slot.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/detail/def.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/iter/forward1.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/lower1.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/detail/shared.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/upper1.hpp \ - /home/pblunsom/packages/include/boost/function/detail/function_iterate.hpp \ - /home/pblunsom/packages/include/boost/shared_ptr.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/shared_ptr.hpp \ - /home/pblunsom/packages/include/boost/checked_delete.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/shared_count.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/bad_weak_ptr.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_has_sync.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_impl.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_convertible.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_pool.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_sync.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/yield_k.hpp \ - /home/pblunsom/packages/include/boost/memory_order.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/operator_bool.hpp \ - /home/pblunsom/packages/include/boost/program_options/positional_options.hpp \ - /home/pblunsom/packages/include/boost/program_options/detail/parsers.hpp \ - /home/pblunsom/packages/include/boost/program_options/detail/convert.hpp \ - /home/pblunsom/packages/include/boost/program_options/variables_map.hpp \ - /home/pblunsom/packages/include/boost/scoped_ptr.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/scoped_ptr.hpp \ - pyp-topics.hh \ - /home/pblunsom/packages/include/boost/ptr_container/ptr_vector.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/ptr_sequence_adapter.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/reversible_ptr_container.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/throw_exception.hpp \ - 
/home/pblunsom/packages/include/boost/ptr_container/detail/scoped_deleter.hpp \ - /home/pblunsom/packages/include/boost/scoped_array.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/scoped_array.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/static_move_ptr.hpp \ - /home/pblunsom/packages/include/boost/compressed_pair.hpp \ - /home/pblunsom/packages/include/boost/detail/compressed_pair.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_empty.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_convertible.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_class.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/default_deleter.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_bounds.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/is_convertible.hpp \ - /home/pblunsom/packages/include/boost/mpl/and.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/use_preprocessed.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/nested_type_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/include_preprocessed.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/compiler.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/stringize.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/and.hpp \ - /home/pblunsom/packages/include/boost/mpl/identity.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/move.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/exception.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/clone_allocator.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/nullable.hpp \ - /home/pblunsom/packages/include/boost/mpl/eval_if.hpp \ - /home/pblunsom/packages/include/boost/range/functions.hpp \ - /home/pblunsom/packages/include/boost/range/begin.hpp \ - /home/pblunsom/packages/include/boost/range/config.hpp \ - /home/pblunsom/packages/include/boost/range/iterator.hpp \ - /home/pblunsom/packages/include/boost/range/mutable_iterator.hpp \ - /home/pblunsom/packages/include/boost/range/detail/extract_optional_type.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_traits.hpp \ - /home/pblunsom/packages/include/boost/detail/iterator.hpp \ - /home/pblunsom/packages/include/boost/range/const_iterator.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_const.hpp \ - /home/pblunsom/packages/include/boost/range/end.hpp \ - /home/pblunsom/packages/include/boost/range/detail/implementation_help.hpp \ - /home/pblunsom/packages/include/boost/range/detail/common.hpp \ - /home/pblunsom/packages/include/boost/range/detail/sfinae.hpp \ - /home/pblunsom/packages/include/boost/range/size.hpp \ - /home/pblunsom/packages/include/boost/range/difference_type.hpp \ - /home/pblunsom/packages/include/boost/range/distance.hpp \ - /home/pblunsom/packages/include/boost/range/empty.hpp \ - /home/pblunsom/packages/include/boost/range/rbegin.hpp \ - /home/pblunsom/packages/include/boost/range/reverse_iterator.hpp \ - /home/pblunsom/packages/include/boost/iterator/reverse_iterator.hpp \ - /home/pblunsom/packages/include/boost/iterator.hpp \ - /home/pblunsom/packages/include/boost/utility.hpp \ - /home/pblunsom/packages/include/boost/utility/base_from_member.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_binary_params.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor/repetition/repeat_from_to.hpp \ - /home/pblunsom/packages/include/boost/utility/binary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/deduce_d.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/seq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/transform.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mod.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/detail/div_base.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/less_equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/not.hpp \ - /home/pblunsom/packages/include/boost/next_prior.hpp \ - /home/pblunsom/packages/include/boost/noncopyable.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_adaptor.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_categories.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/config_def.hpp \ - /home/pblunsom/packages/include/boost/mpl/placeholders.hpp \ - /home/pblunsom/packages/include/boost/mpl/arg.hpp \ - /home/pblunsom/packages/include/boost/mpl/arg_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_assert.hpp \ - /home/pblunsom/packages/include/boost/mpl/assert.hpp \ - /home/pblunsom/packages/include/boost/mpl/not.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/yes_no.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/arrays.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/pp_counter.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arity_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arg_typedef.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/arg.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/config_undef.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_facade.hpp \ - /home/pblunsom/packages/include/boost/iterator/interoperable.hpp \ - /home/pblunsom/packages/include/boost/mpl/or.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/or.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/facade_iterator_category.hpp \ - /home/pblunsom/packages/include/boost/detail/indirect_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_function.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/false_result.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/is_function_ptr_helper.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_pointer.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/enable_if.hpp \ - /home/pblunsom/packages/include/boost/implicit_cast.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_pointer.hpp \ - /home/pblunsom/packages/include/boost/mpl/always.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply_wrap.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/has_apply.hpp \ - 
/home/pblunsom/packages/include/boost/mpl/has_xxx.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/type_wrapper.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/has_xxx.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc_typename.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/has_apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/msvc_never_true.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp \ - /home/pblunsom/packages/include/boost/mpl/lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/bind_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/next.hpp \ - /home/pblunsom/packages/include/boost/mpl/next_prior.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/common_name_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/protect.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/full_lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/quote.hpp \ - /home/pblunsom/packages/include/boost/mpl/void.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/has_type.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/bcc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/quote.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/template_arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply.hpp \ - /home/pblunsom/packages/include/boost/range/rend.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/indirect_fun.hpp \ - /home/pblunsom/packages/include/boost/utility/result_of.hpp \ - /home/pblunsom/packages/include/boost/type.hpp \ - /home/pblunsom/packages/include/boost/preprocessor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/library.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/div.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mul.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/insert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/push_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/not_equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/pop_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_z.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/pop_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/push_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/remove.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/replace.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/greater.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor/comparison/less.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/greater_equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/config/limits.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/expr_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/assert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/line.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/apply.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/is_unary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/expand.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/intercept.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/local.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/self.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/append.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/at.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/rest_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each_i.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/for.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/detail/for.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/filter.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/first_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each_product.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/to_tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/to_list.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/transform.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitnor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitxor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/nor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/or.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/xor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_r.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_a_default.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_defaults.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted.hpp \ 
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection/max.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection/min.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/filter.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/first_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/detail/split.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_i.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_product.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/insert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/rest_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/pop_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/pop_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/push_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/push_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/remove.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/replace.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/subseq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/to_array.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/to_tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/to_seq.hpp \ - /home/pblunsom/packages/include/boost/utility/detail/result_of_iterate.hpp \ - /home/pblunsom/packages/include/boost/pointee.hpp \ - /home/pblunsom/packages/include/boost/detail/is_incrementable.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/void_ptr_iterator.hpp \ - /home/pblunsom/packages/include/boost/random/uniform_real.hpp \ - /home/pblunsom/packages/include/boost/random/detail/config.hpp \ - /home/pblunsom/packages/include/boost/random/variate_generator.hpp \ - /home/pblunsom/packages/include/boost/random/uniform_01.hpp \ - /home/pblunsom/packages/include/boost/random/detail/pass_through_engine.hpp \ - /home/pblunsom/packages/include/boost/random/detail/ptr_helper.hpp \ - /home/pblunsom/packages/include/boost/random/detail/disable_warnings.hpp \ - /home/pblunsom/packages/include/boost/random/detail/enable_warnings.hpp \ - /home/pblunsom/packages/include/boost/random/detail/uniform_int_float.hpp \ - /home/pblunsom/packages/include/boost/random/mersenne_twister.hpp \ - /home/pblunsom/packages/include/boost/random/linear_congruential.hpp \ - /home/pblunsom/packages/include/boost/random/detail/const_mod.hpp \ - 
/home/pblunsom/packages/include/boost/random/detail/seed.hpp pyp.hh \ - slice-sampler.h log_add.h mt19937ar.h corpus.hh workers.hh \ - /home/pblunsom/packages/include/boost/bind.hpp \ - /home/pblunsom/packages/include/boost/bind/bind.hpp \ - /home/pblunsom/packages/include/boost/is_placeholder.hpp \ - /home/pblunsom/packages/include/boost/bind/arg.hpp \ - /home/pblunsom/packages/include/boost/visit_each.hpp \ - /home/pblunsom/packages/include/boost/bind/storage.hpp \ - /home/pblunsom/packages/include/boost/bind/bind_template.hpp \ - /home/pblunsom/packages/include/boost/bind/bind_cc.hpp \ - /home/pblunsom/packages/include/boost/bind/bind_mf_cc.hpp \ - /home/pblunsom/packages/include/boost/bind/bind_mf2_cc.hpp \ - /home/pblunsom/packages/include/boost/bind/placeholders.hpp \ - /home/pblunsom/packages/include/boost/thread/thread.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/platform.hpp \ - /home/pblunsom/packages/include/boost/config/requires_threads.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/thread_data.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/config.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/platform.hpp \ - /home/pblunsom/packages/include/boost/thread/exceptions.hpp \ - /home/pblunsom/packages/include/boost/config/abi_prefix.hpp \ - /home/pblunsom/packages/include/boost/config/abi_suffix.hpp \ - /home/pblunsom/packages/include/boost/enable_shared_from_this.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/enable_shared_from_this.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/weak_ptr.hpp \ - /home/pblunsom/packages/include/boost/thread/mutex.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/mutex.hpp \ - /home/pblunsom/packages/include/boost/thread/locks.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/move.hpp \ - /home/pblunsom/packages/include/boost/thread/thread_time.hpp \ - /home/pblunsom/packages/include/boost/date_time/microsec_time_clock.hpp \ - /home/pblunsom/packages/include/boost/date_time/compiler_config.hpp \ - /home/pblunsom/packages/include/boost/date_time/locale_config.hpp \ - /home/pblunsom/packages/include/boost/date_time/c_time.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_clock.hpp \ - /home/pblunsom/packages/include/boost/date_time/filetime_functions.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_types.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/ptime.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_system.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_config.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/cmath.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_duration.hpp \ - /home/pblunsom/packages/include/boost/operators.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_defs.hpp \ - /home/pblunsom/packages/include/boost/date_time/special_defs.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_resolution_traits.hpp \ - /home/pblunsom/packages/include/boost/date_time/int_adapter.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/gregorian_types.hpp \ - /home/pblunsom/packages/include/boost/date_time/date.hpp \ - /home/pblunsom/packages/include/boost/date_time/year_month_day.hpp \ - /home/pblunsom/packages/include/boost/date_time/period.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_calendar.hpp \ - 
/home/pblunsom/packages/include/boost/date_time/gregorian/greg_weekday.hpp \ - /home/pblunsom/packages/include/boost/date_time/constrained_value.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_base_of.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_base_and_derived.hpp \ - /home/pblunsom/packages/include/boost/date_time/date_defs.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_day_of_year.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian_calendar.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian_calendar.ipp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_ymd.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_day.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_year.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_month.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_duration.hpp \ - /home/pblunsom/packages/include/boost/date_time/date_duration.hpp \ - /home/pblunsom/packages/include/boost/date_time/date_duration_types.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_duration_types.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_date.hpp \ - /home/pblunsom/packages/include/boost/date_time/adjust_functors.hpp \ - /home/pblunsom/packages/include/boost/date_time/wrapping_int.hpp \ - /home/pblunsom/packages/include/boost/date_time/date_generators.hpp \ - /home/pblunsom/packages/include/boost/date_time/date_clock_device.hpp \ - /home/pblunsom/packages/include/boost/date_time/date_iterator.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_system_split.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_system_counted.hpp \ - /home/pblunsom/packages/include/boost/date_time/time.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/date_duration_operators.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_duration.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/time_period.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_iterator.hpp \ - /home/pblunsom/packages/include/boost/date_time/dst_rules.hpp \ - /home/pblunsom/packages/include/boost/thread/xtime.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/conversion.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/conversion.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/timespec.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/pthread_mutex_scoped_lock.hpp \ - /home/pblunsom/packages/include/boost/optional.hpp \ - /home/pblunsom/packages/include/boost/optional/optional.hpp \ - /home/pblunsom/packages/include/boost/type_traits/type_with_alignment.hpp \ - /home/pblunsom/packages/include/boost/detail/reference_content.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_nothrow_copy.hpp \ - /home/pblunsom/packages/include/boost/none.hpp \ - /home/pblunsom/packages/include/boost/none_t.hpp \ - /home/pblunsom/packages/include/boost/utility/compare_pointees.hpp \ - /home/pblunsom/packages/include/boost/optional/optional_fwd.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/condition_variable_fwd.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/thread.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/thread_heap_alloc.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/thread_heap_alloc.hpp \ - 
/home/pblunsom/packages/include/boost/thread/detail/thread_interruption.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/thread_group.hpp \ - /home/pblunsom/packages/include/boost/thread/shared_mutex.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/shared_mutex.hpp \ - /home/pblunsom/packages/include/boost/thread/condition_variable.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/condition_variable.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/thread_data.hpp \ - /home/pblunsom/packages/include/boost/thread/future.hpp \ - /home/pblunsom/packages/include/boost/exception_ptr.hpp \ - /home/pblunsom/packages/include/boost/exception/detail/exception_ptr.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_fundamental.hpp \ - /home/pblunsom/packages/include/boost/thread/condition.hpp timing.h \ - clock_gettime_stub.c contexts_corpus.hh contexts_lexer.h \ - ../../../decoder/dict.h \ - /home/pblunsom/packages/include/boost/functional/hash.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/hash.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/hash_fwd.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/detail/float_functions.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/detail/limits.hpp \ - /home/pblunsom/packages/include/boost/integer/static_log2.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float_generic.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/extensions.hpp \ - /home/pblunsom/packages/include/boost/detail/container_fwd.hpp \ - ../../../decoder/wordid.h gzstream.hh -train.o: train.cc \ - /home/pblunsom/packages/include/boost/program_options/parsers.hpp \ - /home/pblunsom/packages/include/boost/program_options/config.hpp \ - /home/pblunsom/packages/include/boost/config.hpp \ - /home/pblunsom/packages/include/boost/config/user.hpp \ - /home/pblunsom/packages/include/boost/config/select_compiler_config.hpp \ - /home/pblunsom/packages/include/boost/config/compiler/gcc.hpp \ - /home/pblunsom/packages/include/boost/config/select_stdlib_config.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/utility.hpp \ - /home/pblunsom/packages/include/boost/config/stdlib/libstdcpp3.hpp \ - /home/pblunsom/packages/include/boost/config/select_platform_config.hpp \ - /home/pblunsom/packages/include/boost/config/platform/linux.hpp \ - /home/pblunsom/packages/include/boost/config/posix_features.hpp \ - /home/pblunsom/packages/include/boost/config/suffix.hpp \ - /home/pblunsom/packages/include/boost/version.hpp \ - /home/pblunsom/packages/include/boost/config/auto_link.hpp \ - /home/pblunsom/packages/include/boost/program_options/option.hpp \ - /home/pblunsom/packages/include/boost/program_options/detail/cmdline.hpp \ - /home/pblunsom/packages/include/boost/program_options/errors.hpp \ - /home/pblunsom/packages/include/boost/program_options/cmdline.hpp \ - /home/pblunsom/packages/include/boost/program_options/options_description.hpp \ - /home/pblunsom/packages/include/boost/program_options/value_semantic.hpp \ - /home/pblunsom/packages/include/boost/any.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/broken_compiler_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/lambda_support.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/lambda.hpp \ - 
/home/pblunsom/packages/include/boost/mpl/aux_/config/ttp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/gcc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/workaround.hpp \ - /home/pblunsom/packages/include/boost/detail/workaround.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/ctps.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_def.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/template_arity_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/int.hpp \ - /home/pblunsom/packages/include/boost/mpl/int_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/adl_barrier.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/adl.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/intel.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/nttp_decl.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/nttp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/integral_wrapper.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c_tag.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/static_constant.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/static_cast.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/config/config.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/template_arity_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/params.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/preprocessor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comma_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/iif.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bool.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/empty.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repeat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/error.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/auto_rec.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/eat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/inc.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/inc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/overload_resolution.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/config.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_def.hpp \ - /home/pblunsom/packages/include/boost/type_traits/integral_constant.hpp \ - /home/pblunsom/packages/include/boost/mpl/bool.hpp \ - /home/pblunsom/packages/include/boost/mpl/bool_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c.hpp \ - /home/pblunsom/packages/include/boost/mpl/integral_c_fwd.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/throw_exception.hpp \ - 
/home/pblunsom/packages/include/boost/exception/detail/attribute_noreturn.hpp \ - /home/pblunsom/packages/include/boost/exception/exception.hpp \ - /home/pblunsom/packages/include/boost/current_function.hpp \ - /home/pblunsom/packages/include/boost/static_assert.hpp \ - /home/pblunsom/packages/include/boost/function/function1.hpp \ - /home/pblunsom/packages/include/boost/function/detail/maybe_include.hpp \ - /home/pblunsom/packages/include/boost/function/function_template.hpp \ - /home/pblunsom/packages/include/boost/function/detail/prologue.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/functional.hpp \ - /home/pblunsom/packages/include/boost/function/function_base.hpp \ - /home/pblunsom/packages/include/boost/detail/sp_typeinfo.hpp \ - /home/pblunsom/packages/include/boost/assert.hpp \ - /home/pblunsom/packages/include/boost/integer.hpp \ - /home/pblunsom/packages/include/boost/integer_fwd.hpp \ - /home/pblunsom/packages/include/boost/limits.hpp \ - /home/pblunsom/packages/include/boost/cstdint.hpp \ - /home/pblunsom/packages/include/boost/integer_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_trivial_copy.hpp \ - /home/pblunsom/packages/include/boost/type_traits/intrinsics.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_same.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_volatile.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/cv_traits_impl.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_pod.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_void.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_scalar.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_arithmetic.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_integral.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_float.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_or.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_enum.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_member_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_member_function_pointer.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_cv.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_and.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_not.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_trivial_destructor.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_const.hpp \ - /home/pblunsom/packages/include/boost/type_traits/composite_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_array.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_union.hpp \ - /home/pblunsom/packages/include/boost/type_traits/ice.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/yes_no_type.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/ice_eq.hpp \ - /home/pblunsom/packages/include/boost/ref.hpp \ - /home/pblunsom/packages/include/boost/utility/addressof.hpp \ - /home/pblunsom/packages/include/boost/mpl/if.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/value_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/integral.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/eti.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_spec.hpp \ - 
/home/pblunsom/packages/include/boost/mpl/lambda_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/void_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/lambda_arity_param.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/dtp.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/enum.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/def_params_tail.hpp \ - /home/pblunsom/packages/include/boost/mpl/limits/arity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/and.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitand.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/identity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/identity.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/empty.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/add.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/dec.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/while.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/expr_iif.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/adt.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/is_binary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/check.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/compl.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/detail/while.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/sub.hpp \ - /home/pblunsom/packages/include/boost/type_traits/alignment_of.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_def.hpp \ - /home/pblunsom/packages/include/boost/mpl/size_t.hpp \ - /home/pblunsom/packages/include/boost/mpl/size_t_fwd.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_undef.hpp \ - /home/pblunsom/packages/include/boost/utility/enable_if.hpp \ - /home/pblunsom/packages/include/boost/function_equal.hpp \ - /home/pblunsom/packages/include/boost/function/function_fwd.hpp \ - /home/pblunsom/packages/include/boost/mem_fn.hpp \ - /home/pblunsom/packages/include/boost/bind/mem_fn.hpp \ - /home/pblunsom/packages/include/boost/get_pointer.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/memory.hpp \ - /home/pblunsom/packages/include/boost/bind/mem_fn_template.hpp \ - /home/pblunsom/packages/include/boost/bind/mem_fn_cc.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/rem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/enum_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params.hpp \ - 
/home/pblunsom/packages/include/boost/detail/no_exceptions_support.hpp \ - /home/pblunsom/packages/include/boost/lexical_cast.hpp \ - /home/pblunsom/packages/include/boost/type_traits/make_unsigned.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_signed.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_unsigned.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_const.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_volatile.hpp \ - /home/pblunsom/packages/include/boost/call_traits.hpp \ - /home/pblunsom/packages/include/boost/detail/call_traits.hpp \ - /home/pblunsom/packages/include/boost/detail/lcast_precision.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_abstract.hpp \ - /home/pblunsom/packages/include/boost/program_options/detail/value_semantic.hpp \ - /home/pblunsom/packages/include/boost/function.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iterate.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/iterate.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/data.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/slot.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/detail/def.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/iter/forward1.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/lower1.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot/detail/shared.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/upper1.hpp \ - /home/pblunsom/packages/include/boost/function/detail/function_iterate.hpp \ - /home/pblunsom/packages/include/boost/shared_ptr.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/shared_ptr.hpp \ - /home/pblunsom/packages/include/boost/checked_delete.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/shared_count.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/bad_weak_ptr.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_has_sync.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_impl.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_convertible.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_pool.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_sync.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/yield_k.hpp \ - /home/pblunsom/packages/include/boost/memory_order.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/detail/operator_bool.hpp \ - /home/pblunsom/packages/include/boost/program_options/positional_options.hpp \ - /home/pblunsom/packages/include/boost/program_options/detail/parsers.hpp \ - /home/pblunsom/packages/include/boost/program_options/detail/convert.hpp \ - /home/pblunsom/packages/include/boost/program_options/variables_map.hpp \ - /home/pblunsom/packages/include/boost/scoped_ptr.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/scoped_ptr.hpp \ - pyp-topics.hh \ - /home/pblunsom/packages/include/boost/ptr_container/ptr_vector.hpp \ - 
/home/pblunsom/packages/include/boost/ptr_container/ptr_sequence_adapter.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/reversible_ptr_container.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/throw_exception.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/scoped_deleter.hpp \ - /home/pblunsom/packages/include/boost/scoped_array.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/scoped_array.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/static_move_ptr.hpp \ - /home/pblunsom/packages/include/boost/compressed_pair.hpp \ - /home/pblunsom/packages/include/boost/detail/compressed_pair.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_empty.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_convertible.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_reference.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_class.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/default_deleter.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_bounds.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/is_convertible.hpp \ - /home/pblunsom/packages/include/boost/mpl/and.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/use_preprocessed.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/nested_type_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/include_preprocessed.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/compiler.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/stringize.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/and.hpp \ - /home/pblunsom/packages/include/boost/mpl/identity.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/move.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/exception.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/clone_allocator.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/nullable.hpp \ - /home/pblunsom/packages/include/boost/mpl/eval_if.hpp \ - /home/pblunsom/packages/include/boost/range/functions.hpp \ - /home/pblunsom/packages/include/boost/range/begin.hpp \ - /home/pblunsom/packages/include/boost/range/config.hpp \ - /home/pblunsom/packages/include/boost/range/iterator.hpp \ - /home/pblunsom/packages/include/boost/range/mutable_iterator.hpp \ - /home/pblunsom/packages/include/boost/range/detail/extract_optional_type.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_traits.hpp \ - /home/pblunsom/packages/include/boost/detail/iterator.hpp \ - /home/pblunsom/packages/include/boost/range/const_iterator.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_const.hpp \ - /home/pblunsom/packages/include/boost/range/end.hpp \ - /home/pblunsom/packages/include/boost/range/detail/implementation_help.hpp \ - /home/pblunsom/packages/include/boost/range/detail/common.hpp \ - /home/pblunsom/packages/include/boost/range/detail/sfinae.hpp \ - /home/pblunsom/packages/include/boost/range/size.hpp \ - /home/pblunsom/packages/include/boost/range/difference_type.hpp \ - /home/pblunsom/packages/include/boost/range/distance.hpp \ - /home/pblunsom/packages/include/boost/range/empty.hpp \ - /home/pblunsom/packages/include/boost/range/rbegin.hpp \ - /home/pblunsom/packages/include/boost/range/reverse_iterator.hpp \ - /home/pblunsom/packages/include/boost/iterator/reverse_iterator.hpp \ - /home/pblunsom/packages/include/boost/iterator.hpp \ - 
/home/pblunsom/packages/include/boost/utility.hpp \ - /home/pblunsom/packages/include/boost/utility/base_from_member.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat_from_to.hpp \ - /home/pblunsom/packages/include/boost/utility/binary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/deduce_d.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/fold_left.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/seq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/elem.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/transform.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mod.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/detail/div_base.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/less_equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/not.hpp \ - /home/pblunsom/packages/include/boost/next_prior.hpp \ - /home/pblunsom/packages/include/boost/noncopyable.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_adaptor.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_categories.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/config_def.hpp \ - /home/pblunsom/packages/include/boost/mpl/placeholders.hpp \ - /home/pblunsom/packages/include/boost/mpl/arg.hpp \ - /home/pblunsom/packages/include/boost/mpl/arg_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/na_assert.hpp \ - /home/pblunsom/packages/include/boost/mpl/assert.hpp \ - /home/pblunsom/packages/include/boost/mpl/not.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/yes_no.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/arrays.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/pp_counter.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arity_spec.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/arg_typedef.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/arg.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/config_undef.hpp \ - /home/pblunsom/packages/include/boost/iterator/iterator_facade.hpp \ - /home/pblunsom/packages/include/boost/iterator/interoperable.hpp \ - /home/pblunsom/packages/include/boost/mpl/or.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/or.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/facade_iterator_category.hpp \ - /home/pblunsom/packages/include/boost/detail/indirect_traits.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_function.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/false_result.hpp \ - /home/pblunsom/packages/include/boost/type_traits/detail/is_function_ptr_helper.hpp \ - /home/pblunsom/packages/include/boost/type_traits/remove_pointer.hpp \ - /home/pblunsom/packages/include/boost/iterator/detail/enable_if.hpp \ - /home/pblunsom/packages/include/boost/implicit_cast.hpp \ - /home/pblunsom/packages/include/boost/type_traits/add_pointer.hpp \ - /home/pblunsom/packages/include/boost/mpl/always.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply_fwd.hpp \ - 
/home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/apply_wrap.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/has_apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/has_xxx.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/type_wrapper.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/has_xxx.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc_typename.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/has_apply.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/msvc_never_true.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp \ - /home/pblunsom/packages/include/boost/mpl/lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/bind_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp \ - /home/pblunsom/packages/include/boost/mpl/next.hpp \ - /home/pblunsom/packages/include/boost/mpl/next_prior.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/common_name_wknd.hpp \ - /home/pblunsom/packages/include/boost/mpl/protect.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/full_lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/quote.hpp \ - /home/pblunsom/packages/include/boost/mpl/void.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/has_type.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/config/bcc.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/quote.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/template_arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp \ - /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply.hpp \ - /home/pblunsom/packages/include/boost/range/rend.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/indirect_fun.hpp \ - /home/pblunsom/packages/include/boost/utility/result_of.hpp \ - /home/pblunsom/packages/include/boost/type.hpp \ - /home/pblunsom/packages/include/boost/preprocessor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/library.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/div.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mul.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/insert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/push_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/not_equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/pop_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_z.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/pop_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/push_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/remove.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/replace.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/array/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/reverse.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor/comparison.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/greater.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/less.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/comparison/greater_equal.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/config/limits.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/control/expr_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/assert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/debug/line.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/apply.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/detail/is_unary.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/expand.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/facilities/intercept.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/local.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/iteration/self.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/append.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/at.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/rest_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/cat.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each_i.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/for.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/detail/for.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/filter.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/first_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/for_each_product.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/to_tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/to_list.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/size.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/list/transform.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitnor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/bitxor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/nor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/or.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/logical/xor.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren_if.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_r.hpp \ - 
/home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_a_default.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_defaults.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_binary_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_params.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection/max.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/selection/min.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/enum.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/filter.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/first_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/detail/split.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/fold_right.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/reverse.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_i.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_product.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/insert.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/rest_n.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/pop_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/pop_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/push_back.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/push_front.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/remove.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/replace.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/subseq.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/to_array.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/seq/to_tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/slot.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple.hpp \ - /home/pblunsom/packages/include/boost/preprocessor/tuple/to_seq.hpp \ - /home/pblunsom/packages/include/boost/utility/detail/result_of_iterate.hpp \ - /home/pblunsom/packages/include/boost/pointee.hpp \ - /home/pblunsom/packages/include/boost/detail/is_incrementable.hpp \ - /home/pblunsom/packages/include/boost/ptr_container/detail/void_ptr_iterator.hpp \ - /home/pblunsom/packages/include/boost/random/uniform_real.hpp \ - /home/pblunsom/packages/include/boost/random/detail/config.hpp \ - /home/pblunsom/packages/include/boost/random/variate_generator.hpp \ - /home/pblunsom/packages/include/boost/random/uniform_01.hpp \ - /home/pblunsom/packages/include/boost/random/detail/pass_through_engine.hpp \ - /home/pblunsom/packages/include/boost/random/detail/ptr_helper.hpp \ - /home/pblunsom/packages/include/boost/random/detail/disable_warnings.hpp \ - /home/pblunsom/packages/include/boost/random/detail/enable_warnings.hpp \ - 
/home/pblunsom/packages/include/boost/random/detail/uniform_int_float.hpp \ - /home/pblunsom/packages/include/boost/random/mersenne_twister.hpp \ - /home/pblunsom/packages/include/boost/random/linear_congruential.hpp \ - /home/pblunsom/packages/include/boost/random/detail/const_mod.hpp \ - /home/pblunsom/packages/include/boost/random/detail/seed.hpp pyp.hh \ - slice-sampler.h log_add.h mt19937ar.h corpus.hh workers.hh \ - /home/pblunsom/packages/include/boost/bind.hpp \ - /home/pblunsom/packages/include/boost/bind/bind.hpp \ - /home/pblunsom/packages/include/boost/is_placeholder.hpp \ - /home/pblunsom/packages/include/boost/bind/arg.hpp \ - /home/pblunsom/packages/include/boost/visit_each.hpp \ - /home/pblunsom/packages/include/boost/bind/storage.hpp \ - /home/pblunsom/packages/include/boost/bind/bind_template.hpp \ - /home/pblunsom/packages/include/boost/bind/bind_cc.hpp \ - /home/pblunsom/packages/include/boost/bind/bind_mf_cc.hpp \ - /home/pblunsom/packages/include/boost/bind/bind_mf2_cc.hpp \ - /home/pblunsom/packages/include/boost/bind/placeholders.hpp \ - /home/pblunsom/packages/include/boost/thread/thread.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/platform.hpp \ - /home/pblunsom/packages/include/boost/config/requires_threads.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/thread_data.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/config.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/platform.hpp \ - /home/pblunsom/packages/include/boost/thread/exceptions.hpp \ - /home/pblunsom/packages/include/boost/config/abi_prefix.hpp \ - /home/pblunsom/packages/include/boost/config/abi_suffix.hpp \ - /home/pblunsom/packages/include/boost/enable_shared_from_this.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/enable_shared_from_this.hpp \ - /home/pblunsom/packages/include/boost/smart_ptr/weak_ptr.hpp \ - /home/pblunsom/packages/include/boost/thread/mutex.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/mutex.hpp \ - /home/pblunsom/packages/include/boost/thread/locks.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/move.hpp \ - /home/pblunsom/packages/include/boost/thread/thread_time.hpp \ - /home/pblunsom/packages/include/boost/date_time/microsec_time_clock.hpp \ - /home/pblunsom/packages/include/boost/date_time/compiler_config.hpp \ - /home/pblunsom/packages/include/boost/date_time/locale_config.hpp \ - /home/pblunsom/packages/include/boost/date_time/c_time.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_clock.hpp \ - /home/pblunsom/packages/include/boost/date_time/filetime_functions.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_types.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/ptime.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_system.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_config.hpp \ - /home/pblunsom/packages/include/boost/config/no_tr1/cmath.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_duration.hpp \ - /home/pblunsom/packages/include/boost/operators.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_defs.hpp \ - /home/pblunsom/packages/include/boost/date_time/special_defs.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_resolution_traits.hpp \ - /home/pblunsom/packages/include/boost/date_time/int_adapter.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/gregorian_types.hpp \ - 
/home/pblunsom/packages/include/boost/date_time/date.hpp \ - /home/pblunsom/packages/include/boost/date_time/year_month_day.hpp \ - /home/pblunsom/packages/include/boost/date_time/period.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_calendar.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_weekday.hpp \ - /home/pblunsom/packages/include/boost/date_time/constrained_value.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_base_of.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_base_and_derived.hpp \ - /home/pblunsom/packages/include/boost/date_time/date_defs.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_day_of_year.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian_calendar.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian_calendar.ipp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_ymd.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_day.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_year.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_month.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_duration.hpp \ - /home/pblunsom/packages/include/boost/date_time/date_duration.hpp \ - /home/pblunsom/packages/include/boost/date_time/date_duration_types.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_duration_types.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/greg_date.hpp \ - /home/pblunsom/packages/include/boost/date_time/adjust_functors.hpp \ - /home/pblunsom/packages/include/boost/date_time/wrapping_int.hpp \ - /home/pblunsom/packages/include/boost/date_time/date_generators.hpp \ - /home/pblunsom/packages/include/boost/date_time/date_clock_device.hpp \ - /home/pblunsom/packages/include/boost/date_time/date_iterator.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_system_split.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_system_counted.hpp \ - /home/pblunsom/packages/include/boost/date_time/time.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/date_duration_operators.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_duration.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/time_period.hpp \ - /home/pblunsom/packages/include/boost/date_time/time_iterator.hpp \ - /home/pblunsom/packages/include/boost/date_time/dst_rules.hpp \ - /home/pblunsom/packages/include/boost/thread/xtime.hpp \ - /home/pblunsom/packages/include/boost/date_time/posix_time/conversion.hpp \ - /home/pblunsom/packages/include/boost/date_time/gregorian/conversion.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/timespec.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/pthread_mutex_scoped_lock.hpp \ - /home/pblunsom/packages/include/boost/optional.hpp \ - /home/pblunsom/packages/include/boost/optional/optional.hpp \ - /home/pblunsom/packages/include/boost/type_traits/type_with_alignment.hpp \ - /home/pblunsom/packages/include/boost/detail/reference_content.hpp \ - /home/pblunsom/packages/include/boost/type_traits/has_nothrow_copy.hpp \ - /home/pblunsom/packages/include/boost/none.hpp \ - /home/pblunsom/packages/include/boost/none_t.hpp \ - /home/pblunsom/packages/include/boost/utility/compare_pointees.hpp \ - /home/pblunsom/packages/include/boost/optional/optional_fwd.hpp \ - 
/home/pblunsom/packages/include/boost/thread/pthread/condition_variable_fwd.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/thread.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/thread_heap_alloc.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/thread_heap_alloc.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/thread_interruption.hpp \ - /home/pblunsom/packages/include/boost/thread/detail/thread_group.hpp \ - /home/pblunsom/packages/include/boost/thread/shared_mutex.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/shared_mutex.hpp \ - /home/pblunsom/packages/include/boost/thread/condition_variable.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/condition_variable.hpp \ - /home/pblunsom/packages/include/boost/thread/pthread/thread_data.hpp \ - /home/pblunsom/packages/include/boost/thread/future.hpp \ - /home/pblunsom/packages/include/boost/exception_ptr.hpp \ - /home/pblunsom/packages/include/boost/exception/detail/exception_ptr.hpp \ - /home/pblunsom/packages/include/boost/type_traits/is_fundamental.hpp \ - /home/pblunsom/packages/include/boost/thread/condition.hpp timing.h \ - clock_gettime_stub.c contexts_corpus.hh contexts_lexer.h \ - ../../../decoder/dict.h \ - /home/pblunsom/packages/include/boost/functional/hash.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/hash.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/hash_fwd.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/detail/float_functions.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/detail/limits.hpp \ - /home/pblunsom/packages/include/boost/integer/static_log2.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float_generic.hpp \ - /home/pblunsom/packages/include/boost/functional/hash/extensions.hpp \ - /home/pblunsom/packages/include/boost/detail/container_fwd.hpp \ - ../../../decoder/wordid.h gzstream.hh -clock_gettime_stub.o: clock_gettime_stub.c -gammadist.o: gammadist.c gammadist.h mt19937ar.h -mt19937ar.o: mt19937ar.c mt19937ar.h diff --git a/gi/pyp-topics/src/mpi-corpus.hh b/gi/pyp-topics/src/mpi-corpus.hh deleted file mode 100644 index f5c478a9..00000000 --- a/gi/pyp-topics/src/mpi-corpus.hh +++ /dev/null @@ -1,69 +0,0 @@ -#ifndef _MPI_CORPUS_HH -#define _MPI_CORPUS_HH - -#include <vector> -#include <string> -#include <map> -#include <tr1/unordered_map> - -#include <boost/ptr_container/ptr_vector.hpp> -#include <boost/mpi/environment.hpp> -#include <boost/mpi/communicator.hpp> - -#include "contexts_corpus.hh" - - -//////////////////////////////////////////////////////////////// -// MPICorpus -//////////////////////////////////////////////////////////////// - -class MPICorpus : public ContextsCorpus { -public: -  MPICorpus() : ContextsCorpus() { -    boost::mpi::communicator world; -    m_rank = world.rank(); -    m_size = world.size(); -    m_start = -1; -    m_end = -1; -  } -  virtual ~MPICorpus() {} - -  virtual unsigned read_contexts(const std::string &filename,  -                                 BackoffGenerator* backoff_gen=0, -                                 bool filter_singeltons=false, -                                 bool binary_contexts=false) { -    unsigned result = ContextsCorpus::read_contexts(filename, backoff_gen, filter_singeltons, binary_contexts); - -    if (m_rank == 0) std::cerr << "\tLoad balancing terms per mpi segment:" << std::endl; -    float segment_size = 
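/* load balancing: aim for num_terms()/m_size terms per MPI segment, then greedily cut the document list where the running term count crosses each multiple of segment_size (cut points recorded in end_points) */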
num_terms() / (float)m_size; -    float term_threshold = segment_size; -    int seen_terms = 0; -    std::vector<int> end_points; -    for (int i=0; i < num_documents(); ++i) { -      seen_terms += m_documents.at(i).size(); -      if (seen_terms >= term_threshold) { -        end_points.push_back(i+1); -        term_threshold += segment_size; -        if (m_rank == 0) std::cerr << "\t\t" << i+1 << ": " << seen_terms << " terms, " << 100*seen_terms / (float)num_terms() << "%" << std::endl; -      } -    } -    m_start = (m_rank == 0 ? 0 : end_points.at(m_rank-1)); -    m_end = (m_rank == m_size-1 ? num_documents() : end_points.at(m_rank)); - -    return result; -  } - -  void -  bounds(int* start, int* end) const { -    *start = m_start; -    *end = m_end; -  } - - - -protected: -  int m_rank, m_size; -  int m_start, m_end; -}; - -#endif // _MPI_CORPUS_HH diff --git a/gi/pyp-topics/src/mpi-pyp-topics.cc b/gi/pyp-topics/src/mpi-pyp-topics.cc deleted file mode 100644 index d6e22af6..00000000 --- a/gi/pyp-topics/src/mpi-pyp-topics.cc +++ /dev/null @@ -1,466 +0,0 @@ -#include <boost/mpi/communicator.hpp> - -#include "timing.h" -#include "mpi-pyp-topics.hh" - -//#include <boost/date_time/posix_time/posix_time_types.hpp> -void MPIPYPTopics::sample_corpus(const MPICorpus& corpus, int samples, -                              int freq_cutoff_start, int freq_cutoff_end, -                              int freq_cutoff_interval, -                              int max_contexts_per_document) { -  Timer timer; - -  //int documents = corpus.num_documents(); -  /* -  m_mpi_start = 0; -  m_mpi_end = documents; -  if (m_size != 1) { -      assert(documents < std::numeric_limits<int>::max()); -      m_mpi_start = (documents / m_size) * m_rank; -      if (m_rank == m_size-1) m_mpi_end = documents; -      else m_mpi_end = (documents / m_size)*(m_rank+1); -  } -  */ -  corpus.bounds(&m_mpi_start, &m_mpi_end); -  int local_documents = m_mpi_end - m_mpi_start; - -  if (!m_backoff.get()) { -    m_word_pyps.clear(); -    m_word_pyps.push_back(MPIPYPs()); -  } - -  if (m_am_root) std::cerr << "\n Training with " << m_word_pyps.size()-1 << " backoff level" -    << (m_word_pyps.size()==2 ? ":" : "s:") << std::endl; - -  for (int i=0; i<(int)m_word_pyps.size(); ++i) { -    m_word_pyps.at(i).reserve(m_num_topics); -    for (int j=0; j<m_num_topics; ++j) -      m_word_pyps.at(i).push_back(new MPIPYP<int>(0.5, 1.0)); -  } -  if (m_am_root) std::cerr << std::endl; - -  m_document_pyps.reserve(local_documents); -  //m_document_pyps.reserve(corpus.num_documents()); -  //for (int j=0; j<corpus.num_documents(); ++j) -  for (int j=0; j<local_documents; ++j) -    m_document_pyps.push_back(new PYP<int>(0.5, 1.0)); - -  m_topic_p0 = 1.0/m_num_topics; -  m_term_p0 = 1.0/corpus.num_types(); -  m_backoff_p0 = 1.0/corpus.num_documents(); - -  if (m_am_root) std::cerr << " Documents: " << corpus.num_documents() << "(" -    << local_documents << ")" << " Terms: " << corpus.num_types() << std::endl; - -  int frequency_cutoff = freq_cutoff_start; -  if (m_am_root) std::cerr << " Context frequency cutoff set to " << frequency_cutoff << std::endl; - -  timer.Reset(); -  // Initialisation pass -  int document_id=0, topic_counter=0; -  for (int i=0; i<local_documents; ++i) { -    document_id = i+m_mpi_start; - -  //for (Corpus::const_iterator corpusIt=corpus.begin(); -  //     corpusIt != corpus.end(); ++corpusIt, ++document_id) { -    m_corpus_topics.push_back(DocumentTopics(corpus.at(document_id).size(), 0)); - -    int term_index=0; -    for (Document::const_iterator docIt=corpus.at(document_id).begin(); -         docIt != corpus.at(document_id).end(); ++docIt, ++term_index) { -      topic_counter++; -      Term term = *docIt; - -      // sample a new_topic -      //int new_topic = (topic_counter % m_num_topics); -      int freq = corpus.context_count(term); -      int new_topic = -1; -      if (freq > frequency_cutoff -          && (!max_contexts_per_document || term_index < max_contexts_per_document)) { -        new_topic = sample(i, term); -        //new_topic = document_id % m_num_topics; - -        // add the new topic to the PYPs -        increment(term, new_topic); - -        if (m_use_topic_pyp) { -          F p0 = m_topic_pyp.prob(new_topic, m_topic_p0); -          int table_delta = m_document_pyps.at(i).increment(new_topic, p0); -          if (table_delta) -            m_topic_pyp.increment(new_topic, m_topic_p0, rnd); -        } -        else m_document_pyps.at(i).increment(new_topic, m_topic_p0); -      } - -      m_corpus_topics.at(i).at(term_index) = new_topic; -    } -  } - -  // Synchronise the topic->word counts across the processes. 
-  synchronise(); - -  if (m_am_root) std::cerr << "  Initialized in " << timer.Elapsed() << " seconds\n"; - -  int* randomDocIndices = new int[local_documents]; -  for (int i = 0; i < local_documents; ++i) -    randomDocIndices[i] = i; - -  // Sampling phase -  for (int curr_sample=0; curr_sample < samples; ++curr_sample) { -    if (freq_cutoff_interval > 0 && curr_sample != 1 -        && curr_sample % freq_cutoff_interval == 1 -        && frequency_cutoff > freq_cutoff_end) { -      frequency_cutoff--; -      if (m_am_root) std::cerr << "\n Context frequency cutoff set to " << frequency_cutoff << std::endl; -    } - -    if (m_am_root) std::cerr << "\n  -- Sample " << curr_sample << " "; std::cerr.flush(); - -    // Randomize the corpus indexing array -    int tmp; -    int processed_terms=0; -    for (int i = (local_documents-1); i > 0; --i) { -      //i+1 since j \in [0,i] but rnd() \in [0,1) -      int j = (int)(rnd() * (i+1)); -      assert(j >= 0 && j <= i); -      tmp = randomDocIndices[i]; -      randomDocIndices[i] = randomDocIndices[j]; -      randomDocIndices[j] = tmp; -    } - -    // for each document in the corpus -    for (int rand_doc=0; rand_doc<local_documents; ++rand_doc) { -      int doc_index = randomDocIndices[rand_doc]; -      int document_id = doc_index + m_mpi_start; -      const Document& doc = corpus.at(document_id); - -      // for each term in the document -      int term_index=0; -      Document::const_iterator docEnd = doc.end(); -      for (Document::const_iterator docIt=doc.begin(); -           docIt != docEnd; ++docIt, ++term_index) { - -        if (max_contexts_per_document && term_index >= max_contexts_per_document) -          break; - -        Term term = *docIt; -        int freq = corpus.context_count(term); -        if (freq <= frequency_cutoff) -          continue; - -        processed_terms++; - -        // remove the previous topic from the PYPs -        int current_topic = m_corpus_topics.at(doc_index).at(term_index); -        // a negative label means that the term hasn't been sampled yet -        if (current_topic >= 0) { -          decrement(term, current_topic); - -          int table_delta = m_document_pyps.at(doc_index).decrement(current_topic); -          if (m_use_topic_pyp && table_delta < 0) -            m_topic_pyp.decrement(current_topic, rnd); -        } - -        // sample a new_topic -        int new_topic = sample(doc_index, term); - -        // add the new topic to the PYPs -        m_corpus_topics.at(doc_index).at(term_index) = new_topic; -        increment(term, new_topic); - -        if (m_use_topic_pyp) { -          F p0 = m_topic_pyp.prob(new_topic, m_topic_p0); -          int table_delta = m_document_pyps.at(doc_index).increment(new_topic, p0); -          if (table_delta) -            m_topic_pyp.increment(new_topic, m_topic_p0, rnd); -        } -        else m_document_pyps.at(doc_index).increment(new_topic, m_topic_p0); -      } -      if (document_id && document_id % 10000 == 0) { -        if (m_am_root) std::cerr << "."; std::cerr.flush(); -      } -    } -    std::cerr << "|"; std::cerr.flush(); - -    // Synchronise the topic->word counts across the processes. -    synchronise(); - -    if (m_am_root) std::cerr << " ||| sampled " << processed_terms << " terms."; - -    if (curr_sample != 0 && curr_sample % 10 == 0) { -      if (m_am_root) std::cerr << " ||| time=" << (timer.Elapsed() / 10.0) << " sec/sample" << std::endl; -      timer.Reset(); -      if (m_am_root) std::cerr << "     ... 
Resampling hyperparameters"; std::cerr.flush(); - -      // resample the hyperparameters -      F log_p=0.0; -      for (std::vector<MPIPYPs>::iterator levelIt=m_word_pyps.begin(); -           levelIt != m_word_pyps.end(); ++levelIt) { -        for (MPIPYPs::iterator pypIt=levelIt->begin(); -             pypIt != levelIt->end(); ++pypIt) { -          pypIt->resample_prior(rnd); -          log_p += pypIt->log_restaurant_prob(); -        } -      } - -      for (PYPs::iterator pypIt=m_document_pyps.begin(); -           pypIt != m_document_pyps.end(); ++pypIt) { -        pypIt->resample_prior(rnd); -        log_p += pypIt->log_restaurant_prob(); -      } - -      if (m_use_topic_pyp) { -        m_topic_pyp.resample_prior(rnd); -        log_p += m_topic_pyp.log_restaurant_prob(); -      } - -      std::cerr.precision(10); -      if (m_am_root) std::cerr << " ||| LLH=" << log_p << " ||| resampling time=" << timer.Elapsed() << " sec" << std::endl; -      timer.Reset(); - -      int k=0; -      if (m_am_root) std::cerr << "Topics distribution: "; -      std::cerr.precision(2); -      for (MPIPYPs::iterator pypIt=m_word_pyps.front().begin(); -           pypIt != m_word_pyps.front().end(); ++pypIt, ++k) { -        if (m_am_root && k % 5 == 0) std::cerr << std::endl << '\t'; -        if (m_am_root) std::cerr << "<" << k << ":" << pypIt->num_customers() << "," -          << pypIt->num_types() << "," << m_topic_pyp.prob(k, m_topic_p0) << "> "; -      } -      std::cerr.precision(4); -      if (m_am_root) std::cerr << std::endl; -    } -  } -  delete [] randomDocIndices; -} - -void MPIPYPTopics::synchronise() { -  // Synchronise the topic->word counts across the processes. -  //for (std::vector<MPIPYPs>::iterator levelIt=m_word_pyps.begin(); -  //     levelIt != m_word_pyps.end(); ++levelIt) { -//  std::vector<MPIPYPs>::iterator levelIt=m_word_pyps.begin(); -//  { -//    for (MPIPYPs::iterator pypIt=levelIt->begin(); pypIt != levelIt->end(); ++pypIt) { -    for (size_t label=0; label < m_word_pyps.at(0).size(); ++label) { -      MPIPYP<int>& pyp = m_word_pyps.at(0).at(label); - -      //if (!m_am_root) boost::mpi::communicator().barrier(); -      //std::cerr << "Before Sync Process " << m_rank << ":"; -      //pyp.debug_info(std::cerr); std::cerr << std::endl; -      //if (m_am_root) boost::mpi::communicator().barrier(); - -      MPIPYP<int>::dish_delta_type delta; -      pyp.synchronise(&delta); - -      for (MPIPYP<int>::dish_delta_type::const_iterator it=delta.begin(); it != delta.end(); ++it) { -        int count = it->second; -        if (count > 0) -          for (int i=0; i < count; ++i) increment(it->first, label); -        if (count < 0) -          for (int i=0; i > count; --i) decrement(it->first, label); -      } -      pyp.reset_deltas(); - -      //if (!m_am_root) boost::mpi::communicator().barrier(); -      //std::cerr << "After Sync Process " << m_rank << ":"; -      //pyp.debug_info(std::cerr); std::cerr << std::endl; -      //if (m_am_root) boost::mpi::communicator().barrier(); -    } -//  } -    // Synchronise the hierarchical topic pyp -    MPIPYP<int>::dish_delta_type topic_delta; -    m_topic_pyp.synchronise(&topic_delta); -    for (MPIPYP<int>::dish_delta_type::const_iterator it=topic_delta.begin(); it != topic_delta.end(); ++it) { -      int count = it->second; -      if (count > 0) -        for (int i=0; i < count; ++i) -          m_topic_pyp.increment(it->first, m_topic_p0, rnd); -      if (count < 0) -        for (int i=0; i > count; --i) -          
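/* a negative net delta means other processes removed more customers of this dish than they added; replay those removals locally */ 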
-
-void MPIPYPTopics::decrement(const Term& term, int topic, int level) {
-  //std::cerr << "MPIPYPTopics::decrement(" << term << "," << topic << "," << level << ")" << std::endl;
-  m_word_pyps.at(level).at(topic).decrement(term, rnd);
-  if (m_backoff.get()) {
-    Term backoff_term = (*m_backoff)[term];
-    if (!m_backoff->is_null(backoff_term))
-      decrement(backoff_term, topic, level+1);
-  }
-}
-
-void MPIPYPTopics::increment(const Term& term, int topic, int level) {
-  //std::cerr << "MPIPYPTopics::increment(" << term << "," << topic << "," << level << ")" << std::endl;
-  m_word_pyps.at(level).at(topic).increment(term, word_pyps_p0(term, topic, level), rnd);
-
-  if (m_backoff.get()) {
-    Term backoff_term = (*m_backoff)[term];
-    if (!m_backoff->is_null(backoff_term))
-      increment(backoff_term, topic, level+1);
-  }
-}
-
-int MPIPYPTopics::sample(const DocumentId& doc, const Term& term) {
-  // First pass: collect probs
-  F sum=0.0;
-  std::vector<F> sums;
-  for (int k=0; k<m_num_topics; ++k) {
-    F p_w_k = prob(term, k);
-
-    F topic_prob = m_topic_p0;
-    if (m_use_topic_pyp) topic_prob = m_topic_pyp.prob(k, m_topic_p0);
-
-    //F p_k_d = m_document_pyps[doc].prob(k, topic_prob);
-    F p_k_d = m_document_pyps.at(doc).unnormalised_prob(k, topic_prob);
-
-    sum += (p_w_k*p_k_d);
-    sums.push_back(sum);
-  }
-  // Second pass: sample a topic
-  F cutoff = rnd() * sum;
-  for (int k=0; k<m_num_topics; ++k) {
-    if (cutoff <= sums[k])
-      return k;
-  }
-  std::cerr << cutoff << " " << sum << std::endl;
-  assert(false);
-}
-
-MPIPYPTopics::F MPIPYPTopics::word_pyps_p0(const Term& term, int topic, int level) const {
-  //for (int i=0; i<level+1; ++i) std::cerr << "  ";
-  //std::cerr << "MPIPYPTopics::word_pyps_p0(" << term << "," << topic << "," << level << ")" << std::endl;
-
-  F p0 = m_term_p0;
-  if (m_backoff.get()) {
-    //static F fudge=m_backoff_p0; // TODO
-
-    Term backoff_term = (*m_backoff)[term];
-    if (!m_backoff->is_null(backoff_term)) {
-      assert (level < m_backoff->order());
-      //p0 = (1.0/(double)m_backoff->terms_at_level(level))*prob(backoff_term, topic, level+1);
-      p0 = prob(backoff_term, topic, level+1);
-    }
-    else
-      p0 = m_term_p0;
-  }
-  //for (int i=0; i<level+1; ++i) std::cerr << "  ";
-  //std::cerr << "MPIPYPTopics::word_pyps_p0(" << term << "," << topic << "," << level << ") = " << p0 << std::endl;
-  return p0;
-}
-
-MPIPYPTopics::F MPIPYPTopics::prob(const Term& term, int topic, int level) const {
-  //for (int i=0; i<level+1; ++i) std::cerr << "  ";
-  //std::cerr << "MPIPYPTopics::prob(" << term << "," << topic << "," << level << " " << factor << ")" << std::endl;
-
-  F p0 = word_pyps_p0(term, topic, level);
-  F p_w_k = m_word_pyps.at(level).at(topic).prob(term, p0);
-
-  //for (int i=0; i<level+1; ++i) std::cerr << "  ";
-  //std::cerr << "MPIPYPTopics::prob(" << term << "," << topic << "," << level << ") = " << p_w_k << std::endl;
-
-  return p_w_k;
-}
-
-int MPIPYPTopics::max_topic() const {
-  if (!m_use_topic_pyp)
-    return -1;
-
-  F current_max=0.0;
-  int current_topic=-1;
-  for (int k=0; k<m_num_topics; ++k) {
-    F prob = m_topic_pyp.prob(k, m_topic_p0);
-    if (prob > current_max) {
-      current_max = prob;
-      current_topic = k;
-    }
-  }
-  assert(current_topic >= 0);
-  assert(current_max >= 0);
-  // return the argmax topic index, not its probability (the function is int)
-  return current_topic;
-}
-
-std::pair<int,MPIPYPTopics::F> MPIPYPTopics::max(const 
DocumentId& true_doc) const { -  //std::cerr << "MPIPYPTopics::max(" << doc << "," << term << ")" << std::endl; -  // collect probs -  F current_max=0.0; -  DocumentId local_doc = true_doc - m_mpi_start; -  int current_topic=-1; -  for (int k=0; k<m_num_topics; ++k) { -    //F p_w_k = prob(term, k); - -    F topic_prob = m_topic_p0; -    if (m_use_topic_pyp) -      topic_prob = m_topic_pyp.prob(k, m_topic_p0); - -    F prob = 0; -    if (local_doc < 0) prob = topic_prob; -    else               prob = m_document_pyps.at(local_doc).prob(k, topic_prob); - -    if (prob > current_max) { -      current_max = prob; -      current_topic = k; -    } -  } -  assert(current_topic >= 0); -  assert(current_max >= 0); -  return std::make_pair(current_topic, current_max); -} - -std::pair<int,MPIPYPTopics::F> MPIPYPTopics::max(const DocumentId& true_doc, const Term& term) const { -  //std::cerr << "MPIPYPTopics::max(" << doc << "," << term << ")" << std::endl; -  // collect probs -  F current_max=0.0; -  DocumentId local_doc = true_doc - m_mpi_start; -  int current_topic=-1; -  for (int k=0; k<m_num_topics; ++k) { -    F p_w_k = prob(term, k); - -    F topic_prob = m_topic_p0; -    if (m_use_topic_pyp) -      topic_prob = m_topic_pyp.prob(k, m_topic_p0); - -    F p_k_d = 0; -    if (local_doc < 0) p_k_d = topic_prob; -    else               p_k_d = m_document_pyps.at(local_doc).prob(k, topic_prob); - -    F prob = (p_w_k*p_k_d); -    if (prob > current_max) { -      current_max = prob; -      current_topic = k; -    } -  } -  assert(current_topic >= 0); -  assert(current_max >= 0); -  return std::make_pair(current_topic, current_max); -} - -std::ostream& MPIPYPTopics::print_document_topics(std::ostream& out) const { -  for (CorpusTopics::const_iterator corpusIt=m_corpus_topics.begin(); -       corpusIt != m_corpus_topics.end(); ++corpusIt) { -    int term_index=0; -    for (DocumentTopics::const_iterator docIt=corpusIt->begin(); -         docIt != corpusIt->end(); ++docIt, ++term_index) { -      if (term_index) out << " "; -      out << *docIt; -    } -    out << std::endl; -  } -  return out; -} - -std::ostream& MPIPYPTopics::print_topic_terms(std::ostream& out) const { -  for (PYPs::const_iterator pypsIt=m_word_pyps.front().begin(); -       pypsIt != m_word_pyps.front().end(); ++pypsIt) { -    int term_index=0; -    for (PYP<int>::const_iterator termIt=pypsIt->begin(); -         termIt != pypsIt->end(); ++termIt, ++term_index) { -      if (term_index) out << " "; -      out << termIt->first << ":" << termIt->second; -    } -    out << std::endl; -  } -  return out; -} diff --git a/gi/pyp-topics/src/mpi-pyp-topics.hh b/gi/pyp-topics/src/mpi-pyp-topics.hh deleted file mode 100644 index d96bc4e5..00000000 --- a/gi/pyp-topics/src/mpi-pyp-topics.hh +++ /dev/null @@ -1,106 +0,0 @@ -#ifndef MPI_PYP_TOPICS_HH -#define MPI_PYP_TOPICS_HH - -#include <vector> -#include <iostream> - -#include <boost/ptr_container/ptr_vector.hpp> -#include <boost/random/uniform_real.hpp> -#include <boost/random/variate_generator.hpp> -#include <boost/random/mersenne_twister.hpp> -#include <boost/random/inversive_congruential.hpp> -#include <boost/random/linear_congruential.hpp> -#include <boost/random/lagged_fibonacci.hpp> -#include <boost/mpi/environment.hpp> -#include <boost/mpi/communicator.hpp> - - -#include "mpi-pyp.hh" -#include "mpi-corpus.hh" - -class MPIPYPTopics { -public: -  typedef std::vector<int> DocumentTopics; -  typedef std::vector<DocumentTopics> CorpusTopics; -  typedef double F; - -public: -  MPIPYPTopics(int 
num_topics, bool use_topic_pyp=false, unsigned long seed = 0)
-    : m_num_topics(num_topics), m_word_pyps(1),
-    m_topic_pyp(0.5,1.0), m_use_topic_pyp(use_topic_pyp),
-    m_seed(seed),
-    uni_dist(0,1), rng(seed == 0 ? (unsigned long)this : seed),
-    rnd(rng, uni_dist), m_mpi_start(-1), m_mpi_end(-1) {
-      // rank/size come from the m_world member (the default, world,
-      // communicator); avoid declaring a local of the same name here
-      m_rank = m_world.rank();
-      m_size = m_world.size();
-      m_am_root = (m_rank == 0);
-    }
-
-  void sample_corpus(const MPICorpus& corpus, int samples,
-                     int freq_cutoff_start=0, int freq_cutoff_end=0,
-                     int freq_cutoff_interval=0,
-                     int max_contexts_per_document=0);
-
-  int sample(const DocumentId& doc, const Term& term);
-  std::pair<int,F> max(const DocumentId& doc, const Term& term) const;
-  std::pair<int,F> max(const DocumentId& doc) const;
-  int max_topic() const;
-
-  void set_backoff(const std::string& filename) {
-    m_backoff.reset(new TermBackoff);
-    m_backoff->read(filename);
-    m_word_pyps.clear();
-    m_word_pyps.resize(m_backoff->order(), MPIPYPs());
-  }
-  void set_backoff(TermBackoffPtr backoff) {
-    m_backoff = backoff;
-    m_word_pyps.clear();
-    m_word_pyps.resize(m_backoff->order(), MPIPYPs());
-  }
-
-  F prob(const Term& term, int topic, int level=0) const;
-  void decrement(const Term& term, int topic, int level=0);
-  void increment(const Term& term, int topic, int level=0);
-
-  std::ostream& print_document_topics(std::ostream& out) const;
-  std::ostream& print_topic_terms(std::ostream& out) const;
-
-  void synchronise();
-
-private:
-  F word_pyps_p0(const Term& term, int topic, int level) const;
-
-  int m_num_topics;
-  F m_term_p0, m_topic_p0, m_backoff_p0;
-
-  CorpusTopics m_corpus_topics;
-  typedef boost::ptr_vector< PYP<int> > PYPs;
-  typedef boost::ptr_vector< MPIPYP<int> > MPIPYPs;
-  PYPs m_document_pyps;
-  std::vector<MPIPYPs> m_word_pyps;
-  MPIPYP<int> m_topic_pyp;
-  bool m_use_topic_pyp;
-
-  unsigned long m_seed;
-
-  //typedef boost::mt19937 base_generator_type;
-  //typedef boost::hellekalek1995 base_generator_type;
-  typedef boost::lagged_fibonacci607 base_generator_type;
-  typedef boost::uniform_real<> uni_dist_type;
-  typedef boost::variate_generator<base_generator_type&, uni_dist_type> gen_type;
-
-  uni_dist_type uni_dist;
-  base_generator_type rng; //this gets the seed
-  gen_type rnd; //instantiate: rnd(rng, uni_dist)
-                //call: rnd() generates uniform on [0,1)
-
-  TermBackoffPtr m_backoff;
-
-  boost::mpi::communicator m_world;
-  bool m_am_root;
-  int m_rank, m_size;
-  int m_mpi_start, m_mpi_end;
-};
-
-#endif // MPI_PYP_TOPICS_HH
diff --git a/gi/pyp-topics/src/mpi-pyp.hh b/gi/pyp-topics/src/mpi-pyp.hh
deleted file mode 100644
index c2341b9e..00000000
--- a/gi/pyp-topics/src/mpi-pyp.hh
+++ /dev/null
@@ -1,447 +0,0 @@
-#ifndef _mpipyp_hh
-#define _mpipyp_hh
-
-#include <math.h>
-#include <map>
-#include <tr1/unordered_map>
-//#include <google/sparse_hash_map>
-
-#include <boost/random/uniform_real.hpp>
-#include <boost/random/variate_generator.hpp>
-#include <boost/random/mersenne_twister.hpp>
-#include <boost/tuple/tuple.hpp>
-#include <boost/serialization/map.hpp>
-#include <boost/mpi.hpp>
-#include <boost/mpi/environment.hpp>
-#include <boost/mpi/communicator.hpp>
-#include <boost/mpi/operations.hpp>
-
-
-#include "pyp.hh"
-
-//
-// Pitman-Yor process with customer and table tracking
-//
-
-template <typename Dish, typename Hash=std::tr1::hash<Dish> >
-class MPIPYP : 
public PYP<Dish, Hash> { -public: -  typedef std::map<Dish, int> dish_delta_type; - -  MPIPYP(double a, double b, Hash hash=Hash()); - -  template < typename Uniform01 > -    int increment(Dish d, double p0, Uniform01& rnd); -  template < typename Uniform01 > -    int decrement(Dish d, Uniform01& rnd); - -  void clear(); -  void reset_deltas(); - -  void synchronise(dish_delta_type* result); - -private: -  typedef std::map<Dish, typename PYP<Dish,Hash>::TableCounter> table_delta_type; - -  dish_delta_type m_count_delta; -  table_delta_type m_table_delta; -}; - -template <typename Dish, typename Hash> -MPIPYP<Dish,Hash>::MPIPYP(double a, double b, Hash h) -: PYP<Dish,Hash>(a, b, 0, h) {} - -template <typename Dish, typename Hash> -  template <typename Uniform01> -int  -MPIPYP<Dish,Hash>::increment(Dish dish, double p0, Uniform01& rnd) { -  //std::cerr << "-----INCREMENT DISH " << dish << std::endl; -  int delta = 0; -  int table_joined=-1; -  typename PYP<Dish,Hash>::TableCounter &tc = PYP<Dish,Hash>::_dish_tables[dish]; - -  // seated on a new or existing table? -  int c = PYP<Dish,Hash>::count(dish);  -  int t = PYP<Dish,Hash>::num_tables(dish);  -  int T = PYP<Dish,Hash>::num_tables(); -  double& a = PYP<Dish,Hash>::_a; -  double& b = PYP<Dish,Hash>::_b; -  double pshare = (c > 0) ? (c - a*t) : 0.0; -  double pnew = (b + a*T) * p0; -  if (pshare < 0.0) { -    std::cerr << pshare << " " << c << " " << a << " " << t << std::endl; -    assert(false); -  } - -  if (rnd() < pnew / (pshare + pnew)) { -    // assign to a new table -    tc.tables += 1; -    tc.table_histogram[1] += 1; -    PYP<Dish,Hash>::_total_tables += 1; -    delta = 1; -    table_joined = 1; -  } -  else { -    // randomly assign to an existing table -    // remove constant denominator from inner loop -    double r = rnd() * (c - a*t); -    for (std::map<int,int>::iterator -         hit = tc.table_histogram.begin(); -         hit != tc.table_histogram.end(); ++hit) { -      r -= ((hit->first - a) * hit->second); -      if (r <= 0) { -        tc.table_histogram[hit->first+1] += 1; -        hit->second -= 1; -        table_joined = hit->first+1; -        if (hit->second == 0) -          tc.table_histogram.erase(hit); -        break; -      } -    } -    if (r > 0) { -      std::cerr << r << " " << c << " " << a << " " << t << std::endl; -      assert(false); -    } -    delta = 0; -  } - -  std::tr1::unordered_map<Dish,int,Hash>::operator[](dish) += 1; -  //google::sparse_hash_map<Dish,int,Hash>::operator[](dish) += 1; -  PYP<Dish,Hash>::_total_customers += 1; - -  // MPI Delta handling -  // track the customer entering -  typename dish_delta_type::iterator customer_it;  -  bool customer_insert_result;  -  boost::tie(customer_it, customer_insert_result)  -    = m_count_delta.insert(std::make_pair(dish,0));  - -  customer_it->second += 1; -  if (customer_it->second == 0) -    m_count_delta.erase(customer_it); - -  // increment the histogram bar for the table joined -  /* -  typename PYP<Dish,Hash>::TableCounter &delta_tc = m_table_delta[dish]; - -  std::map<int,int> &histogram = delta_tc.table_histogram; -  assert (table_joined > 0); - -  typename std::map<int,int>::iterator table_it; bool table_insert_result;  -  boost::tie(table_it, table_insert_result) = histogram.insert(std::make_pair(table_joined,0));  -  table_it->second += 1; -  if (delta == 0) { -    // decrement the histogram bar for the table left  -    typename std::map<int,int>::iterator left_table_it;  -    boost::tie(left_table_it, table_insert_result)  -      = 
histogram.insert(std::make_pair(table_joined-1,0));  -    left_table_it->second -= 1; -    if (left_table_it->second == 0) histogram.erase(left_table_it); -  } -  else delta_tc.tables += 1; - -  if (table_it->second == 0) histogram.erase(table_it); - -    //std::cerr << "Added (" << delta << ") " << dish << " to table " << table_joined << "\n";  -    //std::cerr << "Dish " << dish << " has " << count(dish) << " customers, and is sitting at " << PYP<Dish,Hash>::num_tables(dish) << " tables.\n";  -    //for (std::map<int,int>::const_iterator  -    //     hit = delta_tc.table_histogram.begin(); -    //     hit != delta_tc.table_histogram.end(); ++hit) { -    //  std::cerr << "    " << hit->second << " tables with " << hit->first << " customers." << std::endl;  -    //} -    //std::cerr << "Added (" << delta << ") " << dish << " to table " << table_joined << "\n";  -    //std::cerr << "Dish " << dish << " has " << count(dish) << " customers, and is sitting at " << PYP<Dish,Hash>::num_tables(dish) << " tables.\n";  -    int x_num_customers=0, x_num_table=0; -    for (std::map<int,int>::const_iterator  -         hit = delta_tc.table_histogram.begin(); -         hit != delta_tc.table_histogram.end(); ++hit) { -      x_num_table += hit->second; -      x_num_customers += (hit->second*hit->first); -    } -    int tmp_c = PYP<Dish,Hash>::count(dish); -    int tmp_t = PYP<Dish,Hash>::num_tables(dish); -    assert (x_num_customers <= tmp_c);  -    assert (x_num_table <= tmp_t);  - -  if (delta_tc.table_histogram.empty()) { -    assert (delta_tc.tables == 0); -    m_table_delta.erase(dish); -  } -  */ - -  //PYP<Dish,Hash>::debug_info(std::cerr); -  //std::cerr << "   Dish " << dish << " has count " << PYP<Dish,Hash>::count(dish) << " tables " << PYP<Dish,Hash>::num_tables(dish) << std::endl; - -  return delta; -} - -template <typename Dish, typename Hash> -  template <typename Uniform01> -int  -MPIPYP<Dish,Hash>::decrement(Dish dish, Uniform01& rnd) -{ -  //std::cerr << "-----DECREMENT DISH " << dish << std::endl; -  typename std::tr1::unordered_map<Dish, int>::iterator dcit = find(dish); -  //typename google::sparse_hash_map<Dish, int>::iterator dcit = find(dish); -  if (dcit == PYP<Dish,Hash>::end()) { -    std::cerr << dish << std::endl; -    assert(false); -  }  - -  int delta = 0, table_left=-1; - -  typename std::tr1::unordered_map<Dish, typename PYP<Dish,Hash>::TableCounter>::iterator dtit  -    = PYP<Dish,Hash>::_dish_tables.find(dish); -  //typename google::sparse_hash_map<Dish, TableCounter>::iterator dtit = _dish_tables.find(dish); -  if (dtit == PYP<Dish,Hash>::_dish_tables.end()) { -    std::cerr << dish << std::endl; -    assert(false); -  }  -  typename PYP<Dish,Hash>::TableCounter &tc = dtit->second; - -  double r = rnd() * PYP<Dish,Hash>::count(dish); -  for (std::map<int,int>::iterator hit = tc.table_histogram.begin(); -       hit != tc.table_histogram.end(); ++hit) { -    r -= (hit->first * hit->second); -    if (r <= 0) { -      table_left = hit->first; -      if (hit->first > 1) { -        tc.table_histogram[hit->first-1] += 1; -      } -      else { -        delta = -1; -        tc.tables -= 1; -        PYP<Dish,Hash>::_total_tables -= 1; -      } - -      hit->second -= 1; -      if (hit->second == 0) tc.table_histogram.erase(hit); -      break; -    } -  } -  if (r > 0) { -    std::cerr << r << " " << PYP<Dish,Hash>::count(dish) << " " << PYP<Dish,Hash>::_a << " "  -      << PYP<Dish,Hash>::num_tables(dish) << std::endl; -    assert(false); -  } - -  // remove the customer -  
dcit->second -= 1; -  PYP<Dish,Hash>::_total_customers -= 1; -  assert(dcit->second >= 0); -  if (dcit->second == 0) { -    PYP<Dish,Hash>::erase(dcit); -    PYP<Dish,Hash>::_dish_tables.erase(dtit); -  } - -  // MPI Delta processing -  typename dish_delta_type::iterator it;  -  bool insert_result;  -  boost::tie(it, insert_result) = m_count_delta.insert(std::make_pair(dish,0));  -  it->second -= 1; -  if (it->second == 0) m_count_delta.erase(it); - -  assert (table_left > 0); -  typename PYP<Dish,Hash>::TableCounter& delta_tc = m_table_delta[dish]; -  if (table_left > 1) { -    std::map<int,int>::iterator tit; -    boost::tie(tit, insert_result) = delta_tc.table_histogram.insert(std::make_pair(table_left-1,0)); -    tit->second += 1; -    if (tit->second == 0) delta_tc.table_histogram.erase(tit); -  } -  else delta_tc.tables -= 1; - -  std::map<int,int>::iterator tit; -  boost::tie(tit, insert_result) = delta_tc.table_histogram.insert(std::make_pair(table_left,0)); -  tit->second -= 1; -  if (tit->second == 0) delta_tc.table_histogram.erase(tit); - -  //  std::cerr << "Dish " << dish << " has " << count(dish) << " customers, and is sitting at " << PYP<Dish,Hash>::num_tables(dish) << " tables.\n";  -  //  for (std::map<int,int>::const_iterator  -  //       hit = delta_tc.table_histogram.begin(); -  //       hit != delta_tc.table_histogram.end(); ++hit) { -  //    std::cerr << "    " << hit->second << " tables with " << hit->first << " customers." << std::endl;  -  //  } -    int x_num_customers=0, x_num_table=0; -    for (std::map<int,int>::const_iterator  -         hit = delta_tc.table_histogram.begin(); -         hit != delta_tc.table_histogram.end(); ++hit) { -      x_num_table += hit->second; -      x_num_customers += (hit->second*hit->first); -    } -    int tmp_c = PYP<Dish,Hash>::count(dish); -    int tmp_t = PYP<Dish,Hash>::num_tables(dish); -    assert (x_num_customers <= tmp_c);  -    assert (x_num_table <= tmp_t);  - -  if (delta_tc.table_histogram.empty()) { -  //  std::cerr << "   DELETING " << dish << std::endl; -    assert (delta_tc.tables == 0); -    m_table_delta.erase(dish); -  } - -  //PYP<Dish,Hash>::debug_info(std::cerr); -  //std::cerr << "   Dish " << dish << " has count " << PYP<Dish,Hash>::count(dish) << " tables " << PYP<Dish,Hash>::num_tables(dish) << std::endl; -  return delta; -} - -template <typename Dish, typename Hash> -void  -MPIPYP<Dish,Hash>::clear() { -  PYP<Dish,Hash>::clear(); -  reset_deltas(); -} - -template <typename Dish, typename Hash> -void  -MPIPYP<Dish,Hash>::reset_deltas() {  -  m_count_delta.clear();  -  m_table_delta.clear(); -} - -template <typename Dish> -struct sum_maps { -  typedef std::map<Dish,int> map_type; -  map_type& operator() (map_type& l, map_type const & r) const { -    for (typename map_type::const_iterator it=r.begin(); it != r.end(); it++) -      l[it->first] += it->second; -    return l; -  } -}; - -template <typename Dish> -struct subtract_maps { -  typedef std::map<Dish,int> map_type; -  map_type& operator() (map_type& l, map_type const & r) const { -    for (typename map_type::const_iterator it=r.begin(); it != r.end(); it++) -      l[it->first] -= it->second; -    return l; -  } -}; - -// Needed Boost definitions -namespace boost {  -  namespace mpi { -    template <> -    struct is_commutative< sum_maps<int>, std::map<int,int> > : mpl::true_ {}; -  } - -  namespace serialization { -    template<class Archive> -    void serialize(Archive & ar, PYP<int>::TableCounter& t, const unsigned int version) { -      ar & 
t.table_histogram; -      ar & t.tables; -    } - -  } // namespace serialization -} // namespace boost - -template <typename A, typename B, typename C> -struct triple { -  triple() {} -  triple(const A& a, const B& b, const C& c) : first(a), second(b), third(c) {} -  A first; -  B second; -  C third; - -  template<class Archive> -  void serialize(Archive &ar, const unsigned int version){ -      ar & first; -      ar & second; -      ar & third; -  } -}; - -BOOST_IS_BITWISE_SERIALIZABLE(MPIPYP<int>::dish_delta_type) -BOOST_CLASS_TRACKING(MPIPYP<int>::dish_delta_type,track_never) - -template <typename Dish, typename Hash> -void  -MPIPYP<Dish,Hash>::synchronise(dish_delta_type* result) { -  boost::mpi::communicator world;  -  //int rank = world.rank(), size = world.size(); - -  boost::mpi::all_reduce(world, m_count_delta, *result, sum_maps<Dish>()); -  subtract_maps<Dish>()(*result, m_count_delta); -  -/* -  // communicate the customer count deltas -  dish_delta_type global_dish_delta; -  boost::mpi::all_reduce(world, m_count_delta, global_dish_delta, sum_maps<Dish>()); - -  // update this restaurant -  for (typename dish_delta_type::const_iterator it=global_dish_delta.begin();  -       it != global_dish_delta.end(); ++it) { -    int global_delta = it->second - m_count_delta[it->first]; -    if (global_delta == 0) continue; -    typename std::tr1::unordered_map<Dish,int,Hash>::iterator dit; bool inserted; -    boost::tie(dit, inserted)  -      = std::tr1::unordered_map<Dish,int,Hash>::insert(std::make_pair(it->first, 0)); -    dit->second += global_delta; -    assert(dit->second >= 0); -    if (dit->second == 0) { -      std::tr1::unordered_map<Dish,int,Hash>::erase(dit); -    } - -    PYP<Dish,Hash>::_total_customers += (it->second - m_count_delta[it->first]); -    int tmp = PYP<Dish,Hash>::_total_customers; -    assert(tmp >= 0); -    //std::cerr << "Process " << rank << " adding " <<  (it->second - m_count_delta[it->first]) << " of customer " << it->first << std::endl; -  } -*/ -/* -  // communicate the table count deltas -  for (int process = 0; process < size; ++process) { -    typename std::vector< triple<Dish, int, int> > message; -    if (rank == process) { -      // broadcast deltas -      for (typename table_delta_type::const_iterator dish_it=m_table_delta.begin();  -           dish_it != m_table_delta.end(); ++dish_it) { -        //assert (dish_it->second.tables > 0); -        for (std::map<int,int>::const_iterator it=dish_it->second.table_histogram.begin();  -             it != dish_it->second.table_histogram.end(); ++it) { -          triple<Dish, int, int> m(dish_it->first, it->first, it->second); -          message.push_back(m); -        } -        // append a special message with the total table delta for this dish -        triple<Dish, int, int> m(dish_it->first, -1, dish_it->second.tables); -        message.push_back(m); -      } -      boost::mpi::broadcast(world, message, process); -    } -    else { -      // receive deltas -      boost::mpi::broadcast(world, message, process); -      for (typename std::vector< triple<Dish, int, int> >::const_iterator it=message.begin(); it != message.end(); ++it) { -        typename PYP<Dish,Hash>::TableCounter& tc = PYP<Dish,Hash>::_dish_tables[it->first]; -        if (it->second >= 0) { -          std::map<int,int>::iterator tit; bool inserted; -          boost::tie(tit, inserted) = tc.table_histogram.insert(std::make_pair(it->second, 0)); -          tit->second += it->third; -          if (tit->second < 0) { -            std::cerr << 
tit->first << " " << tit->second << " " << it->first << " " << it->second << " " << it->third << std::endl; -            assert(tit->second >= 0); -          } -          if (tit->second == 0) { -            tc.table_histogram.erase(tit); -          } -        } -        else { -          tc.tables += it->third; -          PYP<Dish,Hash>::_total_tables += it->third; -          assert(tc.tables >= 0); -          if (tc.tables == 0) assert(tc.table_histogram.empty()); -          if (tc.table_histogram.empty()) { -            assert (tc.tables == 0); -            PYP<Dish,Hash>::_dish_tables.erase(it->first); -          } -        } -      } -    } -  } -*/ - -//  reset_deltas(); -} - -#endif diff --git a/gi/pyp-topics/src/mpi-train-contexts.cc b/gi/pyp-topics/src/mpi-train-contexts.cc deleted file mode 100644 index e05e0eac..00000000 --- a/gi/pyp-topics/src/mpi-train-contexts.cc +++ /dev/null @@ -1,201 +0,0 @@ -// STL -#include <iostream> -#include <fstream> -#include <algorithm> -#include <iterator> - -// Boost -#include <boost/program_options/parsers.hpp> -#include <boost/program_options/variables_map.hpp> -#include <boost/scoped_ptr.hpp> -#include <boost/mpi/environment.hpp> -#include <boost/mpi/communicator.hpp> -#include <boost/lexical_cast.hpp> - -// Local -#include "mpi-pyp-topics.hh" -#include "corpus.hh" -#include "mpi-corpus.hh" -#include "gzstream.hh" - -static const char *REVISION = "$Rev: 170 $"; - -// Namespaces -using namespace boost; -using namespace boost::program_options; -using namespace std; - -int main(int argc, char **argv) -{ -  mpi::environment env(argc, argv); -  mpi::communicator world; -  int rank = world.rank(); -  bool am_root = (rank==0); -  if (am_root) cout << "Pitman Yor topic models: Copyright 2010 Phil Blunsom\n"; -  if (am_root) std::cout << "I am process " << world.rank() << " of " << world.size() << "." 
<< std::endl; -  if (am_root) cout << REVISION << '\n' <<endl; - -  //////////////////////////////////////////////////////////////////////////////////////////// -  // Command line processing -  variables_map vm;  - -  // Command line processing -  { -    options_description cmdline_specific("Command line specific options"); -    cmdline_specific.add_options() -      ("help,h", "print help message") -      ("config,c", value<string>(), "config file specifying additional command line options") -      ; -    options_description config_options("Allowed options"); -    config_options.add_options() -      ("help,h", "print help message") -      ("data,d", value<string>(), "file containing the documents and context terms") -      ("topics,t", value<int>()->default_value(50), "number of topics") -      ("document-topics-out,o", value<string>(), "file to write the document topics to") -      ("default-topics-out", value<string>(), "file to write default term topic assignments.") -      ("topic-words-out,w", value<string>(), "file to write the topic word distribution to") -      ("samples,s", value<int>()->default_value(10), "number of sampling passes through the data") -      ("backoff-type", value<string>(), "backoff type: none|simple") -//      ("filter-singleton-contexts", "filter singleton contexts") -      ("hierarchical-topics", "Use a backoff hierarchical PYP as the P0 for the document topics distribution.") -      ("binary-counts,b", "Use binary rather than integer counts for contexts.") -      ("freq-cutoff-start", value<int>()->default_value(0), "initial frequency cutoff.") -      ("freq-cutoff-end", value<int>()->default_value(0), "final frequency cutoff.") -      ("freq-cutoff-interval", value<int>()->default_value(0), "number of iterations between frequency decrement.") -      ("max-contexts-per-document", value<int>()->default_value(0), "Only sample the n most frequent contexts for a document.") -      ; - -    cmdline_specific.add(config_options); - -    store(parse_command_line(argc, argv, cmdline_specific), vm);  -    notify(vm); - -    if (vm.count("config") > 0) { -      ifstream config(vm["config"].as<string>().c_str()); -      store(parse_config_file(config, config_options), vm);  -    } - -    if (vm.count("help")) {  -      cout << cmdline_specific << "\n";  -      return 1;  -    } -  } -  //////////////////////////////////////////////////////////////////////////////////////////// - -  if (!vm.count("data")) { -    cerr << "Please specify a file containing the data." << endl; -    return 1; -  } - -  // seed the random number generator: 0 = automatic, specify value otherwise -  unsigned long seed = 0;  -  MPIPYPTopics model(vm["topics"].as<int>(), vm.count("hierarchical-topics"), seed); - -  // read the data -  BackoffGenerator* backoff_gen=0; -  if (vm.count("backoff-type")) { -    if (vm["backoff-type"].as<std::string>() == "none") { -      backoff_gen = 0; -    } -    else if (vm["backoff-type"].as<std::string>() == "simple") { -      backoff_gen = new SimpleBackoffGenerator(); -    } -    else { -     cerr << "Backoff type (--backoff-type) must be one of none|simple." 
<<endl; -      return(1); -    } -  } - -  //ContextsCorpus contexts_corpus; -  MPICorpus contexts_corpus; -  contexts_corpus.read_contexts(vm["data"].as<string>(), backoff_gen, /*vm.count("filter-singleton-contexts")*/ false, vm.count("binary-counts")); -  int mpi_start = 0, mpi_end = 0; -  contexts_corpus.bounds(&mpi_start, &mpi_end); -  std::cerr << "\tProcess " << rank << " has documents " << mpi_start << " -> " << mpi_end << "." << std::endl; - -  model.set_backoff(contexts_corpus.backoff_index()); - -  if (backoff_gen)  -    delete backoff_gen; - -  // train the sampler -  model.sample_corpus(contexts_corpus, vm["samples"].as<int>(), -                      vm["freq-cutoff-start"].as<int>(), -                      vm["freq-cutoff-end"].as<int>(), -                      vm["freq-cutoff-interval"].as<int>(), -                      vm["max-contexts-per-document"].as<int>()); - -  if (vm.count("document-topics-out")) { -    std::ofstream documents_out((vm["document-topics-out"].as<string>() + ".pyp-process-" + boost::lexical_cast<std::string>(rank)).c_str()); -    //int documents = contexts_corpus.num_documents(); -    /* -    int mpi_start = 0, mpi_end = documents; -    if (world.size() != 1) { -      mpi_start = (documents / world.size()) * rank; -      if (rank == world.size()-1) mpi_end = documents; -      else mpi_end = (documents / world.size())*(rank+1); -    } -    */ - -    map<int,int> all_terms; -    for (int document_id=mpi_start; document_id<mpi_end; ++document_id) { -      assert (document_id < contexts_corpus.num_documents()); -      const Document& doc = contexts_corpus.at(document_id); -      vector<int> unique_terms; -      for (Document::const_iterator docIt=doc.begin(); docIt != doc.end(); ++docIt) { -        if (unique_terms.empty() || *docIt != unique_terms.back()) -          unique_terms.push_back(*docIt); -        // increment this terms frequency -        pair<map<int,int>::iterator,bool> insert_result = all_terms.insert(make_pair(*docIt,1)); -        if (!insert_result.second)  -          all_terms[*docIt] = all_terms[*docIt] + 1; -      } -      documents_out << contexts_corpus.key(document_id) << '\t'; -      documents_out << model.max(document_id).first << " " << doc.size() << " ||| "; -      for (std::vector<int>::const_iterator termIt=unique_terms.begin(); termIt != unique_terms.end(); ++termIt) { -        if (termIt != unique_terms.begin()) -          documents_out << " ||| "; -        vector<std::string> strings = contexts_corpus.context2string(*termIt); -        copy(strings.begin(), strings.end(),ostream_iterator<std::string>(documents_out, " ")); -        std::pair<int,MPIPYPTopics::F> maxinfo = model.max(document_id, *termIt); -        documents_out << "||| C=" << maxinfo.first << " P=" << maxinfo.second; -      } -      documents_out <<endl; -    } -    documents_out.close(); -    world.barrier(); - -    if (am_root) { -      ogzstream root_documents_out(vm["document-topics-out"].as<string>().c_str()); -      for (int p=0; p < world.size(); ++p) { -        std::string rank_p_prefix((vm["document-topics-out"].as<string>() + ".pyp-process-" + boost::lexical_cast<std::string>(p)).c_str()); -        std::ifstream rank_p_trees_istream(rank_p_prefix.c_str(), std::ios_base::binary); -        root_documents_out << rank_p_trees_istream.rdbuf(); -        rank_p_trees_istream.close(); -        remove((rank_p_prefix).c_str()); -      } -      root_documents_out.close(); -    } - -    if (am_root && vm.count("default-topics-out")) { -      ofstream 
default_topics(vm["default-topics-out"].as<string>().c_str()); -      default_topics << model.max_topic() <<endl; -      for (std::map<int,int>::const_iterator termIt=all_terms.begin(); termIt != all_terms.end(); ++termIt) { -        vector<std::string> strings = contexts_corpus.context2string(termIt->first); -        default_topics << model.max(-1, termIt->first).first << " ||| " << termIt->second << " ||| "; -        copy(strings.begin(), strings.end(),ostream_iterator<std::string>(default_topics, " ")); -        default_topics <<endl; -      } -    } -  } - -  if (am_root && vm.count("topic-words-out")) { -    ogzstream topics_out(vm["topic-words-out"].as<string>().c_str()); -    model.print_topic_terms(topics_out); -    topics_out.close(); -  } - -  cout <<endl; - -  return 0; -} diff --git a/gi/pyp-topics/src/mt19937ar.c b/gi/pyp-topics/src/mt19937ar.c deleted file mode 100644 index 6551ea39..00000000 --- a/gi/pyp-topics/src/mt19937ar.c +++ /dev/null @@ -1,194 +0,0 @@ -/*  -   A C-program for MT19937, with initialization improved 2002/1/26. -   Coded by Takuji Nishimura and Makoto Matsumoto. - -   Before using, initialize the state by using mt_init_genrand(seed)   -   or mt_init_by_array(init_key, key_length). - -   Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, -   All rights reserved.                           - -   Redistribution and use in source and binary forms, with or without -   modification, are permitted provided that the following conditions -   are met: - -     1. Redistributions of source code must retain the above copyright -        notice, this list of conditions and the following disclaimer. - -     2. Redistributions in binary form must reproduce the above copyright -        notice, this list of conditions and the following disclaimer in the -        documentation and/or other materials provided with the distribution. - -     3. The names of its contributors may not be used to endorse or promote  -        products derived from this software without specific prior written  -        permission. - -   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR -   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -   Any feedback is very welcome. 
-   http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html -   email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) -*/ - -#include "mt19937ar.h"   /* XXX MJ 17th March 2006 */ - -/* Period parameters */   -#define N 624 -#define M 397 -#define MATRIX_A 0x9908b0dfUL   /* constant vector a */ -#define UPPER_MASK 0x80000000UL /* most significant w-r bits */ -#define LOWER_MASK 0x7fffffffUL /* least significant r bits */ - -static unsigned long mt[N]; /* the array for the state vector  */ -static int mti=N+1; /* mti==N+1 means mt[N] is not initialized */ - -/* initializes mt[N] with a seed */ -void mt_init_genrand(unsigned long s) -{ -    mt[0]= s & 0xffffffffUL; -    for (mti=1; mti<N; mti++) { -        mt[mti] =  -	    (1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);  -        /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ -        /* In the previous versions, MSBs of the seed affect   */ -        /* only MSBs of the array mt[].                        */ -        /* 2002/01/09 modified by Makoto Matsumoto             */ -        mt[mti] &= 0xffffffffUL; -        /* for >32 bit machines */ -    } -} - -/* initialize by an array with array-length */ -/* init_key is the array for initializing keys */ -/* key_length is its length */ -/* slight change for C++, 2004/2/26 */ -void mt_init_by_array(unsigned long init_key[], int key_length) -{ -    int i, j, k; -    mt_init_genrand(19650218UL); -    i=1; j=0; -    k = (N>key_length ? N : key_length); -    for (; k; k--) { -        mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525UL)) -          + init_key[j] + j; /* non linear */ -        mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ -        i++; j++; -        if (i>=N) { mt[0] = mt[N-1]; i=1; } -        if (j>=key_length) j=0; -    } -    for (k=N-1; k; k--) { -        mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941UL)) -          - i; /* non linear */ -        mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ -        i++; -        if (i>=N) { mt[0] = mt[N-1]; i=1; } -    } - -    mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */  -} - -/* generates a random number on [0,0xffffffff]-interval */ -unsigned long mt_genrand_int32(void) -{ -    unsigned long y; -    static unsigned long mag01[2]={0x0UL, MATRIX_A}; -    /* mag01[x] = x * MATRIX_A  for x=0,1 */ - -    if (mti >= N) { /* generate N words at one time */ -        int kk; - -        if (mti == N+1)   /* if mt_init_genrand() has not been called, */ -            mt_init_genrand(5489UL); /* a default initial seed is used */ - -        for (kk=0;kk<N-M;kk++) { -            y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK); -            mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & 0x1UL]; -        } -        for (;kk<N-1;kk++) { -            y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK); -            mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & 0x1UL]; -        } -        y = (mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK); -        mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1UL]; - -        mti = 0; -    } -   -    y = mt[mti++]; - -    /* Tempering */ -    y ^= (y >> 11); -    y ^= (y << 7) & 0x9d2c5680UL; -    y ^= (y << 15) & 0xefc60000UL; -    y ^= (y >> 18); - -    return y; -} - -/* generates a random number on [0,0x7fffffff]-interval */ -long mt_genrand_int31(void) -{ -    return (long)( mt_genrand_int32()>>1); -} - -/* generates a random number on [0,1]-real-interval */ -double mt_genrand_real1(void) -{ -    return mt_genrand_int32()*(1.0/4294967295.0);  -    /* divided by 
2^32-1 */  -} - -/* generates a random number on [0,1)-real-interval */ -double mt_genrand_real2(void) -{ -    return mt_genrand_int32()*(1.0/4294967296.0);  -    /* divided by 2^32 */ -} - -/* generates a random number on (0,1)-real-interval */ -double mt_genrand_real3(void) -{ -    return (((double) mt_genrand_int32()) + 0.5)*(1.0/4294967296.0);  -    /* divided by 2^32 */ -} - -/* generates a random number on [0,1) with 53-bit resolution*/ -double mt_genrand_res53(void)  -{  -    unsigned long a=mt_genrand_int32()>>5, b=mt_genrand_int32()>>6;  -    return(a*67108864.0+b)*(1.0/9007199254740992.0);  -}  -/* These real versions are due to Isaku Wada, 2002/01/09 added */ - -/* -#include <stdio.h> - -int main(void) -{ -    int i; -    unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4; -    mt_init_by_array(init, length); -    printf("1000 outputs of genrand_int32()\n"); -    for (i=0; i<1000; i++) { -      printf("%10lu ", mt_genrand_int32()); -      if (i%5==4) printf("\n"); -    } -    printf("\n1000 outputs of genrand_real2()\n"); -    for (i=0; i<1000; i++) { -      printf("%10.8f ", mt_genrand_real2()); -      if (i%5==4) printf("\n"); -    } -    return 0; -} -*/ diff --git a/gi/pyp-topics/src/mt19937ar.h b/gi/pyp-topics/src/mt19937ar.h deleted file mode 100644 index caab4045..00000000 --- a/gi/pyp-topics/src/mt19937ar.h +++ /dev/null @@ -1,44 +0,0 @@ -/* mt19937ar.h - * - * Mark Johnson, 17th March 2006 - */ - -#ifndef MT19937AR_H -#define MT19937AR_H - -#ifdef __cplusplus -extern "C" { -#endif - -  /* initializes mt[N] with a seed */ -  void mt_init_genrand(unsigned long s); - -  /* initialize by an array with array-length */ -  /* init_key is the array for initializing keys */ -  /* key_length is its length */ -  /* slight change for C++, 2004/2/26 */ -  void mt_init_by_array(unsigned long init_key[], int key_length); - -  /* generates a random number on [0,0xffffffff]-interval */ -  unsigned long mt_genrand_int32(void); - -  /* generates a random number on [0,0x7fffffff]-interval */ -  long mt_genrand_int31(void); - -  /* generates a random number on [0,1]-real-interval */ -  double mt_genrand_real1(void); - -  /* generates a random number on [0,1)-real-interval */ -  double mt_genrand_real2(void); - -  /* generates a random number on (0,1)-real-interval */ -  double mt_genrand_real3(void); - -  /* generates a random number on [0,1) with 53-bit resolution*/ -  double mt_genrand_res53(void); - -#ifdef __cplusplus -}; -#endif - -#endif /* MT19937AR_H */ diff --git a/gi/pyp-topics/src/pyp-topics.cc b/gi/pyp-topics/src/pyp-topics.cc deleted file mode 100644 index 4de52fd7..00000000 --- a/gi/pyp-topics/src/pyp-topics.cc +++ /dev/null @@ -1,499 +0,0 @@ -#include "timing.h" -#include "pyp-topics.hh" -#include "contexts_corpus.hh" - -//Dict const *dict; - -//#include <boost/date_time/posix_time/posix_time_types.hpp> -void PYPTopics::sample_corpus(const Corpus& corpus, int samples, -                              int freq_cutoff_start, int freq_cutoff_end, -                              int freq_cutoff_interval, -                              int max_contexts_per_document, -                              F temp_start, F temp_end) { -  Timer timer; -  //dict = &((ContextsCorpus*) &corpus)->dict(); - -  if (!m_backoff.get()) { -    m_word_pyps.clear(); -    m_word_pyps.push_back(PYPs()); -  } - -  std::cerr << "\n Training with " << m_word_pyps.size()-1 << " backoff level" -    << (m_word_pyps.size()==2 ? 
":" : "s:") << std::endl; - - -  for (int i=0; i<(int)m_word_pyps.size(); ++i) -  { -    m_word_pyps.at(i).reserve(m_num_topics); -    for (int j=0; j<m_num_topics; ++j) -      m_word_pyps.at(i).push_back(new PYP<int>(0.01, 1.0, m_seed)); -  } -  std::cerr << std::endl; - -  m_document_pyps.reserve(corpus.num_documents()); -  for (int j=0; j<corpus.num_documents(); ++j) -    m_document_pyps.push_back(new PYP<int>(0.01, 1.0, m_seed)); - -  m_topic_p0 = 1.0/m_num_topics; -  m_term_p0 = 1.0/(F)m_backoff->terms_at_level(m_word_pyps.size()-1); -  //m_term_p0 = 1.0/corpus.num_types(); -  m_backoff_p0 = 1.0/corpus.num_documents(); - -  std::cerr << " Documents: " << corpus.num_documents() << " Terms: " -    << corpus.num_types() << std::endl; - -  int frequency_cutoff = freq_cutoff_start; -  std::cerr << " Context frequency cutoff set to " << frequency_cutoff << std::endl; - -  timer.Reset(); -  // Initialisation pass -  int document_id=0, topic_counter=0; -  for (Corpus::const_iterator corpusIt=corpus.begin(); -       corpusIt != corpus.end(); ++corpusIt, ++document_id) { -    m_corpus_topics.push_back(DocumentTopics(corpusIt->size(), 0)); - -    int term_index=0; -    for (Document::const_iterator docIt=corpusIt->begin(); -         docIt != corpusIt->end(); ++docIt, ++term_index) { -      topic_counter++; -      Term term = *docIt; - -      // sample a new_topic -      //int new_topic = (topic_counter % m_num_topics); -      int freq = corpus.context_count(term); -      int new_topic = -1; -      if (freq > frequency_cutoff -          && (!max_contexts_per_document || term_index < max_contexts_per_document)) { -        //new_topic = sample(document_id, term); -        //new_topic = document_id % m_num_topics; -        new_topic = (int) (rnd() * m_num_topics); - -        // add the new topic to the PYPs -        increment(term, new_topic); - -        if (m_use_topic_pyp) { -          F p0 = m_topic_pyp.prob(new_topic, m_topic_p0); -          int table_delta = m_document_pyps[document_id].increment(new_topic, p0); -          if (table_delta) -            m_topic_pyp.increment(new_topic, m_topic_p0); -        } -        else m_document_pyps[document_id].increment(new_topic, m_topic_p0); -      } - -      m_corpus_topics[document_id][term_index] = new_topic; -    } -  } -  std::cerr << "  Initialized in " << timer.Elapsed() << " seconds\n"; - -  int* randomDocIndices = new int[corpus.num_documents()]; -  for (int i = 0; i < corpus.num_documents(); ++i) -	  randomDocIndices[i] = i; - -  if (num_jobs < max_threads) -    num_jobs = max_threads; -  int job_incr = (int) ( (float)m_document_pyps.size() / float(num_jobs) ); - -  // Sampling phase -  for (int curr_sample=0; curr_sample < samples; ++curr_sample) { -    if (freq_cutoff_interval > 0 && curr_sample != 1 -        && curr_sample % freq_cutoff_interval == 1 -        && frequency_cutoff > freq_cutoff_end) { -      frequency_cutoff--; -      std::cerr << "\n Context frequency cutoff set to " << frequency_cutoff << std::endl; -    } - -    F temp = 1.0 / (temp_start - curr_sample*(temp_start-temp_end)/samples); -    std::cerr << "\n  -- Sample " << curr_sample << " (T=" << temp << ") "; std::cerr.flush(); - -    // Randomize the corpus indexing array -    int tmp; -    int processed_terms=0; -    /* -    for (int i = corpus.num_documents()-1; i > 0; --i) -    { -        //i+1 since j \in [0,i] but rnd() \in [0,1) -    	int j = (int)(rnd() * (i+1)); -      assert(j >= 0 && j <= i); -     	tmp = randomDocIndices[i]; -    	randomDocIndices[i] = 
-
-    // for each document in the corpus
-    int document_id;
-    for (int i=0; i<corpus.num_documents(); ++i) {
-    	document_id = randomDocIndices[i];
-
-      // for each term in the document
-      int term_index=0;
-      Document::const_iterator docEnd = corpus.at(document_id).end();
-      for (Document::const_iterator docIt=corpus.at(document_id).begin();
-           docIt != docEnd; ++docIt, ++term_index) {
-        // >= so that this cap agrees with the initialisation pass above
-        if (max_contexts_per_document && term_index >= max_contexts_per_document)
-          break;
-
-        Term term = *docIt;
-
-        int freq = corpus.context_count(term);
-        if (freq < frequency_cutoff)
-          continue;
-
-        processed_terms++;
-
-        // remove the previous topic from the PYPs
-        int current_topic = m_corpus_topics[document_id][term_index];
-        // a negative label means that term hasn't been sampled yet
-        if (current_topic >= 0) {
-          decrement(term, current_topic);
-
-          int table_delta = m_document_pyps[document_id].decrement(current_topic);
-          if (m_use_topic_pyp && table_delta < 0)
-            m_topic_pyp.decrement(current_topic);
-        }
-
-        // sample a new_topic
-        int new_topic = sample(document_id, term, temp);
-        //std::cerr << "TERM: " << dict->Convert(term) << " (" << term << ") " << " Old Topic: "
-        //  << current_topic << " New Topic: " << new_topic << "\n" << std::endl;
-
-        // add the new topic to the PYPs
-        m_corpus_topics[document_id][term_index] = new_topic;
-        increment(term, new_topic);
-
-        if (m_use_topic_pyp) {
-          F p0 = m_topic_pyp.prob(new_topic, m_topic_p0);
-          int table_delta = m_document_pyps[document_id].increment(new_topic, p0);
-          if (table_delta)
-            m_topic_pyp.increment(new_topic, m_topic_p0);
-        }
-        else m_document_pyps[document_id].increment(new_topic, m_topic_p0);
-      }
-      if (document_id && document_id % 10000 == 0) {
-        std::cerr << "."; std::cerr.flush();
-      }
-    }
-    std::cerr << " ||| LLH= " << log_likelihood();
-
-    if (curr_sample != 0 && curr_sample % 10 == 0) {
-    //if (true) {
-      std::cerr << " ||| time=" << (timer.Elapsed() / 10.0) << " sec/sample" << std::endl;
-      timer.Reset();
-      std::cerr << "     ... Resampling hyperparameters (";
-
-      // resample the hyperparameters
-      F log_p=0.0;
-      if (max_threads == 1)
-      {
-        std::cerr << "1 thread)" << std::endl; std::cerr.flush();
-        log_p += hresample_topics();
-        log_p += hresample_docs(0, m_document_pyps.size());
-      }
-      else
-      { //parallelize
-        std::cerr << max_threads << " threads, " << num_jobs << " jobs)" << std::endl; std::cerr.flush();
-
-        WorkerPool<JobReturnsF, F> pool(max_threads);
-        int i=0, sz = m_document_pyps.size();
-        //documents...
-        while (i <= sz - 2*job_incr)
-        {
-          JobReturnsF job = boost::bind(&PYPTopics::hresample_docs, this, i, i+job_incr);
-          pool.addJob(job);
-          i += job_incr;
-        }
-        //  do all remaining documents
-        JobReturnsF job = boost::bind(&PYPTopics::hresample_docs, this, i,sz);
-        pool.addJob(job);
-
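The dispatch above carves the document range into num_jobs roughly equal chunks and hands whatever remains to a final catch-all job, so the last chunk can be up to twice the nominal size. A hypothetical helper showing just the chunking arithmetic:

#include <iostream>
#include <utility>
#include <vector>

// Cut [0, sz) into roughly num_jobs chunks; the fixed-size chunks stop one
// chunk early and the remainder goes to a final job, as in the loop above.
std::vector<std::pair<int,int> > make_jobs(int sz, int num_jobs) {
  if (num_jobs < 1) num_jobs = 1;
  int job_incr = (int)((float)sz / (float)num_jobs);
  if (job_incr < 1) job_incr = 1;         // avoid a zero-width (endless) loop
  std::vector<std::pair<int,int> > jobs;
  int i = 0;
  while (i <= sz - 2 * job_incr) {        // keep at least one chunk in reserve
    jobs.push_back(std::make_pair(i, i + job_incr));
    i += job_incr;
  }
  jobs.push_back(std::make_pair(i, sz));  // remainder: between 1x and 2x size
  return jobs;
}

int main() {
  std::vector<std::pair<int,int> > jobs = make_jobs(103, 4);
  for (size_t j = 0; j < jobs.size(); ++j)
    std::cout << "[" << jobs[j].first << "," << jobs[j].second << ")\n";
  return 0;
}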
-        //topics...
-        JobReturnsF topics_job = boost::bind(&PYPTopics::hresample_topics, this);
-        pool.addJob(topics_job);
-
-        log_p += pool.get_result(); //blocks
-
-      }
-
-      if (m_use_topic_pyp) {
-        m_topic_pyp.resample_prior(rnd);
-        log_p += m_topic_pyp.log_restaurant_prob();
-      }
-
-      std::cerr.precision(10);
-      std::cerr << " ||| LLH=" << log_likelihood() << " ||| resampling time=" << timer.Elapsed() << " sec" << std::endl;
-      timer.Reset();
-
-      int k=0;
-      std::cerr << "Topics distribution: ";
-      std::cerr.precision(2);
-      for (PYPs::iterator pypIt=m_word_pyps.front().begin();
-           pypIt != m_word_pyps.front().end(); ++pypIt, ++k) {
-        if (k % 5 == 0) std::cerr << std::endl << '\t';
-        std::cerr << "<" << k << ":" << pypIt->num_customers() << ","
-          << pypIt->num_types() << "," << m_topic_pyp.prob(k, m_topic_p0) << "> ";
-      }
-      std::cerr.precision(10);
-      std::cerr << std::endl;
-    }
-  }
-  delete [] randomDocIndices;
-}
-
-PYPTopics::F PYPTopics::hresample_docs(int start, int end)
-{
-  int resample_counter=0;
-  F log_p = 0.0;
-  assert(start >= 0);
-  assert(end >= 0);
-  assert(start <= end);
-  for (int i=start; i < end; ++i)
-  {
-    m_document_pyps[i].resample_prior(rnd);
-    log_p += m_document_pyps[i].log_restaurant_prob();
-    if (resample_counter++ % 5000 == 0) {
-      std::cerr << "."; std::cerr.flush();
-    }
-  }
-  return log_p;
-}
-
-PYPTopics::F PYPTopics::hresample_topics()
-{
-  F log_p = 0.0;
-  for (std::vector<PYPs>::iterator levelIt=m_word_pyps.begin();
-      levelIt != m_word_pyps.end(); ++levelIt) {
-    for (PYPs::iterator pypIt=levelIt->begin();
-        pypIt != levelIt->end(); ++pypIt) {
-
-      pypIt->resample_prior(rnd);
-      log_p += pypIt->log_restaurant_prob();
-    }
-    std::cerr << log_p << std::endl;
-  }
-  return log_p;
-}
-
-PYPTopics::F PYPTopics::log_likelihood() const
-{
-  F log_p = 0.0;
-
-  // LLH of topic term distribution
-  // i counts backoff levels, so it advances with the outer loop only;
-  // it indexes terms_at_level() below
-  size_t i=0;
-  for (std::vector<PYPs>::const_iterator levelIt=m_word_pyps.begin();
-      levelIt != m_word_pyps.end(); ++levelIt, ++i) {
-    for (PYPs::const_iterator pypIt=levelIt->begin();
-        pypIt != levelIt->end(); ++pypIt) {
-      log_p += pypIt->log_restaurant_prob();
-
-      if (i == m_word_pyps.size()-1)
-        log_p += (pypIt->num_tables() * -log(m_backoff->terms_at_level(i)));
-      else
-        log_p += (pypIt->num_tables() * log(m_term_p0));
-    }
-  }
-  std::cerr << " TERM LLH: " << log_p << " "; //std::endl;
-
-  // LLH of document topic distribution
-  for (size_t i=0; i < m_document_pyps.size(); ++i) {
-    log_p += m_document_pyps[i].log_restaurant_prob();
-    if (!m_use_topic_pyp) log_p += (m_document_pyps[i].num_tables() * log(m_topic_p0));
-  }
-  if (m_use_topic_pyp) {
-    log_p += m_topic_pyp.log_restaurant_prob();
-    log_p += (m_topic_pyp.num_tables() * log(m_topic_p0));
-  }
-
-  return log_p;
-}
-
-void PYPTopics::decrement(const Term& term, int topic, int level) {
-  //std::cerr << "PYPTopics::decrement(" << term << "," << topic << "," << level << ")" << std::endl;
-  int table_delta = m_word_pyps.at(level).at(topic).decrement(term);
-  if (table_delta && m_backoff.get()) {
-    Term backoff_term = (*m_backoff)[term];
-    if (!m_backoff->is_null(backoff_term))
-      decrement(backoff_term, topic, level+1);
-  }
-}
-
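Note that decrement() here, and increment() below, recurse into the backoff level only when a table actually closes or opens (table_delta), whereas the MPI variant earlier recurses on every update; the table-conditional form is the standard hierarchical CRP behaviour, since the parent restaurant gains or loses one customer per child table. A toy sketch of that recursion, with hypothetical names simplifying the PYP interface:

#include <vector>

// Toy restaurant: pretend every insertion opens a new table.
struct Restaurant {
  int increment(int /*term*/) { return 1; }  // 1 = a new table was opened
};

// Push one customer up the backoff hierarchy whenever a new table opens:
// the coarser level "generated" the dish, so it gains exactly one customer
// per table opened below it.
void increment_with_backoff(std::vector<Restaurant>& levels,
                            const std::vector<int>& backoff_of,
                            int term, int level) {
  int new_table = levels[level].increment(term);
  if (new_table && level + 1 < (int)levels.size())
    increment_with_backoff(levels, backoff_of, backoff_of[term], level + 1);
}

int main() {
  std::vector<Restaurant> levels(2);
  std::vector<int> backoff_of(1, 0);   // term 0 backs off to coarse class 0
  increment_with_backoff(levels, backoff_of, 0, 0);
  return 0;
}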
level << ")" << std::endl; -  int table_delta = m_word_pyps.at(level).at(topic).increment(term, word_pyps_p0(term, topic, level)); - -  if (table_delta && m_backoff.get()) { -    Term backoff_term = (*m_backoff)[term]; -    if (!m_backoff->is_null(backoff_term)) -      increment(backoff_term, topic, level+1); -  } -} - -int PYPTopics::sample(const DocumentId& doc, const Term& term, F inv_temp) { -  // First pass: collect probs -  F sum=0.0; -  std::vector<F> sums; -  for (int k=0; k<m_num_topics; ++k) { -    F p_w_k = prob(term, k); - -    F topic_prob = m_topic_p0; -    if (m_use_topic_pyp) topic_prob = m_topic_pyp.prob(k, m_topic_p0); - -    //F p_k_d = m_document_pyps[doc].prob(k, topic_prob); -    F p_k_d = m_document_pyps[doc].unnormalised_prob(k, topic_prob); - -    F prob = p_w_k*p_k_d; -    /* -    if (prob < 0.0) { std::cerr << "\n\n" << prob << " " << p_w_k << " " << p_k_d << std::endl; assert(false); } -    if (prob > 1.0) { std::cerr << "\n\n" << prob << " " << p_w_k << " " << p_k_d << std::endl; assert(false); } -    assert (pow(prob, inv_temp) >= 0.0); -    assert (pow(prob, inv_temp) <= 1.0); -    */ -    sum += pow(prob, inv_temp); -    sums.push_back(sum); -  } -  // Second pass: sample a topic -  F cutoff = rnd() * sum; -  for (int k=0; k<m_num_topics; ++k) { -    if (cutoff <= sums[k]) -      return k; -  } -  assert(false); -} - -PYPTopics::F PYPTopics::word_pyps_p0(const Term& term, int topic, int level) const { -  //for (int i=0; i<level+1; ++i) std::cerr << "  "; -  //std::cerr << "PYPTopics::word_pyps_p0(" << term << "," << topic << "," << level << ")" << std::endl; - -  F p0 = m_term_p0; -  if (m_backoff.get()) { -    //static F fudge=m_backoff_p0; // TODO - -    Term backoff_term = (*m_backoff)[term]; -    //std::cerr << "T: " << term << " BO: " << backoff_term << std::endl; -    if (!m_backoff->is_null(backoff_term)) { -      assert (level < m_backoff->order()); -      //p0 = (1.0/(F)m_backoff->terms_at_level(level))*prob(backoff_term, topic, level+1); -      p0 = m_term_p0*prob(backoff_term, topic, level+1); -      p0 = prob(backoff_term, topic, level+1); -    } -    else -      p0 = (1.0/(F) m_backoff->terms_at_level(level)); -      //p0 = m_term_p0; -  } -  //for (int i=0; i<level+1; ++i) std::cerr << "  "; -  //std::cerr << "PYPTopics::word_pyps_p0(" << term << "," << topic << "," << level << ") = " << p0 << std::endl; -  return p0; -} - -PYPTopics::F PYPTopics::prob(const Term& term, int topic, int level) const { -  //for (int i=0; i<level+1; ++i) std::cerr << "  "; -  //std::cerr << "PYPTopics::prob(" << dict->Convert(term) << "," << topic << "," << level << ")" << std::endl; - -  F p0 = word_pyps_p0(term, topic, level); -  F p_w_k = m_word_pyps.at(level).at(topic).prob(term, p0); - -  /* -  for (int i=0; i<level+1; ++i) std::cerr << "  "; -  std::cerr << "PYPTopics::prob(" << dict->Convert(term) << "," << topic << "," << level << ") = " << p_w_k << std::endl; -  for (int i=0; i<level+1; ++i) std::cerr << "  "; -  m_word_pyps.at(level).at(topic).debug_info(std::cerr); -  */ -  return p_w_k; -} - -int PYPTopics::max_topic() const { -  if (!m_use_topic_pyp) -    return -1; - -  F current_max=0.0; -  int current_topic=-1; -  for (int k=0; k<m_num_topics; ++k) { -    F prob = m_topic_pyp.prob(k, m_topic_p0); -    if (prob > current_max) { -      current_max = prob; -      current_topic = k; -    } -  } -  assert(current_topic >= 0); -  return current_topic; -} - -std::pair<int,PYPTopics::F> PYPTopics::max(const DocumentId& doc) const { -  //std::cerr << 
"PYPTopics::max(" << doc << "," << term << ")" << std::endl; -  // collect probs -  F current_max=0.0; -  int current_topic=-1; -  for (int k=0; k<m_num_topics; ++k) { -    //F p_w_k = prob(term, k); - -    F topic_prob = m_topic_p0; -    if (m_use_topic_pyp) -      topic_prob = m_topic_pyp.prob(k, m_topic_p0); - -    F prob = 0; -    if (doc < 0) prob = topic_prob; -    else         prob = m_document_pyps[doc].prob(k, topic_prob); - -    if (prob > current_max) { -      current_max = prob; -      current_topic = k; -    } -  } -  assert(current_topic >= 0); -  assert(current_max >= 0); -  return std::make_pair(current_topic, current_max); -} - -std::pair<int,PYPTopics::F> PYPTopics::max(const DocumentId& doc, const Term& term) const { -  //std::cerr << "PYPTopics::max(" << doc << "," << term << ")" << std::endl; -  // collect probs -  F current_max=0.0; -  int current_topic=-1; -  for (int k=0; k<m_num_topics; ++k) { -    F p_w_k = prob(term, k); - -    F topic_prob = m_topic_p0; -    if (m_use_topic_pyp) -      topic_prob = m_topic_pyp.prob(k, m_topic_p0); - -    F p_k_d = 0; -    if (doc < 0) p_k_d = topic_prob; -    else         p_k_d = m_document_pyps[doc].prob(k, topic_prob); - -    F prob = (p_w_k*p_k_d); -    if (prob > current_max) { -      current_max = prob; -      current_topic = k; -    } -  } -  assert(current_topic >= 0); -  assert(current_max >= 0); -  return std::make_pair(current_topic,current_max); -} - -std::ostream& PYPTopics::print_document_topics(std::ostream& out) const { -  for (CorpusTopics::const_iterator corpusIt=m_corpus_topics.begin(); -       corpusIt != m_corpus_topics.end(); ++corpusIt) { -    int term_index=0; -    for (DocumentTopics::const_iterator docIt=corpusIt->begin(); -         docIt != corpusIt->end(); ++docIt, ++term_index) { -      if (term_index) out << " "; -      out << *docIt; -    } -    out << std::endl; -  } -  return out; -} - -std::ostream& PYPTopics::print_topic_terms(std::ostream& out) const { -  for (PYPs::const_iterator pypsIt=m_word_pyps.front().begin(); -       pypsIt != m_word_pyps.front().end(); ++pypsIt) { -    int term_index=0; -    for (PYP<int>::const_iterator termIt=pypsIt->begin(); -         termIt != pypsIt->end(); ++termIt, ++term_index) { -      if (term_index) out << " "; -      out << termIt->first << ":" << termIt->second; -    } -    out << std::endl; -  } -  return out; -} diff --git a/gi/pyp-topics/src/pyp-topics.hh b/gi/pyp-topics/src/pyp-topics.hh deleted file mode 100644 index 3a910540..00000000 --- a/gi/pyp-topics/src/pyp-topics.hh +++ /dev/null @@ -1,98 +0,0 @@ -#ifndef PYP_TOPICS_HH -#define PYP_TOPICS_HH - -#include <vector> -#include <iostream> -#include <boost/ptr_container/ptr_vector.hpp> - -#include <boost/random/uniform_real.hpp> -#include <boost/random/variate_generator.hpp> -#include <boost/random/mersenne_twister.hpp> - -#include "pyp.hh" -#include "corpus.hh" -#include "workers.hh" - -class PYPTopics { -public: -  typedef std::vector<int> DocumentTopics; -  typedef std::vector<DocumentTopics> CorpusTopics; -  typedef long double F; - -public: -  PYPTopics(int num_topics, bool use_topic_pyp=false, unsigned long seed = 0, -        int max_threads = 1, int num_jobs = 1)  -    : m_num_topics(num_topics), m_word_pyps(1),  -    m_topic_pyp(0.5,1.0,seed), m_use_topic_pyp(use_topic_pyp), -    m_seed(seed), -    uni_dist(0,1), rng(seed == 0 ? 
(unsigned long)this : seed),  -    rnd(rng, uni_dist), max_threads(max_threads), num_jobs(num_jobs) {} - -  void sample_corpus(const Corpus& corpus, int samples, -                     int freq_cutoff_start=0, int freq_cutoff_end=0,  -                     int freq_cutoff_interval=0, -                     int max_contexts_per_document=0, -                     F temp_start=1.0, F temp_end=1.0); - -  int sample(const DocumentId& doc, const Term& term, F inv_temp=1.0); -  std::pair<int,F> max(const DocumentId& doc, const Term& term) const; -  std::pair<int,F> max(const DocumentId& doc) const; -  int max_topic() const; - -  void set_backoff(const std::string& filename) { -    m_backoff.reset(new TermBackoff); -    m_backoff->read(filename); -    m_word_pyps.clear(); -    m_word_pyps.resize(m_backoff->order(), PYPs()); -  } -  void set_backoff(TermBackoffPtr backoff) { -    m_backoff = backoff; -    m_word_pyps.clear(); -    m_word_pyps.resize(m_backoff->order(), PYPs()); -  } - -  F prob(const Term& term, int topic, int level=0) const; -  void decrement(const Term& term, int topic, int level=0); -  void increment(const Term& term, int topic, int level=0); - -  F log_likelihood() const; - -  std::ostream& print_document_topics(std::ostream& out) const; -  std::ostream& print_topic_terms(std::ostream& out) const; - -private: -  F word_pyps_p0(const Term& term, int topic, int level) const; - -  int m_num_topics; -  F m_term_p0, m_topic_p0, m_backoff_p0; - -  CorpusTopics m_corpus_topics; -  typedef boost::ptr_vector< PYP<int> > PYPs; -  PYPs m_document_pyps; -  std::vector<PYPs> m_word_pyps; -  PYP<int> m_topic_pyp; -  bool m_use_topic_pyp; - -  unsigned long m_seed; - -  typedef boost::mt19937 base_generator_type; -  typedef boost::uniform_real<> uni_dist_type; -  typedef boost::variate_generator<base_generator_type&, uni_dist_type> gen_type; - -  uni_dist_type uni_dist; -  base_generator_type rng; //this gets the seed -  gen_type rnd; //instantiate: rnd(rng, uni_dist) -                //call: rnd() generates uniform on [0,1) - -  typedef boost::function<F()> JobReturnsF; - -  F hresample_docs(int start, int end); //does i in [start, end) - -  F hresample_topics(); -   -  int max_threads; -  int num_jobs; -  TermBackoffPtr m_backoff; -}; - -#endif // PYP_TOPICS_HH diff --git a/gi/pyp-topics/src/pyp.hh b/gi/pyp-topics/src/pyp.hh deleted file mode 100644 index b1cb62be..00000000 --- a/gi/pyp-topics/src/pyp.hh +++ /dev/null @@ -1,566 +0,0 @@ -#ifndef _pyp_hh -#define _pyp_hh - -#include "slice-sampler.h" -#include <math.h> -#include <map> -#include <tr1/unordered_map> -//#include <google/sparse_hash_map> - -#include <boost/random/uniform_real.hpp> -#include <boost/random/variate_generator.hpp> -#include <boost/random/mersenne_twister.hpp> - -#include "log_add.h" -#include "mt19937ar.h" - -// -// Pitman-Yor process with customer and table tracking -// - -template <typename Dish, typename Hash=std::tr1::hash<Dish> > -class PYP : protected std::tr1::unordered_map<Dish, int, Hash> -//class PYP : protected google::sparse_hash_map<Dish, int, Hash> -{ -public: -  using std::tr1::unordered_map<Dish,int>::const_iterator; -  using std::tr1::unordered_map<Dish,int>::iterator; -  using std::tr1::unordered_map<Dish,int>::begin; -  using std::tr1::unordered_map<Dish,int>::end; -//  using google::sparse_hash_map<Dish,int>::const_iterator; -//  using google::sparse_hash_map<Dish,int>::iterator; -//  using google::sparse_hash_map<Dish,int>::begin; -//  using google::sparse_hash_map<Dish,int>::end; - -  PYP(double a, 
double b, unsigned long seed = 0, Hash hash=Hash()); - -  virtual int increment(Dish d, double p0); -  virtual int decrement(Dish d); - -  // lookup functions -  int count(Dish d) const; -  double prob(Dish dish, double p0) const; -  double prob(Dish dish, double dcd, double dca,  -              double dtd, double dta, double p0) const; -  double unnormalised_prob(Dish dish, double p0) const; - -  int num_customers() const { return _total_customers; } -  int num_types() const { return std::tr1::unordered_map<Dish,int>::size(); } -  //int num_types() const { return google::sparse_hash_map<Dish,int>::size(); } -  bool empty() const { return _total_customers == 0; } - -  double log_prob(Dish dish, double log_p0) const; -  // nb. d* are NOT logs -  double log_prob(Dish dish, double dcd, double dca,  -                       double dtd, double dta, double log_p0) const; - -  int num_tables(Dish dish) const; -  int num_tables() const; - -  double a() const { return _a; } -  void set_a(double a) { _a = a; } - -  double b() const { return _b; } -  void set_b(double b) { _b = b; } - -  virtual void clear(); -  std::ostream& debug_info(std::ostream& os) const; - -  double log_restaurant_prob() const; -  double log_prior() const; -  static double log_prior_a(double a, double beta_a, double beta_b); -  static double log_prior_b(double b, double gamma_c, double gamma_s); - -  template <typename Uniform01> -    void resample_prior(Uniform01& rnd); -  template <typename Uniform01> -    void resample_prior_a(Uniform01& rnd); -  template <typename Uniform01> -    void resample_prior_b(Uniform01& rnd); - -protected: -  double _a, _b; // parameters of the Pitman-Yor distribution -  double _a_beta_a, _a_beta_b; // parameters of Beta prior on a -  double _b_gamma_s, _b_gamma_c; // parameters of Gamma prior on b - -  struct TableCounter { -    TableCounter() : tables(0) {}; -    int tables; -    std::map<int, int> table_histogram; // num customers at table -> number tables -  }; -  typedef std::tr1::unordered_map<Dish, TableCounter, Hash> DishTableType; -  //typedef google::sparse_hash_map<Dish, TableCounter, Hash> DishTableType; -  DishTableType _dish_tables; -  int _total_customers, _total_tables; - -  typedef boost::mt19937 base_generator_type; -  typedef boost::uniform_real<> uni_dist_type; -  typedef boost::variate_generator<base_generator_type&, uni_dist_type> gen_type; - -//  uni_dist_type uni_dist; -//  base_generator_type rng; //this gets the seed -//  gen_type rnd; //instantiate: rnd(rng, uni_dist) -                //call: rnd() generates uniform on [0,1) - -  // Function objects for calculating the parts of the log_prob for  -  // the parameters a and b -  struct resample_a_type { -    int n, m; double b, a_beta_a, a_beta_b; -    const DishTableType& dish_tables; -    resample_a_type(int n, int m, double b, double a_beta_a,  -                    double a_beta_b, const DishTableType& dish_tables) -      : n(n), m(m), b(b), a_beta_a(a_beta_a), a_beta_b(a_beta_b), dish_tables(dish_tables) {} - -    double operator() (double proposed_a) const { -      double log_prior = log_prior_a(proposed_a, a_beta_a, a_beta_b); -      double log_prob = 0.0; -      double lgamma1a = lgamma(1.0 - proposed_a); -      for (typename DishTableType::const_iterator dish_it=dish_tables.begin(); dish_it != dish_tables.end(); ++dish_it)  -        for (std::map<int, int>::const_iterator table_it=dish_it->second.table_histogram.begin();  -             table_it !=dish_it->second.table_histogram.end(); ++table_it)  -          
log_prob += (table_it->second * (lgamma(table_it->first - proposed_a) - lgamma1a)); - -      log_prob += (proposed_a == 0.0 ? (m-1.0)*log(b)  -                   : ((m-1.0)*log(proposed_a) + lgamma((m-1.0) + b/proposed_a) - lgamma(b/proposed_a))); -      assert(std::isfinite(log_prob)); -      return log_prob + log_prior; -    } -  }; - -  struct resample_b_type { -    int n, m; double a, b_gamma_c, b_gamma_s; -    resample_b_type(int n, int m, double a, double b_gamma_c, double b_gamma_s) -      : n(n), m(m), a(a), b_gamma_c(b_gamma_c), b_gamma_s(b_gamma_s) {} - -    double operator() (double proposed_b) const { -      double log_prior = log_prior_b(proposed_b, b_gamma_c, b_gamma_s); -      double log_prob = 0.0; -      log_prob += (a == 0.0  ? (m-1.0)*log(proposed_b)  -                  : ((m-1.0)*log(a) + lgamma((m-1.0) + proposed_b/a) - lgamma(proposed_b/a))); -      log_prob += (lgamma(1.0+proposed_b) - lgamma(n+proposed_b)); -      return log_prob + log_prior; -    } -  }; -    -  /* lbetadist() returns the log probability density of x under a Beta(alpha,beta) -   * distribution. - copied from Mark Johnson's gammadist.c -   */ -  static long double lbetadist(long double x, long double alpha, long double beta); - -  /* lgammadist() returns the log probability density of x under a Gamma(alpha,beta) -   * distribution - copied from Mark Johnson's gammadist.c -   */ -  static long double lgammadist(long double x, long double alpha, long double beta); - -}; - -template <typename Dish, typename Hash> -PYP<Dish,Hash>::PYP(double a, double b, unsigned long seed, Hash) -: std::tr1::unordered_map<Dish, int, Hash>(10), _a(a), _b(b),  -//: google::sparse_hash_map<Dish, int, Hash>(10), _a(a), _b(b),  -  _a_beta_a(1), _a_beta_b(1), _b_gamma_s(1), _b_gamma_c(1), -  //_a_beta_a(1), _a_beta_b(1), _b_gamma_s(10), _b_gamma_c(0.1), -  _total_customers(0), _total_tables(0)//, -  //uni_dist(0,1), rng(seed == 0 ? 
(unsigned long)this : seed), rnd(rng, uni_dist)
-{
-//  std::cerr << "\t##PYP<Dish,Hash>::PYP(a=" << _a << ",b=" << _b << ")" << std::endl;
-  //set_deleted_key(-std::numeric_limits<Dish>::max());
-}
-
-template <typename Dish, typename Hash>
-double
-PYP<Dish,Hash>::prob(Dish dish, double p0) const
-{
-  int c = count(dish), t = num_tables(dish);
-  double r = num_tables() * _a + _b;
-  //std::cerr << "\t\t\t\tPYP<Dish,Hash>::prob(" << dish << "," << p0 << ") c=" << c << " r=" << r << std::endl;
-  if (c > 0)
-    return (c - _a * t + r * p0) / (num_customers() + _b);
-  else
-    return r * p0 / (num_customers() + _b);
-}
-
-template <typename Dish, typename Hash>
-double
-PYP<Dish,Hash>::unnormalised_prob(Dish dish, double p0) const
-{
-  int c = count(dish), t = num_tables(dish);
-  double r = num_tables() * _a + _b;
-  if (c > 0) return (c - _a * t + r * p0);
-  else       return r * p0;
-}
-
-template <typename Dish, typename Hash>
-double
-PYP<Dish,Hash>::prob(Dish dish, double dcd, double dca,
-                     double dtd, double dta, double p0)
-const
-{
-  int c = count(dish) + dcd, t = num_tables(dish) + dtd;
-  double r = (num_tables() + dta) * _a + _b;
-  if (c > 0)
-    return (c - _a * t + r * p0) / (num_customers() + dca + _b);
-  else
-    return r * p0 / (num_customers() + dca + _b);
-}
-
-template <typename Dish, typename Hash>
-double
-PYP<Dish,Hash>::log_prob(Dish dish, double log_p0) const
-{
-  using std::log;
-  int c = count(dish), t = num_tables(dish);
-  double r = log(num_tables() * _a + _b);
-  if (c > 0)
-    return Log<double>::add(log(c - _a * t), r + log_p0)
-      - log(num_customers() + _b);
-  else
-    return r + log_p0 - log(num_customers() + _b);
-}
-
-template <typename Dish, typename Hash>
-double
-PYP<Dish,Hash>::log_prob(Dish dish, double dcd, double dca,
-                         double dtd, double dta, double log_p0)
-const
-{
-  using std::log;
-  int c = count(dish) + dcd, t = num_tables(dish) + dtd;
-  double r = log((num_tables() + dta) * _a + _b);
-  if (c > 0)
-    return Log<double>::add(log(c - _a * t), r + log_p0)
-      - log(num_customers() + dca + _b);
-  else
-    return r + log_p0 - log(num_customers() + dca + _b);
-}
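All four predictive functions above implement the same Pitman-Yor rule, P(dish) = (c - a*t + (a*T + b)*p0) / (n + b), where c is the number of customers eating the dish, t the number of tables serving it, T and n the restaurant totals, and p0 the base probability. A self-contained sketch of that formula (the function name, counts, and parameter values below are invented for illustration):

#include <iostream>

// Predictive probability under a Pitman-Yor process with discount a and
// concentration b, mirroring PYP<Dish,Hash>::prob() above.
double pyp_prob(int c, int t, int T, int n, double a, double b, double p0) {
  return (c - a * t + (a * T + b) * p0) / (n + b);
}

int main() {
  // A restaurant with 10 customers at 4 tables; one dish seen 3 times at 1 table.
  double a = 0.5, b = 1.0, p0 = 0.01;
  std::cout << "seen dish:   " << pyp_prob(3, 1, 4, 10, a, b, p0) << "\n"  // ~0.230
            << "unseen dish: " << pyp_prob(0, 0, 4, 10, a, b, p0) << "\n"; // ~0.0027
}

Setting c = t = 0 recovers the unseen-dish branch of prob(): only the (a*T + b)*p0 mass remains.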
-
-template <typename Dish, typename Hash>
-int
-PYP<Dish,Hash>::increment(Dish dish, double p0) {
-  int delta = 0;
-  TableCounter &tc = _dish_tables[dish];
-
-  // seated on a new or existing table?
-  int c = count(dish), t = num_tables(dish), T = num_tables();
-  double pshare = (c > 0) ? (c - _a*t) : 0.0;
-  double pnew = (_b + _a*T) * p0;
-  assert (pshare >= 0.0);
-  //assert (pnew > 0.0);
-
-  //if (rnd() < pnew / (pshare + pnew)) {
-  if (mt_genrand_res53() < pnew / (pshare + pnew)) {
-    // assign to a new table
-    tc.tables += 1;
-    tc.table_histogram[1] += 1;
-    _total_tables += 1;
-    delta = 1;
-  }
-  else {
-    // randomly assign to an existing table
-    // remove constant denominator from inner loop
-    //double r = rnd() * (c - _a*t);
-    double r = mt_genrand_res53() * (c - _a*t);
-    for (std::map<int,int>::iterator
-         hit = tc.table_histogram.begin();
-         hit != tc.table_histogram.end(); ++hit) {
-      r -= ((hit->first - _a) * hit->second);
-      if (r <= 0) {
-        tc.table_histogram[hit->first+1] += 1;
-        hit->second -= 1;
-        if (hit->second == 0)
-          tc.table_histogram.erase(hit);
-        break;
-      }
-    }
-    if (r > 0) {
-      std::cerr << r << " " << c << " " << _a << " " << t << std::endl;
-      assert(false);
-    }
-    delta = 0;
-  }
-
-  std::tr1::unordered_map<Dish,int,Hash>::operator[](dish) += 1;
-  //google::sparse_hash_map<Dish,int,Hash>::operator[](dish) += 1;
-  _total_customers += 1;
-
-  return delta;
-}
-
-template <typename Dish, typename Hash>
-int
-PYP<Dish,Hash>::count(Dish dish) const
-{
-  typename std::tr1::unordered_map<Dish, int>::const_iterator
-  //typename google::sparse_hash_map<Dish, int>::const_iterator
-    dcit = find(dish);
-  if (dcit != end())
-    return dcit->second;
-  else
-    return 0;
-}
-
-template <typename Dish, typename Hash>
-int
-PYP<Dish,Hash>::decrement(Dish dish)
-{
-  typename std::tr1::unordered_map<Dish, int>::iterator dcit = find(dish);
-  //typename google::sparse_hash_map<Dish, int>::iterator dcit = find(dish);
-  if (dcit == end()) {
-    std::cerr << dish << std::endl;
-    assert(false);
-  }
-
-  int delta = 0;
-
-  typename std::tr1::unordered_map<Dish, TableCounter>::iterator dtit = _dish_tables.find(dish);
-  //typename google::sparse_hash_map<Dish, TableCounter>::iterator dtit = _dish_tables.find(dish);
-  if (dtit == _dish_tables.end()) {
-    std::cerr << dish << std::endl;
-    assert(false);
-  }
-  TableCounter &tc = dtit->second;
-
-  //std::cerr << "\tdecrement for " << dish << "\n";
-  //std::cerr << "\tBEFORE histogram: " << tc.table_histogram << " ";
-  //std::cerr << "count: " << count(dish) << " ";
-  //std::cerr << "tables: " << tc.tables << "\n";
-
-  //double r = rnd() * count(dish);
-  double r = mt_genrand_res53() * count(dish);
-  for (std::map<int,int>::iterator hit = tc.table_histogram.begin();
-       hit != tc.table_histogram.end(); ++hit)
-  {
-    //r -= (hit->first - _a) * hit->second;
-    r -= (hit->first) * hit->second;
-    if (r <= 0)
-    {
-      if (hit->first > 1)
-        tc.table_histogram[hit->first-1] += 1;
-      else
-      {
-        delta = -1;
-        tc.tables -= 1;
-        _total_tables -= 1;
-      }
-
-      hit->second -= 1;
-      if (hit->second == 0) tc.table_histogram.erase(hit);
-      break;
-    }
-  }
-  if (r > 0) {
-    std::cerr << r << " " << count(dish) << " " << _a << " " << num_tables(dish) << std::endl;
-    assert(false);
-  }
-
-  // remove the customer
-  dcit->second -= 1;
-  _total_customers -= 1;
-  assert(dcit->second >= 0);
-  if (dcit->second == 0) {
-    erase(dcit);
-    _dish_tables.erase(dtit);
-    //std::cerr << "\tAFTER histogram: Empty\n";
-  }
-  else {
-    //std::cerr << "\tAFTER histogram: " << _dish_tables[dish].table_histogram << " ";
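// A worked instance of the removal loop above: with table_histogram =
// {2 customers -> 3 tables, 5 customers -> 1 table} and count(dish) = 11,
// a draw r in (0,6] lands on one of the three 2-customer tables (one table
// moves from key 2 to key 1), while r in (6,11] lands on the 5-customer
// table (one table moves from key 5 to key 4). Weighting tables by raw
// occupancy makes every seated customer equally likely to be removed,
// unlike the discounted weights used when seating new customers.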
-    //std::cerr << "count: " << count(dish) << " "; -    //std::cerr << "tables: " << _dish_tables[dish].tables << "\n"; -  } - -  return delta; -} - -template <typename Dish, typename Hash> -int  -PYP<Dish,Hash>::num_tables(Dish dish) const -{ -  typename std::tr1::unordered_map<Dish, TableCounter, Hash>::const_iterator  -  //typename google::sparse_hash_map<Dish, TableCounter, Hash>::const_iterator  -    dtit = _dish_tables.find(dish); - -  //assert(dtit != _dish_tables.end()); -  if (dtit == _dish_tables.end()) -    return 0; - -  return dtit->second.tables; -} - -template <typename Dish, typename Hash> -int  -PYP<Dish,Hash>::num_tables() const -{ -  return _total_tables; -} - -template <typename Dish, typename Hash> -std::ostream& -PYP<Dish,Hash>::debug_info(std::ostream& os) const -{ -  int hists = 0, tables = 0; -  for (typename std::tr1::unordered_map<Dish, TableCounter, Hash>::const_iterator  -  //for (typename google::sparse_hash_map<Dish, TableCounter, Hash>::const_iterator  -       dtit = _dish_tables.begin(); dtit != _dish_tables.end(); ++dtit) -  { -    hists += dtit->second.table_histogram.size(); -    tables += dtit->second.tables; - -//    if (dtit->second.tables <= 0) -//      std::cerr << dtit->first << " " << count(dtit->first) << std::endl; -    assert(dtit->second.tables > 0); -    assert(!dtit->second.table_histogram.empty()); - -//    os << "Dish " << dtit->first << " has " << count(dtit->first) << " customers, and is sitting at " << dtit->second.tables << " tables.\n";  -    for (std::map<int,int>::const_iterator  -         hit = dtit->second.table_histogram.begin(); -         hit != dtit->second.table_histogram.end(); ++hit) { -//      os << "    " << hit->second << " tables with " << hit->first << " customers." << std::endl;  -      assert(hit->second > 0); -    } -  } - -  os << "restaurant has "  -    << _total_customers << " customers; " -    << _total_tables << " tables; "  -    << tables << " tables'; "  -    << num_types() << " dishes; " -    << _dish_tables.size() << " dishes'; and " -    << hists << " histogram entries\n"; - -  return os; -} - -template <typename Dish, typename Hash> -void  -PYP<Dish,Hash>::clear() -{ -  this->std::tr1::unordered_map<Dish,int,Hash>::clear(); -  //this->google::sparse_hash_map<Dish,int,Hash>::clear(); -  _dish_tables.clear(); -  _total_tables = _total_customers = 0; -} - -// log_restaurant_prob returns the log probability of the PYP table configuration. -// Excludes Hierarchical P0 term which must be calculated separately. -template <typename Dish, typename Hash> -double  -PYP<Dish,Hash>::log_restaurant_prob() const { -  if (_total_customers < 1) -    return (double)0.0; - -  double log_prob = 0.0; -  double lgamma1a = lgamma(1.0-_a); - -  //std::cerr << "-------------------\n" << std::endl; -  for (typename DishTableType::const_iterator dish_it=_dish_tables.begin();  -       dish_it != _dish_tables.end(); ++dish_it) { -    for (std::map<int, int>::const_iterator table_it=dish_it->second.table_histogram.begin();  -         table_it !=dish_it->second.table_histogram.end(); ++table_it) { -      log_prob += (table_it->second * (lgamma(table_it->first - _a) - lgamma1a)); -      //std::cerr << "|" << dish_it->first->parent << " --> " << dish_it->first->rhs << " " << table_it->first << " " << table_it->second << " " << log_prob; -    } -  } -  //std::cerr << std::endl; - -  log_prob += (_a == (double)0.0 ? 
(_total_tables-1.0)*log(_b) : (_total_tables-1.0)*log(_a) + lgamma((_total_tables-1.0) + _b/_a) - lgamma(_b/_a)); -  //std::cerr << "\t\t" << log_prob << std::endl; -  log_prob += (lgamma(1.0 + _b) - lgamma(_total_customers + _b)); - -  //std::cerr << _total_customers << " " << _total_tables << " " << log_prob << " " << log_prior() << std::endl; -  //std::cerr << _a << " " << _b << std::endl; -  if (!std::isfinite(log_prob)) { -    assert(false); -  } -  //return log_prob; -  if (log_prob > 0.0) -    std::cerr << log_prob << std::endl; -  return log_prob;// + log_prior(); -} - -template <typename Dish, typename Hash> -double  -PYP<Dish,Hash>::log_prior() const { -  double prior = 0.0; -  if (_a_beta_a > 0.0 && _a_beta_b > 0.0 && _a > 0.0) -    prior += log_prior_a(_a, _a_beta_a, _a_beta_b); -  if (_b_gamma_s > 0.0 && _b_gamma_c > 0.0) -    prior += log_prior_b(_b, _b_gamma_c, _b_gamma_s); - -  return prior; -} - -template <typename Dish, typename Hash> -double  -PYP<Dish,Hash>::log_prior_a(double a, double beta_a, double beta_b) { -  return lbetadist(a, beta_a, beta_b);  -} - -template <typename Dish, typename Hash> -double  -PYP<Dish,Hash>::log_prior_b(double b, double gamma_c, double gamma_s) { -  return lgammadist(b, gamma_c, gamma_s);  -} - -template <typename Dish, typename Hash> -long double PYP<Dish,Hash>::lbetadist(long double x, long double alpha, long double beta) { -  assert(x > 0); -  assert(x < 1); -  assert(alpha > 0); -  assert(beta > 0); -  return (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta); -//boost::math::lgamma -} - -template <typename Dish, typename Hash> -long double PYP<Dish,Hash>::lgammadist(long double x, long double alpha, long double beta) { -  assert(alpha > 0); -  assert(beta > 0); -  return (alpha-1)*log(x) - alpha*log(beta) - x/beta - lgamma(alpha); -} - - -template <typename Dish, typename Hash> -  template <typename Uniform01> -void  -PYP<Dish,Hash>::resample_prior(Uniform01& rnd) { -  for (int num_its=5; num_its >= 0; --num_its) { -    resample_prior_b(rnd); -    resample_prior_a(rnd); -  } -  resample_prior_b(rnd); -} - -template <typename Dish, typename Hash> -  template <typename Uniform01> -void  -PYP<Dish,Hash>::resample_prior_b(Uniform01& rnd) { -  if (_total_tables == 0)  -    return; - -  //int niterations = 10;   // number of resampling iterations -  int niterations = 5;   // number of resampling iterations -  //std::cerr << "\n## resample_prior_b(), initial a = " << _a << ", b = " << _b << std::endl; -  resample_b_type b_log_prob(_total_customers, _total_tables, _a, _b_gamma_c, _b_gamma_s); -  _b = slice_sampler1d(b_log_prob, _b, rnd, (double) 0.0, std::numeric_limits<double>::infinity(),  -  //_b = slice_sampler1d(b_log_prob, _b, mt_genrand_res53, (double) 0.0, std::numeric_limits<double>::infinity(),  -                       (double) 0.0, niterations, 100*niterations); -  //std::cerr << "\n## resample_prior_b(), final a = " << _a << ", b = " << _b << std::endl; -} - -template <typename Dish, typename Hash> -  template <typename Uniform01> -void  -PYP<Dish,Hash>::resample_prior_a(Uniform01& rnd) { -  if (_total_tables == 0)  -    return; - -  //int niterations = 10; -  int niterations = 5; -  //std::cerr << "\n## Initial a = " << _a << ", b = " << _b << std::endl; -  resample_a_type a_log_prob(_total_customers, _total_tables, _b, _a_beta_a, _a_beta_b, _dish_tables); -  _a = slice_sampler1d(a_log_prob, _a, rnd, std::numeric_limits<double>::min(),  -  //_a = slice_sampler1d(a_log_prob, _a, mt_genrand_res53, 
std::numeric_limits<double>::min(),  -                       (double) 1.0, (double) 0.0, niterations, 100*niterations); -} - -#endif diff --git a/gi/pyp-topics/src/slice-sampler.h b/gi/pyp-topics/src/slice-sampler.h deleted file mode 100644 index 3108a0f7..00000000 --- a/gi/pyp-topics/src/slice-sampler.h +++ /dev/null @@ -1,192 +0,0 @@ -//! slice-sampler.h is an MCMC slice sampler -//! -//! Mark Johnson, 1st August 2008 - -#ifndef SLICE_SAMPLER_H -#define SLICE_SAMPLER_H - -#include <algorithm> -#include <cassert> -#include <cmath> -#include <iostream> -#include <limits> - -//! slice_sampler_rfc_type{} returns the value of a user-specified -//! function if the argument is within range, or - infinity otherwise -// -template <typename F, typename Fn, typename U> -struct slice_sampler_rfc_type { -  F min_x, max_x; -  const Fn& f; -  U max_nfeval, nfeval; -  slice_sampler_rfc_type(F min_x, F max_x, const Fn& f, U max_nfeval)  -    : min_x(min_x), max_x(max_x), f(f), max_nfeval(max_nfeval), nfeval(0) { } -     -  F operator() (F x) { -    if (min_x < x && x < max_x) { -      assert(++nfeval <= max_nfeval); -      F fx = f(x); -      assert(std::isfinite(fx)); -      return fx; -    } -    else -      return -std::numeric_limits<F>::infinity(); -  } -};  // slice_sampler_rfc_type{} - -//! slice_sampler1d() implements the univariate "range doubling" slice sampler -//! described in Neal (2003) "Slice Sampling", The Annals of Statistics 31(3), 705-767. -// -template <typename F, typename LogF, typename Uniform01> -F slice_sampler1d(const LogF& logF0,               //!< log of function to sample -		  F x,                             //!< starting point -		  Uniform01& u01,                  //!< uniform [0,1) random number generator -		  F min_x = -std::numeric_limits<F>::infinity(),  //!< minimum value of support -		  F max_x = std::numeric_limits<F>::infinity(),   //!< maximum value of support -		  F w = 0.0,                       //!< guess at initial width -		  unsigned nsamples=1,             //!< number of samples to draw -		  unsigned max_nfeval=200)         //!< max number of function evaluations -{ -  typedef unsigned U; -  slice_sampler_rfc_type<F,LogF,U> logF(min_x, max_x, logF0, max_nfeval); - -  assert(std::isfinite(x)); - -  if (w <= 0.0) {                           // set w to a default width  -    if (min_x > -std::numeric_limits<F>::infinity() && max_x < std::numeric_limits<F>::infinity()) -      w = (max_x - min_x)/4; -    else -      w = std::max(((x < 0.0) ? -x : x)/4, (F) 0.1); -  } -  assert(std::isfinite(w)); - -  F logFx = logF(x); -  for (U sample = 0; sample < nsamples; ++sample) { -    F logY = logFx + log(u01()+1e-100);     //! slice logFx at this value -    assert(std::isfinite(logY)); - -    F xl = x - w*u01();                     //! lower bound on slice interval -    F logFxl = logF(xl); -    F xr = xl + w;                          //! 
upper bound on slice interval -    F logFxr = logF(xr); - -    while (logY < logFxl || logY < logFxr)  // doubling procedure -      if (u01() < 0.5)  -	logFxl = logF(xl -= xr - xl); -      else -	logFxr = logF(xr += xr - xl); -	 -    F xl1 = xl; -    F xr1 = xr; -    while (true) {                          // shrinking procedure -      F x1 = xl1 + u01()*(xr1 - xl1); -      if (logY < logF(x1)) { -	F xl2 = xl;                         // acceptance procedure -	F xr2 = xr;  -	bool d = false; -	while (xr2 - xl2 > 1.1*w) { -	  F xm = (xl2 + xr2)/2; -	  if ((x < xm && x1 >= xm) || (x >= xm && x1 < xm)) -	    d = true; -	  if (x1 < xm) -	    xr2 = xm; -	  else -	    xl2 = xm; -	  if (d && logY >= logF(xl2) && logY >= logF(xr2)) -	    goto unacceptable; -	} -	x = x1; -	goto acceptable; -      } -      goto acceptable; -    unacceptable: -      if (x1 < x)                           // rest of shrinking procedure -	xl1 = x1; -      else  -	xr1 = x1; -    } -  acceptable: -    w = (4*w + (xr1 - xl1))/5;              // update width estimate -  } -  return x; -} - -/* -//! slice_sampler1d() implements a 1-d MCMC slice sampler. -//! It should be correct for unimodal distributions, but -//! not for multimodal ones. -// -template <typename F, typename LogP, typename Uniform01> -F slice_sampler1d(const LogP& logP,     //!< log of distribution to sample -		  F x,                  //!< initial sample -		  Uniform01& u01,       //!< uniform random number generator -		  F min_x = -std::numeric_limits<F>::infinity(),  //!< minimum value of support -		  F max_x = std::numeric_limits<F>::infinity(),   //!< maximum value of support -		  F w = 0.0,            //!< guess at initial width -		  unsigned nsamples=1,  //!< number of samples to draw -		  unsigned max_nfeval=200)  //!< max number of function evaluations -{ -  typedef unsigned U; -  assert(std::isfinite(x)); -  if (w <= 0.0) { -    if (min_x > -std::numeric_limits<F>::infinity() && max_x < std::numeric_limits<F>::infinity()) -      w = (max_x - min_x)/4; -    else -      w = std::max(((x < 0.0) ? 
-x : x)/4, 0.1); -  } -  // TRACE4(x, min_x, max_x, w); -  F logPx = logP(x); -  assert(std::isfinite(logPx)); -  U nfeval = 1; -  for (U sample = 0; sample < nsamples; ++sample) { -    F x0 = x; -    F logU = logPx + log(u01()+1e-100); -    assert(std::isfinite(logU)); -    F r = u01(); -    F xl = std::max(min_x, x - r*w); -    F xr = std::min(max_x, x + (1-r)*w); -    // TRACE3(x, logPx, logU); -    while (xl > min_x && logP(xl) > logU) { -      xl -= w; -      w *= 2; -      ++nfeval; -      if (nfeval >= max_nfeval) -	std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << std::endl; -      assert(nfeval < max_nfeval); -    } -    xl = std::max(xl, min_x); -    while (xr < max_x && logP(xr) > logU) { -      xr += w; -      w *= 2; -      ++nfeval; -      if (nfeval >= max_nfeval) -	std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xr = " << xr << std::endl; -      assert(nfeval < max_nfeval); -    } -    xr = std::min(xr, max_x); -    while (true) { -      r = u01(); -      x = r*xl + (1-r)*xr; -      assert(std::isfinite(x)); -      logPx = logP(x); -      // TRACE4(logPx, x, xl, xr); -      assert(std::isfinite(logPx)); -      ++nfeval; -      if (nfeval >= max_nfeval) -	std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << ", xr = " << xr << ", x = " << x << std::endl; -      assert(nfeval < max_nfeval); -      if (logPx > logU) -        break; -      else if (x > x0) -          xr = x; -        else -          xl = x; -    } -    // w = (4*w + (xr-xl))/5;   // gradually adjust w -  } -  // TRACE2(logPx, x); -  return x; -}  // slice_sampler1d() -*/ - -#endif  // SLICE_SAMPLER_H diff --git a/gi/pyp-topics/src/timing.h b/gi/pyp-topics/src/timing.h deleted file mode 100644 index 08360b0f..00000000 --- a/gi/pyp-topics/src/timing.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef TIMING_H -#define TIMING_H - -#ifdef __CYGWIN__ -# ifndef _POSIX_MONOTONIC_CLOCK -#  define _POSIX_MONOTONIC_CLOCK -// this modifies <time.h> -# endif -// in case someone included <time.h> before we got here (this is lifted from time.h>) -# ifndef CLOCK_MONOTONIC -#  define CLOCK_MONOTONIC (clockid_t)4 -# endif -#endif - - -#include <time.h> -#include <sys/time.h> -#include "clock_gettime_stub.c" - -struct Timer { -  Timer() { Reset(); } -  void Reset() -  { -    clock_gettime(CLOCK_MONOTONIC, &start_t); -  } -  double Elapsed() const { -    timespec end_t; -    clock_gettime(CLOCK_MONOTONIC, &end_t); -    const double elapsed = (end_t.tv_sec - start_t.tv_sec) -                + (end_t.tv_nsec - start_t.tv_nsec) / 1000000000.0; -    return elapsed; -  } - private: -  timespec start_t; -}; - -#endif diff --git a/gi/pyp-topics/src/train-contexts.cc b/gi/pyp-topics/src/train-contexts.cc deleted file mode 100644 index 9463f9fc..00000000 --- a/gi/pyp-topics/src/train-contexts.cc +++ /dev/null @@ -1,174 +0,0 @@ -// STL -#include <iostream> -#include <fstream> -#include <algorithm> -#include <iterator> - -// Boost -#include <boost/program_options/parsers.hpp> -#include <boost/program_options/variables_map.hpp> -#include <boost/scoped_ptr.hpp> - -// Local -#include "pyp-topics.hh" -#include "corpus.hh" -#include 
"contexts_corpus.hh" -#include "gzstream.hh" - -static const char *REVISION = "$Rev$"; - -// Namespaces -using namespace boost; -using namespace boost::program_options; -using namespace std; - -int main(int argc, char **argv) -{ - cout << "Pitman Yor topic models: Copyright 2010 Phil Blunsom\n"; - cout << REVISION << '\n' <<endl; - -  //////////////////////////////////////////////////////////////////////////////////////////// -  // Command line processing -  variables_map vm;  - -  // Command line processing -  { -    options_description cmdline_specific("Command line specific options"); -    cmdline_specific.add_options() -      ("help,h", "print help message") -      ("config,c", value<string>(), "config file specifying additional command line options") -      ; -    options_description config_options("Allowed options"); -    config_options.add_options() -      ("data,d", value<string>(), "file containing the documents and context terms") -      ("topics,t", value<int>()->default_value(50), "number of topics") -      ("document-topics-out,o", value<string>(), "file to write the document topics to") -      ("default-topics-out", value<string>(), "file to write default term topic assignments.") -      ("topic-words-out,w", value<string>(), "file to write the topic word distribution to") -      ("samples,s", value<int>()->default_value(10), "number of sampling passes through the data") -      ("backoff-type", value<string>(), "backoff type: none|simple") -//      ("filter-singleton-contexts", "filter singleton contexts") -      ("hierarchical-topics", "Use a backoff hierarchical PYP as the P0 for the document topics distribution.") -      ("freq-cutoff-start", value<int>()->default_value(0), "initial frequency cutoff.") -      ("freq-cutoff-end", value<int>()->default_value(0), "final frequency cutoff.") -      ("freq-cutoff-interval", value<int>()->default_value(0), "number of iterations between frequency decrement.") -      ("max-threads", value<int>()->default_value(1), "maximum number of simultaneous threads allowed") -      ("max-contexts-per-document", value<int>()->default_value(0), "Only sample the n most frequent contexts for a document.") -      ("num-jobs", value<int>()->default_value(1), "allows finer control over parallelization") -      ("temp-start", value<double>()->default_value(1.0), "starting annealing temperature.") -      ("temp-end", value<double>()->default_value(1.0), "end annealing temperature.") -      ; - -    cmdline_specific.add(config_options); - -    store(parse_command_line(argc, argv, cmdline_specific), vm);  -    notify(vm); - -    if (vm.count("config") > 0) { -      ifstream config(vm["config"].as<string>().c_str()); -      store(parse_config_file(config, config_options), vm);  -    } - -    if (vm.count("help")) {  -      cout << cmdline_specific << "\n";  -      return 1;  -    } -  } -  //////////////////////////////////////////////////////////////////////////////////////////// - -  if (!vm.count("data")) { -    cerr << "Please specify a file containing the data." 
<< endl; -    return 1; -  } -  assert(vm["max-threads"].as<int>() > 0); -  assert(vm["num-jobs"].as<int>() > -1); -  // seed the random number generator: 0 = automatic, specify value otherwise -  unsigned long seed = 0;  -  PYPTopics model(vm["topics"].as<int>(), vm.count("hierarchical-topics"), seed, vm["max-threads"].as<int>(), vm["num-jobs"].as<int>()); - -  // read the data -  BackoffGenerator* backoff_gen=0; -  if (vm.count("backoff-type")) { -    if (vm["backoff-type"].as<std::string>() == "none") { -      backoff_gen = 0; -    } -    else if (vm["backoff-type"].as<std::string>() == "simple") { -      backoff_gen = new SimpleBackoffGenerator(); -    } -    else { -     cerr << "Backoff type (--backoff-type) must be one of none|simple." <<endl; -      return(1); -    } -  } - -  ContextsCorpus contexts_corpus; -  contexts_corpus.read_contexts(vm["data"].as<string>(), backoff_gen, /*vm.count("filter-singleton-contexts")*/ false); -  model.set_backoff(contexts_corpus.backoff_index()); - -  if (backoff_gen)  -    delete backoff_gen; - -  // train the sampler -  model.sample_corpus(contexts_corpus, vm["samples"].as<int>(), -                      vm["freq-cutoff-start"].as<int>(), -                      vm["freq-cutoff-end"].as<int>(), -                      vm["freq-cutoff-interval"].as<int>(), -                      vm["max-contexts-per-document"].as<int>(), -                      vm["temp-start"].as<double>(), vm["temp-end"].as<double>()); - -  if (vm.count("document-topics-out")) { -    ogzstream documents_out(vm["document-topics-out"].as<string>().c_str()); - -    int document_id=0; -    map<int,int> all_terms; -    for (Corpus::const_iterator corpusIt=contexts_corpus.begin();  -         corpusIt != contexts_corpus.end(); ++corpusIt, ++document_id) { -      vector<int> unique_terms; -      for (Document::const_iterator docIt=corpusIt->begin(); -           docIt != corpusIt->end(); ++docIt) { -        if (unique_terms.empty() || *docIt != unique_terms.back()) -          unique_terms.push_back(*docIt); -        // increment this terms frequency -        pair<map<int,int>::iterator,bool> insert_result = all_terms.insert(make_pair(*docIt,1)); -        if (!insert_result.second)  -          all_terms[*docIt] = all_terms[*docIt] + 1; -          //insert_result.first++; -      } -      documents_out << contexts_corpus.key(document_id) << '\t'; -      documents_out << model.max(document_id).first << " " << corpusIt->size() << " ||| "; -      for (std::vector<int>::const_iterator termIt=unique_terms.begin(); -           termIt != unique_terms.end(); ++termIt) { -        if (termIt != unique_terms.begin()) -          documents_out << " ||| "; -       vector<std::string> strings = contexts_corpus.context2string(*termIt); -       copy(strings.begin(), strings.end(),ostream_iterator<std::string>(documents_out, " ")); -        std::pair<int,PYPTopics::F> maxinfo = model.max(document_id, *termIt); -        documents_out << "||| C=" << maxinfo.first << " P=" << maxinfo.second; - -      } -      documents_out <<endl; -    } -    documents_out.close(); - -    if (vm.count("default-topics-out")) { -      ofstream default_topics(vm["default-topics-out"].as<string>().c_str()); -      default_topics << model.max_topic() <<endl; -      for (std::map<int,int>::const_iterator termIt=all_terms.begin(); termIt != all_terms.end(); ++termIt) { -       vector<std::string> strings = contexts_corpus.context2string(termIt->first); -        default_topics << model.max(-1, termIt->first).first << " ||| " << 
termIt->second << " ||| "; -       copy(strings.begin(), strings.end(),ostream_iterator<std::string>(default_topics, " ")); -        default_topics <<endl; -      } -    } -  } - -  if (vm.count("topic-words-out")) { -    ogzstream topics_out(vm["topic-words-out"].as<string>().c_str()); -    model.print_topic_terms(topics_out); -    topics_out.close(); -  } - - cout <<endl; - -  return 0; -} diff --git a/gi/pyp-topics/src/train.cc b/gi/pyp-topics/src/train.cc deleted file mode 100644 index db7ca46e..00000000 --- a/gi/pyp-topics/src/train.cc +++ /dev/null @@ -1,135 +0,0 @@ -// STL -#include <iostream> -#include <fstream> - -// Boost -#include <boost/program_options/parsers.hpp> -#include <boost/program_options/variables_map.hpp> -#include <boost/scoped_ptr.hpp> - -// Local -#include "pyp-topics.hh" -#include "corpus.hh" -#include "contexts_corpus.hh" -#include "gzstream.hh" - -static const char *REVISION = "$Rev$"; - -// Namespaces -using namespace boost; -using namespace boost::program_options; -using namespace std; - -int main(int argc, char **argv) -{ -  std::cout << "Pitman Yor topic models: Copyright 2010 Phil Blunsom\n"; -  std::cout << REVISION << '\n' << std::endl; - -  //////////////////////////////////////////////////////////////////////////////////////////// -  // Command line processing -  variables_map vm;  - -  // Command line processing -  options_description cmdline_specific("Command line specific options"); -  cmdline_specific.add_options() -    ("help,h", "print help message") -    ("config,c", value<string>(), "config file specifying additional command line options") -    ; -  options_description generic("Allowed options"); -  generic.add_options() -    ("documents,d", value<string>(), "file containing the documents") -    ("topics,t", value<int>()->default_value(50), "number of topics") -    ("document-topics-out,o", value<string>(), "file to write the document topics to") -    ("topic-words-out,w", value<string>(), "file to write the topic word distribution to") -    ("samples,s", value<int>()->default_value(10), "number of sampling passes through the data") -    ("test-corpus", value<string>(), "file containing the test data") -    ("backoff-paths", value<string>(), "file containing the term backoff paths") -    ; -  options_description config_options, cmdline_options; -  config_options.add(generic); -  cmdline_options.add(generic).add(cmdline_specific); - -  store(parse_command_line(argc, argv, cmdline_options), vm);  -  if (vm.count("config") > 0) { -    ifstream config(vm["config"].as<string>().c_str()); -    store(parse_config_file(config, cmdline_options), vm);  -  } -  notify(vm); -  //////////////////////////////////////////////////////////////////////////////////////////// - -  if (vm.count("documents") == 0) { -    cerr << "Please specify a file containing the documents." 
<< endl; -    cout << cmdline_options << "\n";  -    return 1; -  } - -  if (vm.count("help")) {  -    cout << cmdline_options << "\n";  -    return 1;  -  } - -  // seed the random number generator: 0 = automatic, specify value otherwise -  unsigned long seed = 0;  -  PYPTopics model(vm["topics"].as<int>(), false, seed); - -  // read the data -  Corpus corpus; -  corpus.read(vm["documents"].as<string>()); - -  // read the backoff dictionary -  if (vm.count("backoff-paths")) -    model.set_backoff(vm["backoff-paths"].as<string>()); - -  // train the sampler -  model.sample_corpus(corpus, vm["samples"].as<int>()); - -  if (vm.count("document-topics-out")) { -    ogzstream documents_out(vm["document-topics-out"].as<string>().c_str()); -    //model.print_document_topics(documents_out); - -    int document_id=0; -    for (Corpus::const_iterator corpusIt=corpus.begin();  -         corpusIt != corpus.end(); ++corpusIt, ++document_id) { -      std::vector<int> unique_terms; -      for (Document::const_iterator docIt=corpusIt->begin(); -           docIt != corpusIt->end(); ++docIt) { -        if (unique_terms.empty() || *docIt != unique_terms.back()) -          unique_terms.push_back(*docIt); -      } -      documents_out << unique_terms.size(); -      for (std::vector<int>::const_iterator termIt=unique_terms.begin(); -           termIt != unique_terms.end(); ++termIt) -        documents_out << " " << *termIt << ":" << model.max(document_id, *termIt).first; -      documents_out << std::endl; -    } -    documents_out.close(); -  } - -  if (vm.count("topic-words-out")) { -    ogzstream topics_out(vm["topic-words-out"].as<string>().c_str()); -    model.print_topic_terms(topics_out); -    topics_out.close(); -  } - -  if (vm.count("test-corpus")) { -    TestCorpus test_corpus; -    test_corpus.read(vm["test-corpus"].as<string>()); -    ogzstream topics_out((vm["test-corpus"].as<string>() + ".topics.gz").c_str()); - -    for (TestCorpus::const_iterator corpusIt=test_corpus.begin();  -         corpusIt != test_corpus.end(); ++corpusIt) { -      int index=0; -      for (DocumentTerms::const_iterator instanceIt=corpusIt->begin(); -           instanceIt != corpusIt->end(); ++instanceIt, ++index) { -        int topic = model.max(instanceIt->doc, instanceIt->term).first; -        if (index != 0) topics_out << " "; -        topics_out << topic; -      } -      topics_out << std::endl; -    } -    topics_out.close(); -  } -  std::cout << std::endl; - -  return 0; -} diff --git a/gi/pyp-topics/src/utility.h b/gi/pyp-topics/src/utility.h deleted file mode 100644 index 405a5b0a..00000000 --- a/gi/pyp-topics/src/utility.h +++ /dev/null @@ -1,962 +0,0 @@ -// utility.h -// -// (c) Mark Johnson, 24th January 2005 -// -// modified 6th May 2002 to ensure write/read consistency, fixed 18th July 2002 -// modified 14th July 2002 to include insert() (generic inserter) -// modified 26th September 2003 to use mapped_type instead of data_type -// 25th August 2004 added istream >> const char* -// 24th January 2005 added insert_newkey() -// -// Defines: -//  loop macros foreach, cforeach -//  dfind (default find function) -//  afind (find function that asserts key exists) -//  insert_newkey (inserts a new key into a map) -//  insert (generic inserter into standard data structures) -//  disjoint (set operation) -//  first_lessthan and second_lessthan (compares elements of pairs) -// -// Simplified interfaces to STL routines: -// -//  includes (simplified interface) -//  set_intersection (simplified interface) -//  inserter 
(simplified interface) -//  max_element (simplified interface) -//  min_element (simplified interface) -//  hash functions for pairs, vectors, lists, slists and maps -//  input and output for pairs and vectors -//  resource_usage (interface improved) - - -#ifndef UTILITY_H -#define UTILITY_H - -#include <algorithm> -// #include <boost/smart_ptr.hpp>    // Comment out this line if boost is not used -#include <cassert> -#include <cmath> -#include <cctype> -#include <cstdio> -#include <unordered_map> -#include <unordered_set> -#include <ext/slist> -#include <iostream> -#include <iterator> -#include <list> -#include <map> -#include <set> -#include <string> -#include <utility> -#include <vector> -#include <memory> - -#if (__GNUC__ > 3) || (__GNUC__ >= 3 && __GNUC_MINOR__ >= 1) -#define EXT_NAMESPACE __gnu_cxx -#else -#define EXT_NAMESPACE std -#endif - -namespace ext = EXT_NAMESPACE; - -inline float power(float x, float y) { return powf(x, y); } -inline double power(double x, double y) { return pow(x, y); } -inline long double power(long double x, long double y) { return powl(x, y); } - -typedef unsigned U; -typedef long double F;  // slower than double, but underflows less - -/////////////////////////////////////////////////////////////////////////// -//                                                                       // -//                              Looping constructs                       // -//                                                                       // -/////////////////////////////////////////////////////////////////////////// - -// foreach is a simple loop construct -// -//   STORE should be an STL container -//   TYPE is the typename of STORE -//   VAR will be defined as a local variable of type TYPE::iterator -// -#define foreach(TYPE, VAR, STORE) \ -  for (TYPE::iterator VAR = (STORE).begin(); VAR != (STORE).end(); ++VAR) - -// cforeach is just like foreach, except that VAR is a const_iterator -// -//   STORE should be an STL container -//   TYPE is the typename of STORE -//   VAR will be defined as a local variable of type TYPE::const_iterator -// -#define cforeach(TYPE, VAR, STORE) \ -  for (TYPE::const_iterator VAR = (STORE).begin(); VAR != (STORE).end(); ++VAR) - - -/////////////////////////////////////////////////////////////////////////// -//                                                                       // -//                             Map searching                             // -//                                                                       // -// dfind(map, key) returns the key's value in map, or map's default      // -//   value if no such key exists (the default value is not inserted)     // -//                                                                       // -// afind(map, key) returns a reference to the key's value in map, and    // -//    asserts that this value exists                                     // -//                                                                       // -/////////////////////////////////////////////////////////////////////////// - -// dfind(Map, Key) returns the value Map associates with Key, or the -//  Map's default value if no such Key exists -// -template <class Map, class Key> -inline typename Map::mapped_type dfind(Map& m, const Key& k) -{ -  typename Map::iterator i = m.find(k); -  if (i == m.end()) -    return typename Map::mapped_type(); -  else -    return i->second; -} - -template <class Map, class Key> -inline const typename Map::mapped_type dfind(const Map& m, const Key& k) -{ -  
typename Map::const_iterator i = m.find(k);
-  if (i == m.end())
-    return typename Map::mapped_type();
-  else
-    return i->second;
-}
-
-
-// afind(map, key) returns a reference to the value associated
-//  with key in map.  It uses assert to check that the key's value
-//  is defined.
-//
-template <class Map, class Key>
-inline typename Map::mapped_type& afind(Map& m, const Key& k)
-{
-  typename Map::iterator i = m.find(k);
-  assert(i != m.end());
-  return i->second;
-}
-
-template <class Map, class Key>
-inline const typename Map::mapped_type& afind(const Map& m, const Key& k)
-{
-  typename Map::const_iterator i = m.find(k);
-  assert(i != m.end());
-  return i->second;
-}
-
-//! insert_newkey(map, key, value) checks that map does not contain
-//! key, and binds key to value.
-//
-template <class Map, class Key, class Value>
-inline typename Map::value_type&
-insert_newkey(Map& m, const Key& k, const Value& v)
-{
-  std::pair<typename Map::iterator, bool> itb
-    = m.insert(typename Map::value_type(k, v));
-  assert(itb.second);
-  return *(itb.first);
-}  // insert_newkey()
-
-
-///////////////////////////////////////////////////////////////////////////
-//                                                                       //
-//                        Insert operations                              //
-//                                                                       //
-///////////////////////////////////////////////////////////////////////////
-
-
-template <typename T>
-void insert(std::list<T>& xs, const T& x) {
-  xs.push_back(x);
-}
-
-template <typename T>
-void insert(std::set<T>& xs, const T& x) {
-  xs.insert(x);
-}
-
-template <typename T>
-void insert(std::vector<T>& xs, const T& x) {
-  xs.push_back(x);
-}
-
-
-///////////////////////////////////////////////////////////////////////////
-//                                                                       //
-//                Additional versions of standard algorithms             //
-//                                                                       //
-///////////////////////////////////////////////////////////////////////////
-
-template <typename Set1, typename Set2>
-inline bool includes(const Set1& set1, const Set2& set2)
-{
-  return std::includes(set1.begin(), set1.end(), set2.begin(), set2.end());
-}
-
-template <typename Set1, typename Set2, typename Compare>
-inline bool includes(const Set1& set1, const Set2& set2, Compare comp)
-{
-  return std::includes(set1.begin(), set1.end(), set2.begin(), set2.end(), comp);
-}
-
-
-template <typename InputIter1, typename InputIter2>
-bool disjoint(InputIter1 first1, InputIter1 last1,
-              InputIter2 first2, InputIter2 last2)
-{
-  while (first1 != last1 && first2 != last2)
-    if (*first1 < *first2)
-      ++first1;
-    else if (*first2 < *first1)
-      ++first2;
-    else // *first1 == *first2
-      return false;
-  return true;
-}
-
-template <typename InputIter1, typename InputIter2, typename Compare>
-bool disjoint(InputIter1 first1, InputIter1 last1,
-              InputIter2 first2, InputIter2 last2, Compare comp)
-{
-  while (first1 != last1 && first2 != last2)
-    if (comp(*first1, *first2))
-      ++first1;
-    else if (comp(*first2, *first1))
-      ++first2;
-    else // *first1 == *first2
-      return false;
-  return true;
-}
-
-template <typename Set1, typename Set2>
-inline bool disjoint(const Set1& set1, const Set2& set2)
-{
-  return disjoint(set1.begin(), set1.end(), set2.begin(), set2.end());
-}
-
-template
<typename Set1, typename Set2, typename Compare> -inline bool disjoint(const Set1& set1, const Set2& set2, Compare comp) -{ -  return disjoint(set1.begin(), set1.end(), set2.begin(), set2.end(), comp); -} - - -template <typename Set1, typename Set2, typename OutputIterator> -inline OutputIterator set_intersection(const Set1& set1, const Set2& set2,  -                                       OutputIterator result) -{ -  return set_intersection(set1.begin(), set1.end(), set2.begin(), set2.end(), result); -} - -template <typename Set1, typename Set2, typename OutputIterator, typename Compare> -inline OutputIterator set_intersection(const Set1& set1, const Set2& set2,  -                                       OutputIterator result, Compare comp) -{ -  return set_intersection(set1.begin(), set1.end(), set2.begin(), set2.end(), result, comp); -} - - -template <typename Container> -inline std::insert_iterator<Container> inserter(Container& container) -{ -  return std::inserter(container, container.begin()); -} - -// max_element -// -template <class Es> inline typename Es::iterator max_element(Es& es) -{ -  return std::max_element(es.begin(), es.end()); -} - -template <class Es> inline typename Es::const_iterator max_element(const Es& es) -{ -  return std::max_element(es.begin(), es.end()); -} - -template <class Es, class BinaryPredicate>  -inline typename Es::iterator max_element(Es& es, BinaryPredicate comp) -{ -  return std::max_element(es.begin(), es.end(), comp); -} - -template <class Es, class BinaryPredicate>  -inline typename Es::const_iterator max_element(const Es& es, BinaryPredicate comp) -{ -  return std::max_element(es.begin(), es.end(), comp); -} - -// min_element -// -template <class Es> inline typename Es::iterator min_element(Es& es) -{ -  return std::min_element(es.begin(), es.end()); -} - -template <class Es> inline typename Es::const_iterator min_element(const Es& es) -{ -  return std::min_element(es.begin(), es.end()); -} - -template <class Es, class BinaryPredicate>  -inline typename Es::iterator min_element(Es& es, BinaryPredicate comp) -{ -  return std::min_element(es.begin(), es.end(), comp); -} - -template <class Es, class BinaryPredicate>  -inline typename Es::const_iterator min_element(const Es& es, BinaryPredicate comp) -{ -  return std::min_element(es.begin(), es.end(), comp); -} - -// first_lessthan and second_lessthan -// -struct first_lessthan { -  template <typename T1, typename T2> -    bool operator() (const T1& e1, const T2& e2) { -      return e1.first < e2.first; -    } -}; - -struct second_lessthan { -  template <typename T1, typename T2> -    bool operator() (const T1& e1, const T2& e2) { -      return e1.second < e2.second; -    } -}; - -// first_greaterthan and second_greaterthan -// -struct first_greaterthan { -  template <typename T1, typename T2> -    bool operator() (const T1& e1, const T2& e2) { -      return e1.first > e2.first; -    } -}; - -struct second_greaterthan { -  template <typename T1, typename T2> -    bool operator() (const T1& e1, const T2& e2) { -      return e1.second > e2.second; -    } -}; - - -/////////////////////////////////////////////////////////////////////////// -//                                                                       // -//                          hash<> specializations                       // -//                                                                       // -// These must be in namespace std.   They permit the corresponding STL   // -// container to be used as a key in an STL hash table.           
-
-
-///////////////////////////////////////////////////////////////////////////
-//                                                                       //
-//                          hash<> specializations                       //
-//                                                                       //
-// These must be in namespace std.  They permit the corresponding STL    //
-// container to be used as a key in an STL hash table.                   //
-//                                                                       //
-///////////////////////////////////////////////////////////////////////////
-
-//namespace EXT_NAMESPACE {
-namespace std {
-  /*
-  // hash function for bool
-  //
-  template <> struct hash<bool>
-  {
-  size_t operator() (bool b) const
-  {
-  return b;
-  } // operator()
-  }; // hash<bool>{}
-
-  // hash function for double
-  //
-  template <> struct hash<double>
-  {
-  size_t operator() (double d) const
-  {
-  int exponent;
-  double fraction = frexp(d, &exponent);
-  return size_t(exponent) ^ size_t(1000000.0*(fabs(fraction-0.5)));
-  } // operator()
-  }; // hash<double>{}
-
-  // hash function for strings
-  //
-  template <> struct hash<std::string>
-  {
-  size_t operator()(const std::string& s) const
-  {
-  typedef std::string::const_iterator CI;
-
-  unsigned long h = 0;
-  unsigned long g;
-  CI p = s.begin();
-  CI end = s.end();
-
-  while (p!=end) {
-  h = (h << 4) + (*p++);
-  if ((g = h&0xf0000000)) {
-  h = h ^ (g >> 24);
-  h = h ^ g;
-  }}
-  return size_t(h);
-  }  // operator()
-  };  // hash<string>{}
-
-*/
-  // hash function for arbitrary pairs
-  //
-  template<class T1, class T2> struct hash<std::pair<T1,T2> > {
-    size_t operator()(const std::pair<T1,T2>& p) const
-    {
-      size_t h1 = hash<T1>()(p.first);
-      size_t h2 = hash<T2>()(p.second);
-      return h1 ^ (h1 >> 1) ^ h2 ^ (h2 << 1);
-    }
-  };
-
-
-  // hash function for vectors
-  //
-  template<class T> struct hash<std::vector<T> >
-  { //  This is the fn hashpjw of Aho, Sethi and Ullman, p 436.
-    size_t operator()(const std::vector<T>& s) const
-    {
-      typedef typename std::vector<T>::const_iterator CI;
-
-      unsigned long h = 0;
-      unsigned long g;
-      CI p = s.begin();
-      CI end = s.end();
-
-      while (p!=end) {
-        h = (h << 5) + hash<T>()(*p++);
-        if ((g = h&0xff000000)) {
-          h = h ^ (g >> 23);
-          h = h ^ g;
-        }}
-        return size_t(h);
-    }
-  };
-
-  // hash function for slists
-  //
-  template<class T> struct hash<ext::slist<T> >
-  { //  This is the fn hashpjw of Aho, Sethi and Ullman, p 436.
-    size_t operator()(const ext::slist<T>& s) const
-    {
-      typedef typename ext::slist<T>::const_iterator CI;
-
-      unsigned long h = 0;
-      unsigned long g;
-      CI p = s.begin();
-      CI end = s.end();
-
-      while (p!=end) {
-        h = (h << 7) + hash<T>()(*p++);
-        if ((g = h&0xff000000)) {
-          h = h ^ (g >> 23);
-          h = h ^ g;
-        }}
-        return size_t(h);
-    }
-  };
-
-  // hash function for maps
-  //
-  template<typename T1, typename T2> struct hash<std::map<T1,T2> >
-  {
-    size_t operator()(const std::map<T1,T2>& m) const
-    {
-      typedef std::map<T1,T2> M;
-      typedef typename M::const_iterator CI;
-
-      unsigned long h = 0;
-      unsigned long g;
-      CI p = m.begin();
-      CI end = m.end();
-
-      while (p != end) {
-        h = (h << 11) + hash<typename M::value_type>()(*p++);
-        if ((g = h&0xff000000)) {
-          h = h ^ (g >> 23);
-          h = h ^ g;
-        }}
-        return size_t(h);
-    }
-  };
-
-} // namespace std
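Illustrative only, not from the deleted file: with the pair specialization above visible, a std::pair can key the std::unordered_map that this header uses elsewhere. The typedef and helper below are assumptions:

#include <unordered_map>
#include <utility>

// Bigram counts keyed on a word-id pair; std::unordered_map picks up the
// hash<std::pair<T1,T2> > specialization defined above.
typedef std::unordered_map<std::pair<int,int>, int> BigramCounts;

inline void count_bigram(BigramCounts& counts, int w1, int w2)
{
  ++counts[std::make_pair(w1, w2)];  // value-initialized to 0 on first use
}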
-
-
-
-///////////////////////////////////////////////////////////////////////////
-//                                                                       //
-//                           Write/Read code                             //
-//                                                                       //
-// These routines should possess write/read invariance IF their elements //
-// also have write-read invariance.  Whitespace, '(' and ')' are used as //
-// delimiters.                                                           //
-//                                                                       //
-///////////////////////////////////////////////////////////////////////////
-
-
-// Define istream >> const char* so that it consumes the characters from the
-// istream.  Just as in scanf, a space consumes an arbitrary amount of whitespace.
-//
-inline std::istream& operator>> (std::istream& is, const char* cp)
-{
-  if (*cp == '\0')
-    return is;
-  else if (*cp == ' ') {
-    char c;
-    if (is.get(c)) {
-      if (isspace(c))
-        return is >> cp;
-      else {
-        is.unget();
-        return is >> (cp+1);
-      }
-    }
-    else {
-      is.clear(is.rdstate() & ~std::ios::failbit);  // clear failbit
-      return is >> (cp+1);
-    }
-  }
-  else {
-    char c;
-    if (is.get(c)) {
-      if (c == *cp)
-        return is >> (cp+1);
-      else {
-        is.unget();
-        is.setstate(std::ios::failbit);
-      }
-    }
-    return is;
-  }
-}
-
-
-// Write out an auto_ptr object just as you would write out the pointer object
-//
-template <typename T>
-inline std::ostream& operator<<(std::ostream& os, const std::auto_ptr<T>& sp)
-{
-  return os << sp.get();
-}
-
-
-// Pairs
-//
-template <class T1, class T2>
-std::ostream& operator<< (std::ostream& os, const std::pair<T1,T2>& p)
-{
-  return os << '(' << p.first << ' ' << p.second << ')';
-}
-
-template <class T1, class T2>
-std::istream& operator>> (std::istream& is, std::pair<T1,T2>& p)
-{
-  char c;
-  if (is >> c) {
-    if (c == '(') {
-      if (is >> p.first >> p.second >> c && c == ')')
-        return is;
-      else
-        is.setstate(std::ios::badbit);
-    }
-    else
-      is.putback(c);
-  }
-  is.setstate(std::ios::failbit);
-  return is;
-}
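Illustrative only: the const char* extractor above lets a literal string drive parsing, with ' ' matching any run of whitespace, so record formats can be matched scanf-style. A sketch under those assumptions (read_edge and the arrow syntax are invented):

#include <istream>

// Accepts input like "( 3 -> 7 )" or "(3->7)"; on the first mismatched
// character the extractor sets failbit, which fail() reports.
inline bool read_edge(std::istream& is, int& src, int& dst)
{
  return !(is >> " ( " >> src >> " -> " >> dst >> " ) ").fail();
}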
-
-// Lists
-//
-template <class T>
-std::ostream& operator<< (std::ostream& os, const std::list<T>& xs)
-{
-  os << '(';
-  for (typename std::list<T>::const_iterator xi = xs.begin(); xi != xs.end(); ++xi) {
-    if (xi != xs.begin())
-      os << ' ';
-    os << *xi;
-  }
-  return os << ')';
-}
-
-template <class T>
-std::istream& operator>> (std::istream& is, std::list<T>& xs)
-{
-  char c;                          // This code avoids an unnecessary copy
-  if (is >> c) {                   // read the initial '('
-    if (c == '(') {
-      xs.clear();                  // clear the list
-      do {
-        xs.push_back(T());         // create a new elt in list
-        is >> xs.back();           // read element
-      }
-      while (is.good());           // read as long as possible
-      xs.pop_back();               // last read failed; pop last elt
-      is.clear(is.rdstate() & ~std::ios::failbit);  // clear failbit
-      if (is >> c && c == ')')     // read terminating ')'
-        return is;                 // successful return
-      else
-        is.setstate(std::ios::badbit); // something went wrong, set badbit
-    }
-    else                           // c is not '('
-      is.putback(c);               // put c back into input
-  }
-  is.setstate(std::ios::failbit);  // read failed, set failbit
-  return is;
-}
-
-// Vectors
-//
-template <class T>
-std::ostream& operator<< (std::ostream& os, const std::vector<T>& xs)
-{
-  os << '(';
-  for (typename std::vector<T>::const_iterator xi = xs.begin(); xi != xs.end(); ++xi) {
-    if (xi != xs.begin())
-      os << ' ';
-    os << *xi;
-  }
-  return os << ')';
-}
-
-template <class T>
-std::istream& operator>> (std::istream& is, std::vector<T>& xs)
-{
-  char c;                          // This code avoids an unnecessary copy
-  if (is >> c) {                   // read the initial '('
-    if (c == '(') {
-      xs.clear();                  // clear the vector
-      do {
-        xs.push_back(T());         // create a new elt in vector
-        is >> xs.back();           // read element
-      }
-      while (is.good());           // read as long as possible
-      xs.pop_back();               // last read failed; pop last elt
-      is.clear(is.rdstate() & ~std::ios::failbit);  // clear failbit
-      if (is >> c && c == ')')     // read terminating ')'
-        return is;                 // successful return
-      else
-        is.setstate(std::ios::badbit); // something went wrong, set badbit
-    }
-    else                           // c is not '('
-      is.putback(c);               // put c back into input
-  }
-  is.setstate(std::ios::failbit);  // read failed, set failbit
-  return is;
-}
-
-// Slists
-//
-template <class T>
-std::ostream& operator<< (std::ostream& os, const ext::slist<T>& xs)
-{
-  os << '(';
-  for (typename ext::slist<T>::const_iterator xi = xs.begin(); xi != xs.end(); ++xi) {
-    if (xi != xs.begin())
-      os << ' ';
-    os << *xi;
-  }
-  return os << ')';
-}
-
-template <class T>
-std::istream& operator>> (std::istream& is, ext::slist<T>& xs)
-{
-  char c;
-  if (is >> c) {
-    if (c == '(') {
-      xs.clear();
-      T e;
-      if (is >> e) {
-        xs.push_front(e);
-        typename ext::slist<T>::iterator xi = xs.begin();
-        while (is >> e)
-          xi = xs.insert_after(xi, e);
-        is.clear(is.rdstate() & ~std::ios::failbit);
-        if (is >> c && c == ')')
-          return is;
-        else
-          is.setstate(std::ios::badbit);
-      }
-      else { // empty list
-        is.clear(is.rdstate() & ~std::ios::failbit);
-        if (is >> c && c == ')')
-          return is;
-        else  // didn't see closing ')'
-          is.setstate(std::ios::badbit);
-      }
-    }
-    else  // didn't read '('
-      is.putback(c);
-  }
-  is.setstate(std::ios::failbit);
-  return is;
-}
-
-// Sets
-//
-template <class T>
-std::ostream& operator<< (std::ostream& os, const std::set<T>& s)
-{
-  os << '(';
-  for (typename std::set<T>::const_iterator i = s.begin(); i != s.end(); ++i) {
-    if (i != s.begin())
-      os << ' ';
-    os << *i;
-  }
-  return os << ')';
-}
-
-template <class T>
-std::istream& operator>> (std::istream& is, std::set<T>& s)
-{
-  char c;
-  if (is >> c) {
-    if (c == '(') {
-      s.clear();
-      T e;
-      while (is >> e)
-        s.insert(e);
-      is.clear(is.rdstate() & ~std::ios::failbit);
-      if (is >> c && c == ')')
-        return is;
-      else
-        is.setstate(std::ios::badbit);
-    }
-    else
-      is.putback(c);
-  }
-  is.setstate(std::ios::failbit);
-  return is;
-}
-
-// Hash_sets
-//
-template <class T>
-std::ostream& operator<< (std::ostream& os, const std::unordered_set<T>& s)
-{
-  os << '(';
-  for (typename std::unordered_set<T>::const_iterator i = s.begin(); i != s.end(); ++i) {
-    if (i != s.begin())
-      os << ' ';
-    os << *i;
-  }
-  return os << ')';
-}
-
-template <class T>
-std::istream& operator>> (std::istream& is, std::unordered_set<T>& s)
-{
-  char c;
-  if (is >> c) {
-    if (c == '(') {
-      s.clear();
-      T e;
-      while (is >> e)
-        s.insert(e);
-      is.clear(is.rdstate() & ~std::ios::failbit);
-      if (is >> c && c == ')')
-        return is;
-      else
-        is.setstate(std::ios::badbit);
-    }
-    else
-      is.putback(c);
-  }
-  is.setstate(std::ios::failbit);
-  return is;
-}
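Illustrative only: the container readers and writers above are designed to round-trip, e.g. a vector prints as "(1 2 3)" and reads back from the same text. A minimal sketch (parse_vector is a hypothetical helper):

#include <sstream>
#include <string>
#include <vector>

inline std::vector<int> parse_vector(const std::string& text)
{
  std::vector<int> xs;
  std::istringstream in(text);
  in >> xs;                       // uses the vector operator>> above
  return xs;                      // e.g. "(1 2 3)" -> {1, 2, 3}
}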
-
-
-// Maps
-//
-template <class Key, class Value>
-std::ostream& operator<< (std::ostream& os, const std::map<Key,Value>& m)
-{
-  typedef std::map<Key,Value> M;
-  os << '(';
-  for (typename M::const_iterator it = m.begin(); it != m.end(); ++it) {
-    if (it != m.begin())
-      os << ' ';
-    os << *it;
-  }
-  return os << ')';
-}
-
-template <class Key, class Value>
-std::istream& operator>> (std::istream& is, std::map<Key,Value>& m)
-{
-  char c;
-  if (is >> c) {
-    if (c == '(') {
-      m.clear();
-      std::pair<Key,Value> e;
-      while (is >> e)
-        m.insert(e);
-      is.clear(is.rdstate() & ~std::ios::failbit);
-      if (is >> c && c == ')')
-        return is;
-      else
-        is.setstate(std::ios::badbit);
-    }
-    else
-      is.putback(c);
-  }
-  is.setstate(std::ios::failbit);
-  return is;
-}
-
-// Hash_maps
-//
-template <class Key, class Value>
-std::ostream& operator<< (std::ostream& os, const std::unordered_map<Key,Value>& m)
-{
-  typedef std::unordered_map<Key,Value> M;
-  os << '(';
-  for (typename M::const_iterator it = m.begin(); it != m.end(); ++it) {
-    if (it != m.begin())
-      os << ' ';
-    os << *it;
-  }
-  return os << ')';
-}
-
-template <class Key, class Value>
-std::istream& operator>> (std::istream& is, std::unordered_map<Key,Value>& m)
-{
-  char c;
-  if (is >> c) {
-    if (c == '(') {
-      m.clear();
-      std::pair<Key,Value> e;
-      while (is >> e)
-        m.insert(e);
-      is.clear(is.rdstate() & ~std::ios::failbit);
-      if (is >> c && c == ')')
-        return is;
-      else
-        is.setstate(std::ios::badbit);
-    }
-    else
-      is.putback(c);
-  }
-  is.setstate(std::ios::failbit);
-  return is;
-}
-
-
-///////////////////////////////////////////////////////////////////////////
-//                                                                       //
-//                       Boost library additions                         //
-//                                                                       //
-///////////////////////////////////////////////////////////////////////////
-
-#ifdef BOOST_SHARED_PTR_HPP_INCLUDED
-
-// enhancements to boost::shared_ptr so it can be used with hash
-//
-namespace std {
-  template <typename T> struct equal_to<boost::shared_ptr<T> >
-    : public binary_function<boost::shared_ptr<T>, boost::shared_ptr<T>, bool> {
-      bool operator() (const boost::shared_ptr<T>& p1, const boost::shared_ptr<T>& p2) const {
-        return equal_to<T*>()(p1.get(), p2.get());
-      }
-    };
-}  // namespace std
-
-//namespace EXT_NAMESPACE {
-namespace std {
-  template <typename T> struct hash<boost::shared_ptr<T> > {
-    size_t operator() (const boost::shared_ptr<T>& a) const {
-      return hash<T*>()(a.get());
-    }
-  };
-}  // namespace std
-
-template <typename T>
-inline std::ostream& operator<< (std::ostream& os, const boost::shared_ptr<T>& sp)
-{
-  return os << sp.get();
-}
-
-#endif  // BOOST_SHARED_PTR_HPP_INCLUDED
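Illustrative only: the equal_to and hash enhancements above make shared_ptr keys hash and compare by the raw pointer from get(), i.e. by object identity. A hedged sketch under that reading (Node and NodeSet are assumptions):

#include <boost/shared_ptr.hpp>
#include <unordered_set>

struct Node { int id; };

// Two shared_ptrs collide here only if they manage the same underlying
// object, since both hash<> and equal_to<> defer to the raw T* from get().
typedef std::unordered_set<boost::shared_ptr<Node> > NodeSet;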
"s, vsize "  -    << float(vsize)/1048576.0 << " Mb."; -} -#endif - -//! A default_value_type{} object is used to read an object from a stream, -//! assigning a default value if the read fails.  Users should not need to -//! construct such objects, but should use default_value() instead. -// -template <typename object_type, typename default_type> -struct default_value_type { -  object_type& object; -  const default_type defaultvalue; -  default_value_type(object_type& object, const default_type defaultvalue) -    : object(object), defaultvalue(defaultvalue) { } -}; - -//! default_value() is used to read an object from a stream, assigning a -//! default value if the read fails.  It returns a default_value_type{} -//! object, which does the actual reading. -// -template <typename object_type, typename default_type> -default_value_type<object_type,default_type> -default_value(object_type& object, const default_type defaultvalue=default_type()) { -  return default_value_type<object_type,default_type>(object, defaultvalue); -} - -//! This version of operator>>() reads default_value_type{} from an input stream. -// -template <typename object_type, typename default_type> -std::istream& operator>> (std::istream& is,  -                          default_value_type<object_type, default_type> dv) { -  if (is) { -    if (is >> dv.object) -      ; -    else { -      is.clear(is.rdstate() & ~std::ios::failbit);  // clear failbit -      dv.object = dv.defaultvalue; -    } -  } -  return is; -} - -// inline F random1() { return rand()/(RAND_MAX+1.0); } -inline F random1() { return mt_genrand_res53(); } - -#endif  // UTILITY_H diff --git a/gi/pyp-topics/src/workers.hh b/gi/pyp-topics/src/workers.hh deleted file mode 100644 index 95b18947..00000000 --- a/gi/pyp-topics/src/workers.hh +++ /dev/null @@ -1,275 +0,0 @@ -/** -  Basic thread-pool tools using Boost.Thread. -  (Jan Botha, 7/2010) - -  --Simple usage-- -  Use SimpleWorker. -    Example, call a function that returns an int in a new thread: -    typedef boost::function<int()> JobType; -    JobType job = boost::bind(funcname); -      //or boost::bind(&class::funcname, this) for a member function -    SimpleWorker<JobType, int> worker(job); -    int result = worker.getResult(); //blocks until result is ready - -  --Extended usage-- -  Use WorkerPool, which uses Queuemt (a synchronized queue) and Worker. -  Example: -    (same context and typedef -    WorkerPool<JobType, int> pool(num_threads); -    JobType job = ... -    pool.addJob(job); -    ... -    pool.get_result(); //blocks until all workers are done, returns the some of their results.   -     -    Jobs added to a WorkerPool need to be the same type. A WorkerPool instance should not be reused (e.g. adding jobs) after calling get_result().  
diff --git a/gi/pyp-topics/src/workers.hh b/gi/pyp-topics/src/workers.hh
deleted file mode 100644
index 95b18947..00000000
--- a/gi/pyp-topics/src/workers.hh
+++ /dev/null
@@ -1,275 +0,0 @@
-/**
-  Basic thread-pool tools using Boost.Thread.
-  (Jan Botha, 7/2010)
-
-  --Simple usage--
-  Use SimpleWorker.
-    Example, call a function that returns an int in a new thread:
-    typedef boost::function<int()> JobType;
-    JobType job = boost::bind(funcname);
-      //or boost::bind(&class::funcname, this) for a member function
-    SimpleWorker<JobType, int> worker(job);
-    int result = worker.getResult(); //blocks until result is ready
-
-  --Extended usage--
-  Use WorkerPool, which uses Queuemt (a synchronized queue) and Worker.
-  Example:
-    (same context and typedefs as above)
-    WorkerPool<JobType, int> pool(num_threads);
-    JobType job = ...
-    pool.addJob(job);
-    ...
-    pool.get_result(); //blocks until all workers are done, returns the sum of their results.
-
-    Jobs added to a WorkerPool need to be the same type. A WorkerPool instance should not be reused (e.g. adding jobs) after calling get_result().
-*/
-
-#ifndef WORKERS_HH
-#define WORKERS_HH
-
-#include <iostream>
-#include <boost/bind.hpp>
-#include <boost/function.hpp>
-#include <queue>
-#include <boost/ptr_container/ptr_vector.hpp>
-#include <boost/thread/thread.hpp>
-#include <boost/thread/mutex.hpp>
-#include <boost/thread/shared_mutex.hpp>
-#include <boost/thread/future.hpp>
-#include <boost/thread/condition.hpp>
-
-#include <boost/date_time/posix_time/posix_time_types.hpp>
-#include "timing.h"
-
-/** Implements a synchronized queue */
-template<typename J>
-class Queuemt
-{
-
-public:
-    boost::condition_variable_any cond;
-    const bool& running;
-
-    // the reference member 'running' must be bound at construction time,
-    // so there is no default constructor
-    Queuemt(const bool& running) : running(running), maxsize(0), qsize(0)
-    {
-    }
-
-    ~Queuemt() {
-    }
-
-    J pop()
-    {
-        J job;
-        {
-            boost::unique_lock<boost::shared_mutex> qlock(q_mutex);
-            while (running && qsize == 0)
-                cond.wait(qlock);
-
-            if (qsize > 0)
-            {
-                job = q.front();
-                q.pop();
-                --qsize;
-            }
-        }
-        if (job)
-            cond.notify_one();
-        return job;
-    }
-
-    void push(J job)
-    {
-        {
-            boost::unique_lock<boost::shared_mutex> lock(q_mutex);
-            q.push(job);
-            ++qsize;
-        }
-        if (qsize > maxsize)
-            maxsize = qsize;
-
-        cond.notify_one();
-    }
-
-    int getMaxsize()
-    {
-        return maxsize;
-    }
-
-    int size()
-    {
-        return qsize;
-    }
-
-private:
-    boost::shared_mutex q_mutex;
-    std::queue<J> q;
-    int maxsize;
-    volatile int qsize;
-};
-
-
-template<typename J, typename R>
-class Worker
-{
-typedef boost::packaged_task<R> PackagedTask;
-public:
-    Worker(Queuemt<J>& queue, int id, int num_workers) :
-      q(queue), tasktime(0.0), id(id), num_workers(num_workers)
-    {
-        PackagedTask task(boost::bind(&Worker<J, R>::run, this));
-        future = task.get_future();
-        boost::thread t(boost::move(task));
-    }
-
-    R run() //this is called upon thread creation
-    {
-        R wresult = 0;
-        while (isRunning())
-        {
-            J job = q.pop();
-
-            if (job)
-            {
-                timer.Reset();
-                wresult += job();
-                tasktime += timer.Elapsed();
-            }
-        }
-        return wresult;
-    }
-
-    R getResult()
-    {
-        if (!future.is_ready())
-            future.wait();
-        assert(future.is_ready());
-        return future.get();
-    }
-
-    double getTaskTime()
-    {
-        return tasktime;
-    }
-
-private:
-
-    Queuemt<J>& q;
-
-    boost::unique_future<R> future;
-
-    bool isRunning()
-    {
-        return q.running || q.size() > 0;
-    }
-
-    Timer timer;
-    double tasktime;
-    int id;
-    int num_workers;
-};
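Illustrative only, not from the deleted file: Queuemt<J>::pop() tests the popped job with `if (job)`, so a job type J must be default-constructible and testable in a boolean context; boost::function satisfies both (an empty one converts to false). A sketch of a conforming job (count_shard is hypothetical):

#include <boost/bind.hpp>
#include <boost/function.hpp>

int count_shard(int shard_id);           // hypothetical work function

typedef boost::function<int()> JobType;  // default-constructed == empty == false

inline JobType make_job(int shard_id)
{
  return boost::bind(&count_shard, shard_id);
}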
-
-template<typename J, typename R>
-class WorkerPool
-{
-typedef boost::packaged_task<R> PackagedTask;
-typedef Worker<J,R> WJR;
-typedef boost::ptr_vector<WJR> WorkerVector;
-public:
-
-    WorkerPool(int num_workers)
-    {
-        running = true;  // set before handing the 'running' flag to the queue
-        q.reset(new Queuemt<J>(running));
-        for (int i = 0; i < num_workers; ++i)
-            workers.push_back( new Worker<J, R>(*q, i, num_workers) );
-    }
-
-    ~WorkerPool()
-    {
-    }
-
-    R get_result()
-    {
-        running = false;
-        q->cond.notify_all();
-        R tmp = 0;
-        double tasktime = 0.0;
-        for (typename WorkerVector::iterator it = workers.begin(); it != workers.end(); it++)
-        {
-            R res = it->getResult();
-            tmp += res;
-            //std::cerr << "tasktime: " << it->getTaskTime() << std::endl;
-            tasktime += it->getTaskTime();
-        }
-//        std::cerr << " maxQ = " << q->getMaxsize() << std::endl;
-        return tmp;
-    }
-
-    void addJob(J job)
-    {
-        q->push(job);
-    }
-
-private:
-
-    WorkerVector workers;
-
-    boost::shared_ptr<Queuemt<J> > q;
-
-    bool running;
-};
-
-///////////////////
-template <typename J, typename R>
-class SimpleWorker
-{
-typedef boost::packaged_task<R> PackagedTask;
-public:
-    SimpleWorker(J& job) : job(job), tasktime(0.0)
-    {
-        PackagedTask task(boost::bind(&SimpleWorker<J, R>::run, this));
-        future = task.get_future();
-        boost::thread t(boost::move(task));
-    }
-
-    R run() //this is called upon thread creation
-    {
-        R wresult = 0;
-
-        assert(job);
-        timer.Reset();
-        wresult = job();
-        tasktime = timer.Elapsed();
-        std::cerr << tasktime << " s" << std::endl;
-        return wresult;
-    }
-
-    R getResult()
-    {
-        if (!future.is_ready())
-            future.wait();
-        assert(future.is_ready());
-        return future.get();
-    }
-
-    double getTaskTime()
-    {
-        return tasktime;
-    }
-
-private:
-
-    J job;
-
-    boost::unique_future<R> future;
-
-    Timer timer;
-    double tasktime;
-
-};
-
-
-
-#endif
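Illustrative only: an end-to-end sketch of the pool, following the header's own usage notes and the JobType sketch above; the shard counts are assumptions.

inline int count_all(int num_shards, int num_threads)
{
  WorkerPool<JobType, int> pool(num_threads);
  for (int i = 0; i < num_shards; ++i)
    pool.addJob(make_job(i));     // all jobs must share one type
  return pool.get_result();       // blocks, then sums the workers' returns
}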
diff --git a/gi/scripts/buck2utf8.pl b/gi/scripts/buck2utf8.pl
deleted file mode 100755
index 1acfae8d..00000000
--- a/gi/scripts/buck2utf8.pl
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-use utf8;
-binmode(STDOUT, ":utf8");
-while(<>) {
-  chomp;
-  my @words = split /\s+/;
-  for my $w (@words) {
-    $_ = $w;
-    if ($w =~ /^__NTK__/o) {
-      s/__NTK__//go;
-      next if /^$/;
-      print STDOUT "$_ ";
-      next;
-    }
-# Urdu-specific letters first.  Order matters: once a letter has been
-# rewritten to a codepoint, later rules for the same ASCII letter (e.g. the
-# g, k, h, y, Y, J, N rules in the block below) can no longer match.
-s/tR/\x{0679}/g;  # retroflex t
-s/dR/\x{0688}/g;  # retroflex d
-s/rR/\x{0691}/g;  # retroflex r
-s/p/\x{067E}/g;   # peh
-s/c/\x{0686}/g;   # tcheh
-s/g/\x{06AF}/g;   # geh (G=ghain)
-s/@/\x{06BE}/g;   # heh doachashmee
-s/h'/\x{06c2}/g;  # heh goal + hamza
-s/h/\x{06c1}/g;   # heh goal
-s/J/\x{0698}/g;   # zheh (rare, usually persian loan words)
-s/k/\x{06A9}/g;   # k
-s/Y'/\x{06d3}/g;  # yeh barree + hamza above (ligature)
-s/y/\x{06cc}/g;   # same as ya' in arabic
-s/Y/\x{06d2}/g;   # yeh barree
-s/N/\x{06BA}/g;   # Ghunna
-
-    # Standard Buckwalter-to-Arabic mappings.
-    s/\'/\x{0621}/g;
-    s/\|/\x{0622}/g;
-    s/\>/\x{0623}/g;
-    s/\&/\x{0624}/g;
-    s/\</\x{0625}/g;
-    s/\}/\x{0626}/g;
-    s/A/\x{0627}/g;
-    s/b/\x{0628}/g;
-    s/t/\x{062A}/g;
-    s/v/\x{062B}/g;
-    s/j/\x{062C}/g;
-    s/H/\x{062D}/g;
-    s/x/\x{062E}/g;
-    s/d/\x{062F}/g;
-    s/\*/\x{0630}/g;
-    s/r/\x{0631}/g;
-    s/z/\x{0632}/g;
-    s/s/\x{0633}/g;
-    s/\$/\x{0634}/g;
-    s/S/\x{0635}/g;
-    s/D/\x{0636}/g;
-    s/T/\x{0637}/g;
-    s/Z/\x{0638}/g;
-    s/E/\x{0639}/g;
-    s/g/\x{063A}/g;
-    s/_/\x{0640}/g;
-    s/f/\x{0641}/g;
-    s/q/\x{0642}/g;
-    s/k/\x{0643}/g;
-    s/l/\x{0644}/g;
-    s/m/\x{0645}/g;
-    s/n/\x{0646}/g;
-    s/h/\x{0647}/g;
-    s/w/\x{0648}/g;
-    s/Y/\x{0649}/g;
-    s/y/\x{064A}/g;
-    s/F/\x{064B}/g;
-    s/N/\x{064C}/g;
-    s/K/\x{064D}/g;
-    s/a/\x{064E}/g;
-    s/u/\x{064F}/g;
-    s/i/\x{0650}/g;
-    s/\~/\x{0651}/g;
-    s/o/\x{0652}/g;
-    s/\`/\x{0670}/g;
-    s/\{/\x{0671}/g;
-    s/P/\x{067E}/g;
-    s/J/\x{0686}/g;
-    s/V/\x{06A4}/g;
-    s/G/\x{06AF}/g;
-
-
-print STDOUT "$_ ";
-  }
-  print STDOUT "\n";
-}
