author     Patrick Simianer <simianer@cl.uni-heidelberg.de>    2012-11-05 15:29:46 +0100
committer  Patrick Simianer <simianer@cl.uni-heidelberg.de>    2012-11-05 15:29:46 +0100
commit     1db70a45d59946560fbd5db6487b55a8674ef973 (patch)
tree       172585dafe4d1462f22d8200e733d52dddb55b1e /gi
parent     4dd5216d3afa9ab72b150e250a3c30a5f223ce53 (diff)
parent     6bbf03ac46bd57400aa9e65a321a304a234af935 (diff)

merge upstream/master
Diffstat (limited to 'gi')
-rw-r--r--  gi/clda/src/Makefile.am | 6
-rw-r--r--  gi/clda/src/ccrp.h | 291
-rw-r--r--  gi/clda/src/clda.cc | 148
-rw-r--r--  gi/clda/src/crp.h | 50
-rw-r--r--  gi/clda/src/slice_sampler.h | 191
-rw-r--r--  gi/clda/src/timer.h | 20
-rw-r--r--  gi/evaluation/conditional_entropy.py | 61
-rw-r--r--  gi/evaluation/confusion_matrix.py | 123
-rw-r--r--  gi/evaluation/entropy.py | 38
-rw-r--r--  gi/evaluation/extract_ccg_labels.py | 129
-rw-r--r--  gi/evaluation/tree.py | 485
-rw-r--r--  gi/markov_al/Makefile.am | 6
-rw-r--r--  gi/markov_al/README | 2
-rw-r--r--  gi/markov_al/ml.cc | 470
-rwxr-xr-x  gi/morf-segmentation/filter_docs.pl | 24
-rw-r--r--  gi/morf-segmentation/invalid_vocab.patterns | 6
-rwxr-xr-x  gi/morf-segmentation/linestripper.py | 40
-rwxr-xr-x  gi/morf-segmentation/morf-pipeline.pl | 486
-rwxr-xr-x  gi/morf-segmentation/morfsegment.py | 50
-rwxr-xr-x  gi/morf-segmentation/morftrain.sh | 110
-rwxr-xr-x  gi/morf-segmentation/vocabextractor.sh | 40
-rw-r--r--  gi/pf/Makefile.am | 44
-rw-r--r--  gi/pf/README | 2
-rw-r--r--  gi/pf/align-lexonly-pyp.cc | 243
-rw-r--r--  gi/pf/align-tl.cc | 339
-rw-r--r--  gi/pf/backward.cc | 89
-rw-r--r--  gi/pf/backward.h | 33
-rw-r--r--  gi/pf/base_distributions.cc | 241
-rw-r--r--  gi/pf/base_distributions.h | 238
-rw-r--r--  gi/pf/bayes_lattice_score.cc | 309
-rw-r--r--  gi/pf/brat.cc | 543
-rw-r--r--  gi/pf/cbgi.cc | 330
-rw-r--r--  gi/pf/cfg_wfst_composer.cc | 731
-rw-r--r--  gi/pf/cfg_wfst_composer.h | 46
-rw-r--r--  gi/pf/conditional_pseg.h | 275
-rw-r--r--  gi/pf/condnaive.cc | 298
-rw-r--r--  gi/pf/corpus.cc | 62
-rw-r--r--  gi/pf/corpus.h | 19
-rw-r--r--  gi/pf/dpnaive.cc | 301
-rwxr-xr-x  gi/pf/guess-translits.pl | 72
-rw-r--r--  gi/pf/hpyp_tm.cc | 133
-rw-r--r--  gi/pf/hpyp_tm.h | 38
-rw-r--r--  gi/pf/itg.cc | 275
-rw-r--r--  gi/pf/learn_cfg.cc | 428
-rwxr-xr-x  gi/pf/make-freq-bins.pl | 26
-rw-r--r--  gi/pf/mh_test.cc | 148
-rw-r--r--  gi/pf/monotonic_pseg.h | 89
-rw-r--r--  gi/pf/ngram_base.cc | 69
-rw-r--r--  gi/pf/ngram_base.h | 25
-rw-r--r--  gi/pf/nuisance_test.cc | 161
-rw-r--r--  gi/pf/os_phrase.h | 15
-rw-r--r--  gi/pf/pf.h | 84
-rw-r--r--  gi/pf/pf_test.cc | 148
-rw-r--r--  gi/pf/pfbrat.cc | 543
-rw-r--r--  gi/pf/pfdist.cc | 598
-rw-r--r--  gi/pf/pfdist.new.cc | 620
-rw-r--r--  gi/pf/pfnaive.cc | 284
-rw-r--r--  gi/pf/poisson_uniform_word_model.h | 50
-rw-r--r--  gi/pf/pyp_lm.cc | 209
-rw-r--r--  gi/pf/pyp_tm.cc | 128
-rw-r--r--  gi/pf/pyp_tm.h | 36
-rw-r--r--  gi/pf/pyp_word_model.h | 62
-rw-r--r--  gi/pf/quasi_model2.h | 177
-rw-r--r--  gi/pf/reachability.cc | 74
-rw-r--r--  gi/pf/reachability.h | 34
-rw-r--r--  gi/pf/tied_resampler.h | 122
-rw-r--r--  gi/pf/tpf.cc | 99
-rw-r--r--  gi/pf/transliterations.cc | 334
-rw-r--r--  gi/pf/transliterations.h | 24
-rw-r--r--  gi/pf/unigrams.cc | 80
-rw-r--r--  gi/pf/unigrams.h | 69
-rw-r--r--  gi/pipeline/OLD.clsp.config | 9
-rwxr-xr-x  gi/pipeline/OLD.evaluation-pipeline.pl | 277
-rw-r--r--  gi/pipeline/backoff-pipe.pl | 215
-rw-r--r--  gi/pipeline/blacklight.config | 9
-rw-r--r--  gi/pipeline/clsp.config | 10
-rwxr-xr-x  gi/pipeline/evaluation-pipeline.pl | 364
-rwxr-xr-x  gi/pipeline/local-gi-pipeline.pl | 465
-rw-r--r--  gi/pipeline/lticluster.config | 9
-rwxr-xr-x  gi/pipeline/scripts/filter-by-f.pl | 56
-rwxr-xr-x  gi/pipeline/scripts/patch-corpus.pl | 65
-rwxr-xr-x  gi/pipeline/scripts/refilter.pl | 40
-rwxr-xr-x  gi/pipeline/scripts/rekey.pl | 8
-rwxr-xr-x  gi/pipeline/scripts/remove-tags-from-contexts.pl | 53
-rwxr-xr-x  gi/pipeline/scripts/remove-tags-from-corpus.pl | 44
-rwxr-xr-x  gi/pipeline/scripts/sort-by-key.sh | 5
-rwxr-xr-x  gi/pipeline/scripts/xfeats.pl | 39
-rw-r--r--  gi/pipeline/valhalla.config | 9
-rw-r--r--  gi/posterior-regularisation/Corpus.java | 167
-rw-r--r--  gi/posterior-regularisation/Lexicon.java | 32
-rw-r--r--  gi/posterior-regularisation/PhraseContextModel.java | 466
-rw-r--r--  gi/posterior-regularisation/README | 3
-rw-r--r--  gi/posterior-regularisation/alphabet.hh | 61
-rw-r--r--  gi/posterior-regularisation/canned.concordance | 4
-rw-r--r--  gi/posterior-regularisation/em.cc | 830
-rw-r--r--  gi/posterior-regularisation/invert.hh | 45
-rw-r--r--  gi/posterior-regularisation/linesearch.py | 58
-rw-r--r--  gi/posterior-regularisation/log_add.hh | 30
l---------  gi/posterior-regularisation/prjava.jar | 1
-rwxr-xr-x  gi/posterior-regularisation/prjava/Makefile | 8
-rw-r--r--  gi/posterior-regularisation/prjava/build.xml | 38
-rw-r--r--  gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar | bin 832410 -> 0 bytes
-rw-r--r--  gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar | bin 53244 -> 0 bytes
-rw-r--r--  gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar | bin 737844 -> 0 bytes
-rw-r--r--  gi/posterior-regularisation/prjava/src/arr/F.java | 99
-rw-r--r--  gi/posterior-regularisation/prjava/src/data/Corpus.java | 233
-rw-r--r--  gi/posterior-regularisation/prjava/src/hmm/HMM.java | 579
-rw-r--r--  gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java | 351
-rw-r--r--  gi/posterior-regularisation/prjava/src/hmm/POS.java | 120
-rw-r--r--  gi/posterior-regularisation/prjava/src/io/FileUtil.java | 48
-rw-r--r--  gi/posterior-regularisation/prjava/src/io/SerializedObjects.java | 83
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java | 110
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java | 128
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java | 127
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java | 120
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java | 92
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java | 65
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java | 19
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java | 234
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java | 87
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java | 19
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java | 11
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java | 154
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java | 29
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java | 10
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java | 86
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java | 70
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java | 102
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java | 141
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java | 185
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java | 20
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java | 25
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java | 14
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java | 33
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java | 137
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java | 300
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java | 45
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java | 104
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java | 72
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java | 127
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java | 33
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java | 30
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java | 48
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java | 60
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java | 54
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java | 51
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java | 8
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java | 41
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java | 37
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/util/Logger.java | 7
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java | 339
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java | 28
-rw-r--r--  gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java | 180
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/Agree.java | 204
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java | 197
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/C2F.java | 216
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/Corpus.java | 288
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/Lexicon.java | 34
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java | 540
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java | 436
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java | 193
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java | 224
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/Trainer.java | 257
-rw-r--r--  gi/posterior-regularisation/prjava/src/phrase/VB.java | 419
-rw-r--r--  gi/posterior-regularisation/prjava/src/test/CorpusTest.java | 60
-rw-r--r--  gi/posterior-regularisation/prjava/src/test/HMMModelStats.java | 105
-rw-r--r--  gi/posterior-regularisation/prjava/src/test/IntDoublePair.java | 23
-rw-r--r--  gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java | 131
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/Array.java | 41
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/ArrayMath.java | 186
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java | 14
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/DigammaFunction.java | 21
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/FileSystem.java | 21
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/InputOutput.java | 67
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/LogSummer.java | 86
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/MathUtil.java | 148
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/Matrix.java | 16
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/MemoryTracker.java | 47
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/Pair.java | 31
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/Printing.java | 158
-rw-r--r--  gi/posterior-regularisation/prjava/src/util/Sorters.java | 39
-rwxr-xr-x  gi/posterior-regularisation/prjava/train-PR-cluster.sh | 4
-rw-r--r--  gi/posterior-regularisation/projected_gradient.cc | 87
-rw-r--r--  gi/posterior-regularisation/simplex_pg.py | 55
-rwxr-xr-x  gi/posterior-regularisation/split-languages.py | 23
-rw-r--r--  gi/posterior-regularisation/train_pr_agree.py | 400
-rw-r--r--  gi/posterior-regularisation/train_pr_global.py | 296
-rw-r--r--  gi/posterior-regularisation/train_pr_parallel.py | 333
-rwxr-xr-x  gi/pyp-topics/scripts/contexts2documents.py | 37
-rwxr-xr-x  gi/pyp-topics/scripts/extract_contexts.py | 144
-rwxr-xr-x  gi/pyp-topics/scripts/extract_contexts_test.py | 72
-rwxr-xr-x  gi/pyp-topics/scripts/extract_leaves.py | 49
-rwxr-xr-x  gi/pyp-topics/scripts/map-documents.py | 20
-rwxr-xr-x  gi/pyp-topics/scripts/map-terms.py | 20
-rw-r--r--  gi/pyp-topics/scripts/run.sh | 13
-rwxr-xr-x  gi/pyp-topics/scripts/score-mkcls.py | 61
-rwxr-xr-x  gi/pyp-topics/scripts/score-topics.py | 64
-rwxr-xr-x  gi/pyp-topics/scripts/spans2labels.py | 137
-rwxr-xr-x  gi/pyp-topics/scripts/tokens2classes.py | 27
-rwxr-xr-x  gi/pyp-topics/scripts/topics.py | 20
-rw-r--r--  gi/pyp-topics/src/Makefile.am | 16
-rw-r--r--  gi/pyp-topics/src/Makefile.mpi | 26
-rw-r--r--  gi/pyp-topics/src/clock_gettime_stub.c | 141
-rw-r--r--  gi/pyp-topics/src/contexts_corpus.cc | 164
-rw-r--r--  gi/pyp-topics/src/contexts_corpus.hh | 90
-rw-r--r--  gi/pyp-topics/src/contexts_lexer.h | 22
-rw-r--r--  gi/pyp-topics/src/contexts_lexer.l | 113
-rw-r--r--  gi/pyp-topics/src/corpus.cc | 104
-rw-r--r--  gi/pyp-topics/src/corpus.hh | 133
-rw-r--r--  gi/pyp-topics/src/gammadist.c | 247
-rw-r--r--  gi/pyp-topics/src/gammadist.h | 72
-rw-r--r--  gi/pyp-topics/src/gzstream.cc | 165
-rw-r--r--  gi/pyp-topics/src/gzstream.hh | 121
-rw-r--r--  gi/pyp-topics/src/log_add.h | 30
-rw-r--r--  gi/pyp-topics/src/macros.Linux | 18
-rw-r--r--  gi/pyp-topics/src/makefile.darwin | 15
-rw-r--r--  gi/pyp-topics/src/makefile.depend | 4042
-rw-r--r--  gi/pyp-topics/src/mpi-corpus.hh | 69
-rw-r--r--  gi/pyp-topics/src/mpi-pyp-topics.cc | 466
-rw-r--r--  gi/pyp-topics/src/mpi-pyp-topics.hh | 106
-rw-r--r--  gi/pyp-topics/src/mpi-pyp.hh | 447
-rw-r--r--  gi/pyp-topics/src/mpi-train-contexts.cc | 201
-rw-r--r--  gi/pyp-topics/src/mt19937ar.c | 194
-rw-r--r--  gi/pyp-topics/src/mt19937ar.h | 44
-rw-r--r--  gi/pyp-topics/src/pyp-topics.cc | 499
-rw-r--r--  gi/pyp-topics/src/pyp-topics.hh | 98
-rw-r--r--  gi/pyp-topics/src/pyp.hh | 566
-rw-r--r--  gi/pyp-topics/src/slice-sampler.h | 192
-rw-r--r--  gi/pyp-topics/src/timing.h | 37
-rw-r--r--  gi/pyp-topics/src/train-contexts.cc | 174
-rw-r--r--  gi/pyp-topics/src/train.cc | 135
-rw-r--r--  gi/pyp-topics/src/utility.h | 962
-rw-r--r--  gi/pyp-topics/src/workers.hh | 275
-rwxr-xr-x  gi/scripts/buck2utf8.pl | 87
234 files changed, 0 insertions, 36823 deletions
diff --git a/gi/clda/src/Makefile.am b/gi/clda/src/Makefile.am
deleted file mode 100644
index cdca1f97..00000000
--- a/gi/clda/src/Makefile.am
+++ /dev/null
@@ -1,6 +0,0 @@
-bin_PROGRAMS = clda
-
-clda_SOURCES = clda.cc
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I$(top_srcdir)/utils $(GTEST_CPPFLAGS)
-AM_LDFLAGS = $(top_srcdir)/utils/libutils.a -lz
diff --git a/gi/clda/src/ccrp.h b/gi/clda/src/ccrp.h
deleted file mode 100644
index a7c2825c..00000000
--- a/gi/clda/src/ccrp.h
+++ /dev/null
@@ -1,291 +0,0 @@
-#ifndef _CCRP_H_
-#define _CCRP_H_
-
-#include <numeric>
-#include <cassert>
-#include <cmath>
-#include <list>
-#include <iostream>
-#include <vector>
-#include <tr1/unordered_map>
-#include <boost/functional/hash.hpp>
-#include "sampler.h"
-#include "slice_sampler.h"
-
-// Chinese restaurant process (Pitman-Yor parameters) with table tracking.
-
-template <typename Dish, typename DishHash = boost::hash<Dish> >
-class CCRP {
- public:
- CCRP(double disc, double conc) :
- num_tables_(),
- num_customers_(),
- discount_(disc),
- concentration_(conc),
- discount_prior_alpha_(std::numeric_limits<double>::quiet_NaN()),
- discount_prior_beta_(std::numeric_limits<double>::quiet_NaN()),
- concentration_prior_shape_(std::numeric_limits<double>::quiet_NaN()),
- concentration_prior_rate_(std::numeric_limits<double>::quiet_NaN()) {}
-
- CCRP(double d_alpha, double d_beta, double c_shape, double c_rate, double d = 0.1, double c = 10.0) :
- num_tables_(),
- num_customers_(),
- discount_(d),
- concentration_(c),
- discount_prior_alpha_(d_alpha),
- discount_prior_beta_(d_beta),
- concentration_prior_shape_(c_shape),
- concentration_prior_rate_(c_rate) {}
-
- double discount() const { return discount_; }
- double concentration() const { return concentration_; }
-
- bool has_discount_prior() const {
- return !std::isnan(discount_prior_alpha_);
- }
-
- bool has_concentration_prior() const {
- return !std::isnan(concentration_prior_shape_);
- }
-
- void clear() {
- num_tables_ = 0;
- num_customers_ = 0;
- dish_locs_.clear();
- }
-
- unsigned num_tables(const Dish& dish) const {
- const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
- if (it == dish_locs_.end()) return 0;
- return it->second.table_counts_.size();
- }
-
- unsigned num_customers() const {
- return num_customers_;
- }
-
- unsigned num_customers(const Dish& dish) const {
- const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
- if (it == dish_locs_.end()) return 0;
- return it->second.total_dish_count_;
- }
-
- // returns +1 or 0 indicating whether a new table was opened
- int increment(const Dish& dish, const double& p0, MT19937* rng) {
- DishLocations& loc = dish_locs_[dish];
- bool share_table = false;
- if (loc.total_dish_count_) {
- const double p_empty = (concentration_ + num_tables_ * discount_) * p0;
- const double p_share = (loc.total_dish_count_ - loc.table_counts_.size() * discount_);
- share_table = rng->SelectSample(p_empty, p_share);
- }
- if (share_table) {
- double r = rng->next() * (loc.total_dish_count_ - loc.table_counts_.size() * discount_);
- for (typename std::list<unsigned>::iterator ti = loc.table_counts_.begin();
- ti != loc.table_counts_.end(); ++ti) {
- r -= (*ti - discount_);
- if (r <= 0.0) {
- ++(*ti);
- break;
- }
- }
- if (r > 0.0) {
- std::cerr << "Serious error: r=" << r << std::endl;
- Print(&std::cerr);
- assert(r <= 0.0);
- }
- } else {
- loc.table_counts_.push_back(1u);
- ++num_tables_;
- }
- ++loc.total_dish_count_;
- ++num_customers_;
- return (share_table ? 0 : 1);
- }
-
- // returns -1 or 0, indicating whether a table was closed
- int decrement(const Dish& dish, MT19937* rng) {
- DishLocations& loc = dish_locs_[dish];
- assert(loc.total_dish_count_);
- if (loc.total_dish_count_ == 1) {
- dish_locs_.erase(dish);
- --num_tables_;
- --num_customers_;
- return -1;
- } else {
- int delta = 0;
- // sample customer to remove UNIFORMLY. that is, do NOT use the discount
- // here. if you do, it will introduce (unwanted) bias!
- double r = rng->next() * loc.total_dish_count_;
- --loc.total_dish_count_;
- for (typename std::list<unsigned>::iterator ti = loc.table_counts_.begin();
- ti != loc.table_counts_.end(); ++ti) {
- r -= *ti;
- if (r <= 0.0) {
- if ((--(*ti)) == 0) {
- --num_tables_;
- delta = -1;
- loc.table_counts_.erase(ti);
- }
- break;
- }
- }
- if (r > 0.0) {
- std::cerr << "Serious error: r=" << r << std::endl;
- Print(&std::cerr);
- assert(r <= 0.0);
- }
- --num_customers_;
- return delta;
- }
- }
-
- double prob(const Dish& dish, const double& p0) const {
- const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
- const double r = num_tables_ * discount_ + concentration_;
- if (it == dish_locs_.end()) {
- return r * p0 / (num_customers_ + concentration_);
- } else {
- return (it->second.total_dish_count_ - discount_ * it->second.table_counts_.size() + r * p0) /
- (num_customers_ + concentration_);
- }
- }
-
- double log_crp_prob() const {
- return log_crp_prob(discount_, concentration_);
- }
-
- static double log_beta_density(const double& x, const double& alpha, const double& beta) {
- assert(x > 0.0);
- assert(x < 1.0);
- assert(alpha > 0.0);
- assert(beta > 0.0);
- const double lp = (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta);
- return lp;
- }
-
- static double log_gamma_density(const double& x, const double& shape, const double& rate) {
- assert(x >= 0.0);
- assert(shape > 0.0);
- assert(rate > 0.0);
- const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape);
- return lp;
- }
-
- // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process
- // does not include P_0's
- double log_crp_prob(const double& discount, const double& concentration) const {
- double lp = 0.0;
- if (has_discount_prior())
- lp = log_beta_density(discount, discount_prior_alpha_, discount_prior_beta_);
- if (has_concentration_prior())
- lp += log_gamma_density(concentration, concentration_prior_shape_, concentration_prior_rate_);
- assert(lp <= 0.0);
- if (num_customers_) {
- if (discount > 0.0) {
- const double r = lgamma(1.0 - discount);
- lp += lgamma(concentration) - lgamma(concentration + num_customers_)
- + num_tables_ * log(discount) + lgamma(concentration / discount + num_tables_)
- - lgamma(concentration / discount);
- assert(std::isfinite(lp));
- for (typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.begin();
- it != dish_locs_.end(); ++it) {
- const DishLocations& cur = it->second;
- for (std::list<unsigned>::const_iterator ti = cur.table_counts_.begin(); ti != cur.table_counts_.end(); ++ti) {
- lp += lgamma(*ti - discount) - r;
- }
- }
- } else {
- assert(!"not implemented yet");
- }
- }
- assert(std::isfinite(lp));
- return lp;
- }
-
- void resample_hyperparameters(MT19937* rng) {
- assert(has_discount_prior() || has_concentration_prior());
- DiscountResampler dr(*this);
- ConcentrationResampler cr(*this);
- const int niterations = 10;
- double gamma_upper = std::numeric_limits<double>::infinity();
- for (int iter = 0; iter < 5; ++iter) {
- if (has_concentration_prior()) {
- concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0,
- gamma_upper, 0.0, niterations, 100*niterations);
- }
- if (has_discount_prior()) {
- discount_ = slice_sampler1d(dr, discount_, *rng, std::numeric_limits<double>::min(),
- 1.0, 0.0, niterations, 100*niterations);
- }
- }
- concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0,
- gamma_upper, 0.0, niterations, 100*niterations);
- }
-
- struct DiscountResampler {
- DiscountResampler(const CCRP& crp) : crp_(crp) {}
- const CCRP& crp_;
- double operator()(const double& proposed_discount) const {
- return crp_.log_crp_prob(proposed_discount, crp_.concentration_);
- }
- };
-
- struct ConcentrationResampler {
- ConcentrationResampler(const CCRP& crp) : crp_(crp) {}
- const CCRP& crp_;
- double operator()(const double& proposed_concentration) const {
- return crp_.log_crp_prob(crp_.discount_, proposed_concentration);
- }
- };
-
- struct DishLocations {
- DishLocations() : total_dish_count_() {}
- unsigned total_dish_count_; // customers at all tables with this dish
- std::list<unsigned> table_counts_; // list<> gives O(1) deletion and insertion, which we want
- // .size() is the number of tables for this dish
- };
-
- void Print(std::ostream* out) const {
- for (typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.begin();
- it != dish_locs_.end(); ++it) {
- (*out) << it->first << " (" << it->second.total_dish_count_ << " on " << it->second.table_counts_.size() << " tables): ";
- for (typename std::list<unsigned>::const_iterator i = it->second.table_counts_.begin();
- i != it->second.table_counts_.end(); ++i) {
- (*out) << " " << *i;
- }
- (*out) << std::endl;
- }
- }
-
- typedef typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator const_iterator;
- const_iterator begin() const {
- return dish_locs_.begin();
- }
- const_iterator end() const {
- return dish_locs_.end();
- }
-
- unsigned num_tables_;
- unsigned num_customers_;
- std::tr1::unordered_map<Dish, DishLocations, DishHash> dish_locs_;
-
- double discount_;
- double concentration_;
-
- // optional beta prior on discount_ (NaN if no prior)
- double discount_prior_alpha_;
- double discount_prior_beta_;
-
- // optional gamma prior on concentration_ (NaN if no prior)
- double concentration_prior_shape_;
- double concentration_prior_rate_;
-};
-
-template <typename T,typename H>
-std::ostream& operator<<(std::ostream& o, const CCRP<T,H>& c) {
- c.Print(&o);
- return o;
-}
-
-#endif
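
A minimal usage sketch of the CCRP class above (an editorial addition, not part of the deleted tree; it assumes cdec's utils header "sampler.h" for MT19937 and this ccrp.h on the include path, mirroring how clda.cc below drives the same API):

    #include <iostream>
    #include "ccrp.h"
    #include "sampler.h"   // MT19937, cdec's Mersenne Twister wrapper

    int main() {
      MT19937 rng;
      CCRP<int> crp(0.1, 1.0);        // discount d = 0.1, concentration c = 1.0
      const double p0 = 1.0 / 10.0;   // uniform base distribution over 10 dishes
      for (int i = 0; i < 100; ++i)
        crp.increment(i % 10, p0, &rng);   // seat 100 customers, 10 per dish
      std::cout << "p(dish 3)      = " << crp.prob(3, p0) << std::endl;
      std::cout << "log P(seating) = " << crp.log_crp_prob() << std::endl;
      crp.decrement(3, &rng);              // unseat one customer of dish 3
      return 0;
    }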
diff --git a/gi/clda/src/clda.cc b/gi/clda/src/clda.cc
deleted file mode 100644
index f548997f..00000000
--- a/gi/clda/src/clda.cc
+++ /dev/null
@@ -1,148 +0,0 @@
-#include <iostream>
-#include <vector>
-#include <map>
-#include <string>
-
-#include "timer.h"
-#include "crp.h"
-#include "ccrp.h"
-#include "sampler.h"
-#include "tdict.h"
-const size_t MAX_DOC_LEN_CHARS = 10000000;
-
-using namespace std;
-
-void ShowTopWordsForTopic(const map<WordID, int>& counts) {
- multimap<int, WordID> ms;
- for (map<WordID,int>::const_iterator it = counts.begin(); it != counts.end(); ++it)
- ms.insert(make_pair(it->second, it->first));
- int cc = 0;
- for (multimap<int, WordID>::reverse_iterator it = ms.rbegin(); it != ms.rend(); ++it) {
- cerr << it->first << ':' << TD::Convert(it->second) << " ";
- ++cc;
- if (cc==20) break;
- }
- cerr << endl;
-}
-
-int main(int argc, char** argv) {
- if (argc != 3) {
- cerr << "Usage: " << argv[0] << " num-classes num-samples\n";
- return 1;
- }
- const int num_classes = atoi(argv[1]);
- const int num_iterations = atoi(argv[2]);
- const int burnin_size = num_iterations * 0.9;
- if (num_classes < 2) {
- cerr << "Must request more than 1 class\n";
- return 1;
- }
- if (num_iterations < 5) {
- cerr << "Must request more than 5 iterations\n";
- return 1;
- }
- cerr << "CLASSES: " << num_classes << endl;
- char* buf = new char[MAX_DOC_LEN_CHARS];
- vector<vector<int> > wji; // w[j][i] - observed word i of doc j
- vector<vector<int> > zji; // z[j][i] - topic assignment for word i of doc j
- cerr << "READING DOCUMENTS\n";
- while(cin) {
- cin.getline(buf, MAX_DOC_LEN_CHARS);
- if (buf[0] == 0) continue;
- wji.push_back(vector<WordID>());
- TD::ConvertSentence(buf, &wji.back());
- }
- cerr << "READ " << wji.size() << " DOCUMENTS\n";
- MT19937 rng;
- cerr << "INITIALIZING RANDOM TOPIC ASSIGNMENTS\n";
- zji.resize(wji.size());
- double disc = 0.1;
- double beta = 10.0;
- double alpha = 50.0;
- const double uniform_topic = 1.0 / num_classes;
- const double uniform_word = 1.0 / TD::NumWords();
- vector<CCRP<int> > dr(zji.size(), CCRP<int>(1,1,1,1,disc, beta)); // dr[i] describes the probability of using a topic in document i
- vector<CCRP<int> > wr(num_classes, CCRP<int>(1,1,1,1,disc, alpha)); // wr[k] describes the probability of generating a word in topic k
- for (int j = 0; j < zji.size(); ++j) {
- const size_t num_words = wji[j].size();
- vector<int>& zj = zji[j];
- const vector<int>& wj = wji[j];
- zj.resize(num_words);
- for (int i = 0; i < num_words; ++i) {
- int random_topic = rng.next() * num_classes;
- if (random_topic == num_classes) { --random_topic; }
- zj[i] = random_topic;
- const int word = wj[i];
- dr[j].increment(random_topic, uniform_topic, &rng);
- wr[random_topic].increment(word, uniform_word, &rng);
- }
- }
- cerr << "SAMPLING\n";
- vector<map<WordID, int> > t2w(num_classes);
- Timer timer;
- SampleSet<double> ss;
- ss.resize(num_classes);
- double total_time = 0;
- for (int iter = 0; iter < num_iterations; ++iter) {
- cerr << '.';
- if (iter && iter % 10 == 0) {
- total_time += timer.Elapsed();
- timer.Reset();
- double llh = 0;
-#if 1
- for (int j = 0; j < dr.size(); ++j)
- dr[j].resample_hyperparameters(&rng);
- for (int j = 0; j < wr.size(); ++j)
- wr[j].resample_hyperparameters(&rng);
-#endif
-
- for (int j = 0; j < dr.size(); ++j)
- llh += dr[j].log_crp_prob();
- for (int j = 0; j < wr.size(); ++j)
- llh += wr[j].log_crp_prob();
- cerr << " [LLH=" << llh << " I=" << iter << "]\n";
- }
- for (int j = 0; j < zji.size(); ++j) {
- const size_t num_words = wji[j].size();
- vector<int>& zj = zji[j];
- const vector<int>& wj = wji[j];
- for (int i = 0; i < num_words; ++i) {
- const int word = wj[i];
- const int cur_topic = zj[i];
- dr[j].decrement(cur_topic, &rng);
- wr[cur_topic].decrement(word, &rng);
-
- for (int k = 0; k < num_classes; ++k) {
- ss[k]= dr[j].prob(k, uniform_topic) * wr[k].prob(word, uniform_word);
- }
- const int new_topic = rng.SelectSample(ss);
- dr[j].increment(new_topic, uniform_topic, &rng);
- wr[new_topic].increment(word, uniform_word, &rng);
- zj[i] = new_topic;
- if (iter > burnin_size) {
- ++t2w[cur_topic][word];
- }
- }
- }
- }
- for (int i = 0; i < num_classes; ++i) {
- cerr << "---------------------------------\n";
- cerr << " final PYP(" << wr[i].discount() << "," << wr[i].concentration() << ")\n";
- ShowTopWordsForTopic(t2w[i]);
- }
- cerr << "-------------\n";
-#if 0
- for (int j = 0; j < zji.size(); ++j) {
- const size_t num_words = wji[j].size();
- vector<int>& zj = zji[j];
- const vector<int>& wj = wji[j];
- zj.resize(num_words);
- for (int i = 0; i < num_words; ++i) {
- cerr << TD::Convert(wji[j][i]) << '(' << zj[i] << ") ";
- }
- cerr << endl;
- }
-#endif
- return 0;
-}
-
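For context (editorial note): per the usage message above, the sampler would be invoked as something like "clda 25 1000 < corpus.txt" — num-classes, then num-samples, with one whitespace-tokenized document per line on stdin (the corpus filename here is hypothetical). Only the final 10% of iterations, those past burnin_size, accumulate the per-topic word counts that ShowTopWordsForTopic prints; hyperparameters are re-sampled every 10 iterations.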
diff --git a/gi/clda/src/crp.h b/gi/clda/src/crp.h
deleted file mode 100644
index 9d35857e..00000000
--- a/gi/clda/src/crp.h
+++ /dev/null
@@ -1,50 +0,0 @@
-#ifndef _CRP_H_
-#define _CRP_H_
-
-// shamelessly adapted from code by Phil Blunsom and Trevor Cohn
-
-#include <boost/functional/hash.hpp>
-#include <tr1/unordered_map>
-
-#include "prob.h"
-
-template <typename DishType, typename Hash = boost::hash<DishType> >
-class CRP {
- public:
- CRP(double alpha) : alpha_(alpha), palpha_(alpha), total_customers_() {}
- void increment(const DishType& dish);
- void decrement(const DishType& dish);
- void erase(const DishType& dish) {
- counts_.erase(dish);
- }
- inline int count(const DishType& dish) const {
- const typename MapType::const_iterator i = counts_.find(dish);
- if (i == counts_.end()) return 0; else return i->second;
- }
- inline prob_t prob(const DishType& dish, const prob_t& p0) const {
- return (prob_t(count(dish)) + palpha_ * p0) / prob_t(total_customers_ + alpha_);
- }
- private:
- typedef std::tr1::unordered_map<DishType, int, Hash> MapType;
- MapType counts_;
- const double alpha_;
- const prob_t palpha_;
- int total_customers_;
-};
-
-template <typename Dish, typename Hash>
-void CRP<Dish,Hash>::increment(const Dish& dish) {
- ++counts_[dish];
- ++total_customers_;
-}
-
-template <typename Dish, typename Hash>
-void CRP<Dish,Hash>::decrement(const Dish& dish) {
- typename MapType::iterator i = counts_.find(dish);
- assert(i != counts_.end());
- if (--i->second == 0)
- counts_.erase(i);
- --total_customers_;
-}
-
-#endif
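
A matching sketch for this vanilla CRP (editorial addition; prob_t is cdec's log-space probability type from "prob.h", and the as_float() conversion back to a plain double is an assumption about that header):

    #include <iostream>
    #include "crp.h"
    #include "prob.h"   // prob_t: cdec's log-space probability type

    int main() {
      CRP<int> crp(1.0);       // concentration alpha = 1.0
      const prob_t p0(0.5);    // base probability of each dish
      crp.increment(7);
      crp.increment(7);
      crp.increment(3);
      // p(7) = (c(7) + alpha*p0) / (N + alpha) = (2 + 0.5) / (3 + 1) = 0.625
      std::cout << crp.prob(7, p0).as_float() << std::endl;
      crp.decrement(3);
      return 0;
    }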
diff --git a/gi/clda/src/slice_sampler.h b/gi/clda/src/slice_sampler.h
deleted file mode 100644
index aa48a169..00000000
--- a/gi/clda/src/slice_sampler.h
+++ /dev/null
@@ -1,191 +0,0 @@
-//! slice-sampler.h is an MCMC slice sampler
-//!
-//! Mark Johnson, 1st August 2008
-
-#ifndef SLICE_SAMPLER_H
-#define SLICE_SAMPLER_H
-
-#include <algorithm>
-#include <cassert>
-#include <cmath>
-#include <iostream>
-#include <limits>
-
-//! slice_sampler_rfc_type{} returns the value of a user-specified
-//! function if the argument is within range, or - infinity otherwise
-//
-template <typename F, typename Fn, typename U>
-struct slice_sampler_rfc_type {
- F min_x, max_x;
- const Fn& f;
- U max_nfeval, nfeval;
- slice_sampler_rfc_type(F min_x, F max_x, const Fn& f, U max_nfeval)
- : min_x(min_x), max_x(max_x), f(f), max_nfeval(max_nfeval), nfeval(0) { }
-
- F operator() (F x) {
- if (min_x < x && x < max_x) {
- assert(++nfeval <= max_nfeval);
- F fx = f(x);
- assert(std::isfinite(fx));
- return fx;
- }
- return -std::numeric_limits<F>::infinity();
- }
-}; // slice_sampler_rfc_type{}
-
-//! slice_sampler1d() implements the univariate "range doubling" slice sampler
-//! described in Neal (2003) "Slice Sampling", The Annals of Statistics 31(3), 705-767.
-//
-template <typename F, typename LogF, typename Uniform01>
-F slice_sampler1d(const LogF& logF0, //!< log of function to sample
- F x, //!< starting point
- Uniform01& u01, //!< uniform [0,1) random number generator
- F min_x = -std::numeric_limits<F>::infinity(), //!< minimum value of support
- F max_x = std::numeric_limits<F>::infinity(), //!< maximum value of support
- F w = 0.0, //!< guess at initial width
- unsigned nsamples=1, //!< number of samples to draw
- unsigned max_nfeval=200) //!< max number of function evaluations
-{
- typedef unsigned U;
- slice_sampler_rfc_type<F,LogF,U> logF(min_x, max_x, logF0, max_nfeval);
-
- assert(std::isfinite(x));
-
- if (w <= 0.0) { // set w to a default width
- if (min_x > -std::numeric_limits<F>::infinity() && max_x < std::numeric_limits<F>::infinity())
- w = (max_x - min_x)/4;
- else
- w = std::max(((x < 0.0) ? -x : x)/4, (F) 0.1);
- }
- assert(std::isfinite(w));
-
- F logFx = logF(x);
- for (U sample = 0; sample < nsamples; ++sample) {
- F logY = logFx + log(u01()+1e-100); //! slice logFx at this value
- assert(std::isfinite(logY));
-
- F xl = x - w*u01(); //! lower bound on slice interval
- F logFxl = logF(xl);
- F xr = xl + w; //! upper bound on slice interval
- F logFxr = logF(xr);
-
- while (logY < logFxl || logY < logFxr) // doubling procedure
- if (u01() < 0.5)
- logFxl = logF(xl -= xr - xl);
- else
- logFxr = logF(xr += xr - xl);
-
- F xl1 = xl;
- F xr1 = xr;
- while (true) { // shrinking procedure
- F x1 = xl1 + u01()*(xr1 - xl1);
- if (logY < logF(x1)) {
- F xl2 = xl; // acceptance procedure
- F xr2 = xr;
- bool d = false;
- while (xr2 - xl2 > 1.1*w) {
- F xm = (xl2 + xr2)/2;
- if ((x < xm && x1 >= xm) || (x >= xm && x1 < xm))
- d = true;
- if (x1 < xm)
- xr2 = xm;
- else
- xl2 = xm;
- if (d && logY >= logF(xl2) && logY >= logF(xr2))
- goto unacceptable;
- }
- x = x1;
- goto acceptable;
- }
- goto acceptable;
- unacceptable:
- if (x1 < x) // rest of shrinking procedure
- xl1 = x1;
- else
- xr1 = x1;
- }
- acceptable:
- w = (4*w + (xr1 - xl1))/5; // update width estimate
- }
- return x;
-}
-
-/*
-//! slice_sampler1d() implements a 1-d MCMC slice sampler.
-//! It should be correct for unimodal distributions, but
-//! not for multimodal ones.
-//
-template <typename F, typename LogP, typename Uniform01>
-F slice_sampler1d(const LogP& logP, //!< log of distribution to sample
- F x, //!< initial sample
- Uniform01& u01, //!< uniform random number generator
- F min_x = -std::numeric_limits<F>::infinity(), //!< minimum value of support
- F max_x = std::numeric_limits<F>::infinity(), //!< maximum value of support
- F w = 0.0, //!< guess at initial width
- unsigned nsamples=1, //!< number of samples to draw
- unsigned max_nfeval=200) //!< max number of function evaluations
-{
- typedef unsigned U;
- assert(std::isfinite(x));
- if (w <= 0.0) {
- if (min_x > -std::numeric_limits<F>::infinity() && max_x < std::numeric_limits<F>::infinity())
- w = (max_x - min_x)/4;
- else
- w = std::max(((x < 0.0) ? -x : x)/4, 0.1);
- }
- // TRACE4(x, min_x, max_x, w);
- F logPx = logP(x);
- assert(std::isfinite(logPx));
- U nfeval = 1;
- for (U sample = 0; sample < nsamples; ++sample) {
- F x0 = x;
- F logU = logPx + log(u01()+1e-100);
- assert(std::isfinite(logU));
- F r = u01();
- F xl = std::max(min_x, x - r*w);
- F xr = std::min(max_x, x + (1-r)*w);
- // TRACE3(x, logPx, logU);
- while (xl > min_x && logP(xl) > logU) {
- xl -= w;
- w *= 2;
- ++nfeval;
- if (nfeval >= max_nfeval)
- std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << std::endl;
- assert(nfeval < max_nfeval);
- }
- xl = std::max(xl, min_x);
- while (xr < max_x && logP(xr) > logU) {
- xr += w;
- w *= 2;
- ++nfeval;
- if (nfeval >= max_nfeval)
- std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xr = " << xr << std::endl;
- assert(nfeval < max_nfeval);
- }
- xr = std::min(xr, max_x);
- while (true) {
- r = u01();
- x = r*xl + (1-r)*xr;
- assert(std::isfinite(x));
- logPx = logP(x);
- // TRACE4(logPx, x, xl, xr);
- assert(std::isfinite(logPx));
- ++nfeval;
- if (nfeval >= max_nfeval)
- std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << ", xr = " << xr << ", x = " << x << std::endl;
- assert(nfeval < max_nfeval);
- if (logPx > logU)
- break;
- else if (x > x0)
- xr = x;
- else
- xl = x;
- }
- // w = (4*w + (xr-xl))/5; // gradually adjust w
- }
- // TRACE2(logPx, x);
- return x;
-} // slice_sampler1d()
-*/
-
-#endif // SLICE_SAMPLER_H
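
A standalone sketch of driving slice_sampler1d (editorial addition): a std::mt19937-based functor stands in for cdec's MT19937, which ccrp.h above passes as the Uniform01 argument; the target is an unnormalized Gamma(3,1) density, chosen so the output is easy to sanity-check:

    #include <cmath>
    #include <iostream>
    #include <limits>
    #include <random>
    #include "slice_sampler.h"

    struct Uniform01 {                              // drop-in for MT19937's operator()
      std::mt19937 gen;
      std::uniform_real_distribution<double> dist;  // defaults to [0, 1)
      double operator()() { return dist(gen); }
    };

    double log_gamma3(double x) { return 2.0 * std::log(x) - x; }  // log(x^2 e^-x)

    int main() {
      Uniform01 u01;
      double x = 1.0;                  // starting point inside the support (0, inf)
      for (int i = 0; i < 1000; ++i)   // 1000 dependent MCMC draws
        x = slice_sampler1d(log_gamma3, x, u01, 0.0,
                            std::numeric_limits<double>::infinity());
      std::cout << "last draw: " << x << std::endl;  // Gamma(3,1) has mean 3
      return 0;
    }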
diff --git a/gi/clda/src/timer.h b/gi/clda/src/timer.h
deleted file mode 100644
index 123d9a94..00000000
--- a/gi/clda/src/timer.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef _TIMER_STATS_H_
-#define _TIMER_STATS_H_
-
-#include <ctime>
-
-struct Timer {
- Timer() { Reset(); }
- void Reset() {
- start_t = clock();
- }
- double Elapsed() const {
- const clock_t end_t = clock();
- const double elapsed = (end_t - start_t) / static_cast<double>(CLOCKS_PER_SEC);
- return elapsed;
- }
- private:
- std::clock_t start_t;
-};
-
-#endif
diff --git a/gi/evaluation/conditional_entropy.py b/gi/evaluation/conditional_entropy.py
deleted file mode 100644
index 356d3b1d..00000000
--- a/gi/evaluation/conditional_entropy.py
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/env python
-
-import sys, math, itertools, getopt
-
-def usage():
- print >>sys.stderr, 'Usage:', sys.argv[0], '[-s slash_threshold] input-1 input-2'
- sys.exit(0)
-
-optlist, args = getopt.getopt(sys.argv[1:], 'hs:')
-slash_threshold = None
-for opt, arg in optlist:
- if opt == '-s':
- slash_threshold = int(arg)
- else:
- usage()
-if len(args) != 2:
- usage()
-
-ginfile = open(args[0])
-pinfile = open(args[1])
-
-# evaluating: H(G | P) = sum_{g,p} p(g,p) log { p(p) / p(g,p) }
-# = sum_{g,p} c(g,p)/N { log c(p) - log N - log c(g,p) + log N }
-# = 1/N sum_{g,p} c(g,p) { log c(p) - log c(g,p) }
-# where G = gold, P = predicted, N = number of events
-
-N = 0
-gold_frequencies = {}
-predict_frequencies = {}
-joint_frequencies = {}
-
-for gline, pline in itertools.izip(ginfile, pinfile):
- gparts = gline.split('||| ')[1].split()
- pparts = pline.split('||| ')[1].split()
- assert len(gparts) == len(pparts)
-
- for gpart, ppart in zip(gparts, pparts):
- gtag = gpart.split(':',1)[1]
- ptag = ppart.split(':',1)[1]
-
- if slash_threshold == None or gtag.count('/') + gtag.count('\\') <= slash_threshold:
- joint_frequencies.setdefault((gtag, ptag), 0)
- joint_frequencies[gtag,ptag] += 1
-
- predict_frequencies.setdefault(ptag, 0)
- predict_frequencies[ptag] += 1
-
- gold_frequencies.setdefault(gtag, 0)
- gold_frequencies[gtag] += 1
-
- N += 1
-
-hg2p = 0
-hp2g = 0
-for (gtag, ptag), cgp in joint_frequencies.items():
- hp2g += cgp * (math.log(predict_frequencies[ptag], 2) - math.log(cgp, 2))
- hg2p += cgp * (math.log(gold_frequencies[gtag], 2) - math.log(cgp, 2))
-hg2p /= N
-hp2g /= N
-
-print 'H(P|G)', hg2p, 'H(G|P)', hp2g, 'VI', hg2p + hp2g
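A quick numeric check of the identity above (editorial example): with N = 4 events and joint counts c(A,X) = 2, c(B,X) = 1, c(B,Y) = 1, the predicted marginals are c(X) = 3 and c(Y) = 1, so H(G|P) = [2*(log2 3 - log2 2) + 1*(log2 3 - log2 1) + 1*(log2 1 - log2 1)] / 4 ≈ (1.170 + 1.585 + 0) / 4 ≈ 0.689 bits. The script reports H(P|G) and H(G|P) together with their sum, the variation of information (VI).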
diff --git a/gi/evaluation/confusion_matrix.py b/gi/evaluation/confusion_matrix.py
deleted file mode 100644
index 2dd7aa47..00000000
--- a/gi/evaluation/confusion_matrix.py
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env python
-
-import sys, math, itertools, getopt
-
-def usage():
- print >>sys.stderr, 'Usage:', sys.argv[0], '[-s slash_threshold] [-p output] [-m] input-1 input-2'
- sys.exit(0)
-
-optlist, args = getopt.getopt(sys.argv[1:], 'hs:mp:')
-slash_threshold = None
-output_fname = None
-show_matrix = False
-for opt, arg in optlist:
- if opt == '-s':
- slash_threshold = int(arg)
- elif opt == '-p':
- output_fname = arg
- elif opt == '-m':
- show_matrix = True
- else:
- usage()
-if len(args) != 2 or (not show_matrix and not output_fname):
- usage()
-
-ginfile = open(args[0])
-pinfile = open(args[1])
-
-if output_fname:
- try:
- import Image, ImageDraw
- except ImportError:
- print >>sys.stderr, "Error: Python Image Library not available. Did you forget to set your PYTHONPATH environment variable?"
- sys.exit(1)
-
-N = 0
-gold_frequencies = {}
-predict_frequencies = {}
-joint_frequencies = {}
-
-for gline, pline in itertools.izip(ginfile, pinfile):
- gparts = gline.split('||| ')[1].split()
- pparts = pline.split('||| ')[1].split()
- assert len(gparts) == len(pparts)
-
- for gpart, ppart in zip(gparts, pparts):
- gtag = gpart.split(':',1)[1]
- ptag = ppart.split(':',1)[1]
-
- if slash_threshold == None or gtag.count('/') + gtag.count('\\') <= slash_threshold:
- joint_frequencies.setdefault((gtag, ptag), 0)
- joint_frequencies[gtag,ptag] += 1
-
- predict_frequencies.setdefault(ptag, 0)
- predict_frequencies[ptag] += 1
-
- gold_frequencies.setdefault(gtag, 0)
- gold_frequencies[gtag] += 1
-
- N += 1
-
-# find top tags
-gtags = gold_frequencies.items()
-gtags.sort(lambda x,y: x[1]-y[1])
-gtags.reverse()
-#gtags = gtags[:50]
-
-preds = predict_frequencies.items()
-preds.sort(lambda x,y: x[1]-y[1])
-preds.reverse()
-
-if show_matrix:
- print '%7s %7s' % ('pred', 'cnt'),
- for gtag, gcount in gtags: print '%7s' % gtag,
- print
- print '=' * 80
-
- for ptag, pcount in preds:
- print '%7s %7d' % (ptag, pcount),
- for gtag, gcount in gtags:
- print '%7d' % joint_frequencies.get((gtag, ptag), 0),
- print
-
- print '%7s %7d' % ('total', N),
- for gtag, gcount in gtags: print '%7d' % gcount,
- print
-
-if output_fname:
- offset=10
-
- image = Image.new("RGB", (len(preds), len(gtags)), (255, 255, 255))
- #hsl(hue, saturation%, lightness%)
-
- # re-sort preds to get a better diagonal
- ptags=[]
- if True:
- ptags = map(lambda (p,c): p, preds)
- else:
- remaining = set(predict_frequencies.keys())
- for y, (gtag, gcount) in enumerate(gtags):
- best = (None, 0)
- for ptag in remaining:
- #pcount = predict_frequencies[ptag]
- p = joint_frequencies.get((gtag, ptag), 0)# / float(pcount)
- if p > best[1]: best = (ptag, p)
- ptags.append(ptag)
- remaining.remove(ptag)
- if not remaining: break
-
- print 'Predicted tag ordering:', ' '.join(ptags)
- print 'Gold tag ordering:', ' '.join(map(lambda (t,c): t, gtags))
-
- draw = ImageDraw.Draw(image)
- for x, ptag in enumerate(ptags):
- pcount = predict_frequencies[ptag]
- minval = math.log(offset)
- maxval = math.log(pcount + offset)
- for y, (gtag, gcount) in enumerate(gtags):
- f = math.log(offset + joint_frequencies.get((gtag, ptag), 0))
- z = int(240. * (maxval - f) / float(maxval - minval))
- #print x, y, z, f, maxval
- draw.point([(x,y)], fill='hsl(%d, 100%%, 50%%)' % z)
- del draw
- image.save(output_fname)
diff --git a/gi/evaluation/entropy.py b/gi/evaluation/entropy.py
deleted file mode 100644
index ec1ef502..00000000
--- a/gi/evaluation/entropy.py
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/usr/bin/env python
-
-import sys, math, itertools, getopt
-
-def usage():
- print >>sys.stderr, 'Usage:', sys.argv[0], '[-s slash_threshold] input-file'
- sys.exit(0)
-
-optlist, args = getopt.getopt(sys.argv[1:], 'hs:')
-slash_threshold = None
-for opt, arg in optlist:
- if opt == '-s':
- slash_threshold = int(arg)
- else:
- usage()
-if len(args) != 1:
- usage()
-
-infile = open(args[0])
-N = 0
-frequencies = {}
-
-for line in infile:
-
- for part in line.split('||| ')[1].split():
- tag = part.split(':',1)[1]
-
- if slash_threshold == None or tag.count('/') + tag.count('\\') <= slash_threshold:
- frequencies.setdefault(tag, 0)
- frequencies[tag] += 1
- N += 1
-
-h = 0
-for tag, c in frequencies.items():
- h -= c * (math.log(c, 2) - math.log(N, 2))
-h /= N
-
-print 'entropy', h
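Worked example (editorial): four tokens tagged A, A, B, B give c(A) = c(B) = 2 and N = 4, so h = -[2*(log2 2 - log2 4) + 2*(log2 2 - log2 4)] / 4 = 1 bit, the entropy of a uniform two-tag distribution.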
diff --git a/gi/evaluation/extract_ccg_labels.py b/gi/evaluation/extract_ccg_labels.py
deleted file mode 100644
index e0034648..00000000
--- a/gi/evaluation/extract_ccg_labels.py
+++ /dev/null
@@ -1,129 +0,0 @@
-#!/usr/bin/env python
-
-#
-# Takes spans input along with treebank and spits out CG style categories for each span.
-# spans = output from CDEC's extools/extractor with --base_phrase_spans option
-# treebank = PTB format, one tree per line
-#
-# Output is in CDEC labelled-span format
-#
-
-import sys, itertools, tree
-
-tinfile = open(sys.argv[1])
-einfile = open(sys.argv[2])
-
-def number_leaves(node, next=0):
- left, right = None, None
- for child in node.children:
- l, r = number_leaves(child, next)
- next = max(next, r+1)
- if left == None or l < left:
- left = l
- if right == None or r > right:
- right = r
-
- #print node, left, right, next
- if left == None or right == None:
- assert not node.children
- left = right = next
-
- node.left = left
- node.right = right
-
- return left, right
-
-def ancestor(node, indices):
- #print node, node.left, node.right, indices
- # returns the deepest node covering all the indices
- if min(indices) >= node.left and max(indices) <= node.right:
- # try the children
- for child in node.children:
- x = ancestor(child, indices)
- if x: return x
- return node
- else:
- return None
-
-def frontier(node, indices):
- #print 'frontier for node', node, 'indices', indices
- if node.left > max(indices) or node.right < min(indices):
- #print '\toutside'
- return [node]
- elif node.children:
- #print '\tcovering at least part'
- ns = []
- for child in node.children:
- n = frontier(child, indices)
- ns.extend(n)
- return ns
- else:
- return [node]
-
-def project_heads(node):
- #print 'project_heads', node
- is_head = node.data.tag.endswith('-HEAD')
- if node.children:
- found = 0
- for child in node.children:
- x = project_heads(child)
- if x:
- node.data.tag = x
- found += 1
- assert found == 1
- elif is_head:
- node.data.tag = node.data.tag[:-len('-HEAD')]
-
- if is_head:
- return node.data.tag
- else:
- return None
-
-for tline, eline in itertools.izip(tinfile, einfile):
- if tline.strip() != '(())':
- if tline.startswith('( '):
- tline = tline[2:-1].strip()
- tr = tree.parse_PST(tline)
- if tr != None:
- number_leaves(tr)
- #project_heads(tr) # assumes Bikel-style head annotation for the input trees
- else:
- tr = None
-
- parts = eline.strip().split(" ||| ")
- zh, en = parts[:2]
- spans = parts[-1]
- print '|||',
- for span in spans.split():
- sps = span.split(":")
- i, j, x, y = map(int, sps[0].split("-"))
-
- if tr:
- a = ancestor(tr, range(x,y))
- try:
- fs = frontier(a, range(x,y))
- except:
- print >>sys.stderr, "problem with line", tline.strip(), "--", eline.strip()
- raise
-
- #print x, y
- #print 'ancestor', a
- #print 'frontier', fs
-
- cat = a.data.tag
- for f in fs:
- if f.right < x:
- cat += '\\' + f.data.tag
- else:
- break
- fs.reverse()
- for f in fs:
- if f.left >= y:
- cat += '/' + f.data.tag
- else:
- break
- else:
- cat = 'FAIL'
-
- print '%d-%d:%s' % (x, y, cat),
- print
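To illustrate the labelling (editorial example): for the tree (S (NP (NNP John)) (VP (VB likes) (NP (NNP Mary)))) and a span over leaves 0-2 ("John likes"), ancestor() returns S, the frontier contains the uncovered object NP to its right, and the emitted label is 0-2:S/NP — an S missing an NP on the right. Constituents missing on the left are appended with \ instead, and FAIL is emitted only when no tree is available for the sentence.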
diff --git a/gi/evaluation/tree.py b/gi/evaluation/tree.py
deleted file mode 100644
index 702d80b6..00000000
--- a/gi/evaluation/tree.py
+++ /dev/null
@@ -1,485 +0,0 @@
-import re, sys
-
-class Symbol:
- def __init__(self, nonterm, term=None, var=None):
- assert not (term != None and var != None)
- self.tag = nonterm
- self.token = term
- self.variable = var
-
- def is_variable(self):
- return self.variable != None
-
- def __eq__(self, other):
- return self.tag == other.tag and self.token == other.token and self.variable == other.variable
-
- def __ne__(self, other):
- return not (self == other)
-
- def __hash__(self):
- return hash((self.tag, self.token, self.variable))
-
- def __repr__(self):
- return str(self)
-
- def __cmp__(self, other):
- return cmp((self.tag, self.token, self.variable),
- (other.tag, other.token, other.variable))
-
- def __str__(self):
- parts = []
- if False: # DEPENDENCY
- if self.token:
- parts.append(str(self.token))
- elif self.variable != None:
- parts.append('#%d' % self.variable)
- if self.tag:
- parts.append(str(self.tag))
- return '/'.join(parts)
- else:
- if self.tag:
- parts.append(str(self.tag))
- if self.token:
- parts.append(str(self.token))
- elif self.variable != None:
- parts.append('#%d' % self.variable)
- return ' '.join(parts)
-
-class TreeNode:
- def __init__(self, data, children=None, order=-1):
- self.data = data
- self.children = []
- self.order = order
- self.parent = None
- if children: self.children = children
-
- def insert(self, child):
- self.children.append(child)
- child.parent = self
-
- def leaves(self):
- ls = []
- for node in self.xtraversal():
- if not node.children:
- ls.append(node.data)
- return ls
-
- def leaf_nodes(self):
- ls = []
- for node in self.xtraversal():
- if not node.children:
- ls.append(node)
- return ls
-
- def max_depth(self):
- d = 1
- for child in self.children:
- d = max(d, 1 + child.max_depth())
- if not self.children and self.data.token:
- d = 2
- return d
-
- def max_width(self):
- w = 0
- for child in self.children:
- w += child.max_width()
- return max(1, w)
-
- def num_internal_nodes(self):
- if self.children:
- n = 1
- for child in self.children:
- n += child.num_internal_nodes()
- return n
- elif self.data.token:
- return 1
- else:
- return 0
-
- def postorder_traversal(self, visit):
- """
- Postorder traversal; no guarantee that terminals will be read in the
- correct order for dep. trees.
- """
- for child in self.children:
- child.postorder_traversal(visit)
- visit(self)
-
- def traversal(self, visit):
- """
- Preorder for phrase structure trees, and inorder for dependency trees.
- In both cases the terminals will be read off in the correct order.
- """
- visited_self = False
- if self.order <= 0:
- visited_self = True
- visit(self)
-
- for i, child in enumerate(self.children):
- child.traversal(visit)
- if i + 1 == self.order:
- visited_self = True
- visit(self)
-
- assert visited_self
-
- def xtraversal(self):
- visited_self = False
- if self.order <= 0:
- visited_self = True
- yield self
-
- for i, child in enumerate(self.children):
- for d in child.xtraversal():
- yield d
-
- if i + 1 == self.order:
- visited_self = True
- yield self
-
- assert visited_self
-
- def xpostorder_traversal(self):
- for i, child in enumerate(self.children):
- for d in child.xpostorder_traversal():
- yield d
- yield self
-
- def edges(self):
- es = []
- self.traverse_edges(lambda h,c: es.append((h,c)))
- return es
-
- def traverse_edges(self, visit):
- for child in self.children:
- visit(self.data, child.data)
- child.traverse_edges(visit)
-
- def subtrees(self, include_self=False):
- st = []
- if include_self:
- stack = [self]
- else:
- stack = self.children[:]
-
- while stack:
- node = stack.pop()
- st.append(node)
- stack.extend(node.children)
- return st
-
- def find_parent(self, node):
- try:
- index = self.children.index(node)
- return self, index
- except ValueError:
- for child in self.children:
- if isinstance(child, TreeNode):
- r = child.find_parent(node)
- if r: return r
- return None
-
- def is_ancestor_of(self, node):
- if self == node:
- return True
- for child in self.children:
- if child.is_ancestor_of(node):
- return True
- return False
-
- def find(self, node):
- if self == node:
- return self
- for child in self.children:
- if isinstance(child, TreeNode):
- r = child.find(node)
- if r: return r
- else:
- if child == node:
- return child
- return None
-
- def equals_ignorecase(self, other):
- if not isinstance(other, TreeNode):
- return False
- if self.data != other.data:
- return False
- if len(self.children) != len(other.children):
- return False
- for mc, oc in zip(self.children, other.children):
- if isinstance(mc, TreeNode):
- if not mc.equals_ignorecase(oc):
- return False
- else:
- if mc.lower() != oc.lower():
- return False
- return True
-
- def node_number(self, numbering, next=0):
- if self.order <= 0:
- numbering[id(self)] = next
- next += 1
-
- for i, child in enumerate(self.children):
- next = child.node_number(numbering, next)
- if i + 1 == self.order:
- numbering[id(self)] = next
- next += 1
-
- return next
-
- def display_conll(self, out):
- numbering = {}
- self.node_number(numbering)
- next = 0
- self.children[0].traversal(lambda x: \
- out.write('%d\t%s\t%s\t%s\t%s\t_\t%d\tLAB\n' \
- % (numbering[id(x)], x.data.token, x.data.token,
- x.data.tag, x.data.tag, numbering[id(x.parent)])))
- out.write('\n')
-
- def size(self):
- sz = 1
- for child in self.children:
- sz += child.size()
- return sz
-
- def __eq__(self, other):
- if isinstance(other, TreeNode) and self.data == other.data \
- and self.children == other.children:
- return True
- return False
-
- def __cmp__(self, other):
- if not isinstance(other, TreeNode): return 1
- n = cmp(self.data, other.data)
- if n != 0: return n
- n = len(self.children) - len(other.children)
- if n != 0: return n
- for sc, oc in zip(self.children, other.children):
- n = cmp(sc, oc)
- if n != 0: return n
- return 0
-
- def __ne__(self, other):
- return not self.__eq__(other)
-
- def __hash__(self):
- return hash((self.data, tuple(self.children)))
-
- def __repr__(self):
- return str(self)
-
- def __str__(self):
- s = '('
- space = False
- if self.order <= 0:
- s += str(self.data)
- space = True
- for i, child in enumerate(self.children):
- if space: s += ' '
- s += str(child)
- space = True
- if i+1 == self.order:
- s += ' ' + str(self.data)
- return s + ')'
-
-def read_PSTs(fname):
- infile = open(fname)
- trees = []
- for line in infile:
- trees.append(parse_PST(line.strip()))
- infile.close()
- return trees
-
-def parse_PST_multiline(infile, hash_is_var=True):
- buf = ''
- num_open = 0
- while True:
- line = infile.readline()
- if not line:
- return None
- buf += ' ' + line.rstrip()
- num_open += line.count('(') - line.count(')')
- if num_open == 0:
- break
-
- return parse_PST(buf, hash_is_var)
-
-def parse_PST(line, hash_is_var=True):
- line = line.rstrip()
- if not line or line.lower() == 'null':
- return None
-
- # allow either (a/DT) or (DT a)
- #parts_re = re.compile(r'(\(*)([^/)]*)(?:/([^)]*))?(\)*)$')
-
- # only allow (DT a)
- parts_re = re.compile(r'(\(*)([^)]*)(\)*)$')
-
- root = TreeNode(Symbol('TOP'))
- stack = [root]
- for part in line.rstrip().split():
- m = parts_re.match(part)
- #opening, tok_or_tag, tag, closing = m.groups()
- opening, tok_or_tag, closing = m.groups()
- tag = None
- #print 'token', part, 'bits', m.groups()
- for i in opening:
- node = TreeNode(Symbol(None))
- stack[-1].insert(node)
- stack.append(node)
-
- if tag:
- stack[-1].data.tag = tag
- if hash_is_var and tok_or_tag.startswith('#'):
- stack[-1].data.variable = int(tok_or_tag[1:])
- else:
- stack[-1].data.token = tok_or_tag
- else:
- if stack[-1].data.tag == None:
- stack[-1].data.tag = tok_or_tag
- else:
- if hash_is_var and tok_or_tag.startswith('#'):
- try:
- stack[-1].data.variable = int(tok_or_tag[1:])
- except ValueError: # it's really a token!
- #print >>sys.stderr, 'Warning: # used for token:', tok_or_tag
- stack[-1].data.token = tok_or_tag
- else:
- stack[-1].data.token = tok_or_tag
-
- for i in closing:
- stack.pop()
-
- #assert str(root.children[0]) == line
- return root.children[0]
-
-def read_DTs(fname):
- infile = open(fname)
- trees = []
- while True:
- t = parse_DT(infile)
- if t: trees.append(t)
- else: break
- infile.close()
- return trees
-
-def read_bracketed_DTs(fname):
- infile = open(fname)
- trees = []
- for line in infile:
- trees.append(parse_bracketed_DT(line))
- infile.close()
- return trees
-
-def parse_DT(infile):
- tokens = [Symbol('ROOT')]
- children = {}
-
- for line in infile:
- parts = line.rstrip().split()
- #print parts
- if not parts: break
- index = len(tokens)
- token = parts[1]
- tag = parts[3]
- parent = int(parts[6])
- if token.startswith('#'):
- tokens.append(Symbol(tag, var=int(token[1:])))
- else:
- tokens.append(Symbol(tag, token))
- children.setdefault(parent, set()).add(index)
-
- if len(tokens) == 1: return None
-
- root = TreeNode(Symbol('ROOT'), [], 0)
- schedule = []
- for child in sorted(children[0]):
- schedule.append((root, child))
-
- while schedule:
- parent, index = schedule[0]
- del schedule[0]
-
- node = TreeNode(tokens[index])
- node.order = 0
- parent.insert(node)
-
- for child in sorted(children.get(index, [])):
- schedule.append((node, child))
- if child < index:
- node.order += 1
-
- return root
-
-_bracket_split_re = re.compile(r'([(]*)([^)/]*)(?:/([^)]*))?([)]*)')
-
-def parse_bracketed_DT(line, insert_root=True):
- line = line.rstrip()
- if not line or line == 'NULL': return None
- #print line
-
- root = TreeNode(Symbol('ROOT'))
- stack = [root]
- for part in line.rstrip().split():
- m = _bracket_split_re.match(part)
-
- for c in m.group(1):
- node = TreeNode(Symbol(None))
- stack[-1].insert(node)
- stack.append(node)
-
- if m.group(3) != None:
- if m.group(2).startswith('#'):
- stack[-1].data.variable = int(m.group(2)[1:])
- else:
- stack[-1].data.token = m.group(2)
- stack[-1].data.tag = m.group(3)
- else:
- stack[-1].data.tag = m.group(2)
- stack[-1].order = len(stack[-1].children)
- # FIXME: also check for vars
-
- for c in m.group(4):
- stack.pop()
-
- assert len(stack) == 1
- if not insert_root or root.children[0].data.tag == 'ROOT':
- return root.children[0]
- else:
- return root
-
-_bracket_split_notag_re = re.compile(r'([(]*)([^)/]*)([)]*)')
-
-def parse_bracketed_untagged_DT(line):
- line = line.rstrip()
- if not line or line == 'NULL': return None
-
- root = TreeNode(Symbol('TOP'))
- stack = [root]
- for part in line.rstrip().split():
- m = _bracket_split_notag_re.match(part)
-
- for c in m.group(1):
- node = TreeNode(Symbol(None))
- stack[-1].insert(node)
- stack.append(node)
-
- if stack[-1].data.token is None:
- stack[-1].data.token = m.group(2)
- stack[-1].order = len(stack[-1].children)
- else:
- child = TreeNode(Symbol(nonterm=None, term=m.group(2)))
- stack[-1].insert(child)
-
- for c in m.group(3):
- stack.pop()
-
- return root.children[0]
diff --git a/gi/markov_al/Makefile.am b/gi/markov_al/Makefile.am
deleted file mode 100644
index fe3e3349..00000000
--- a/gi/markov_al/Makefile.am
+++ /dev/null
@@ -1,6 +0,0 @@
-bin_PROGRAMS = ml
-
-ml_SOURCES = ml.cc
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I$(top_srcdir)/utils $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder
-AM_LDFLAGS = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
diff --git a/gi/markov_al/README b/gi/markov_al/README
deleted file mode 100644
index 9c10f7cd..00000000
--- a/gi/markov_al/README
+++ /dev/null
@@ -1,2 +0,0 @@
-Experimental translation models with Markovian dependencies.
-
diff --git a/gi/markov_al/ml.cc b/gi/markov_al/ml.cc
deleted file mode 100644
index 1e71edd6..00000000
--- a/gi/markov_al/ml.cc
+++ /dev/null
@@ -1,470 +0,0 @@
-#include <iostream>
-#include <tr1/unordered_map>
-
-#include <boost/shared_ptr.hpp>
-#include <boost/functional.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "tdict.h"
-#include "filelib.h"
-#include "sampler.h"
-#include "ccrp_onetable.h"
-#include "array2d.h"
-
-using namespace std;
-using namespace std::tr1;
-namespace po = boost::program_options;
-
-void PrintTopCustomers(const CCRP_OneTable<WordID>& crp) {
- for (CCRP_OneTable<WordID>::const_iterator it = crp.begin(); it != crp.end(); ++it) {
- cerr << " " << TD::Convert(it->first) << " = " << it->second << endl;
- }
-}
-
-void PrintAlignment(const vector<WordID>& src, const vector<WordID>& trg, const vector<unsigned char>& a) {
- cerr << TD::GetString(src) << endl << TD::GetString(trg) << endl;
- Array2D<bool> al(src.size(), trg.size());
- for (int i = 0; i < a.size(); ++i)
- if (a[i] != 255) al(a[i], i) = true;
- cerr << al << endl;
-}
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples")
- ("input,i",po::value<string>(),"Read parallel data from")
- ("random_seed,S",po::value<uint32_t>(), "Random seed");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || (conf->count("input") == 0)) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-struct Unigram;
-struct Bigram {
- Bigram() : trg(), cond() {}
- Bigram(WordID prev, WordID cur, WordID t) : trg(t) { cond.first = prev; cond.second = cur; }
- const pair<WordID,WordID>& ConditioningPair() const {
- return cond;
- }
- WordID& prev_src() { return cond.first; }
- WordID& cur_src() { return cond.second; }
- const WordID& prev_src() const { return cond.first; }
- const WordID& cur_src() const { return cond.second; }
- WordID trg;
- private:
- pair<WordID, WordID> cond;
-};
-
-struct Unigram {
- Unigram() : cur_src(), trg() {}
- Unigram(WordID s, WordID t) : cur_src(s), trg(t) {}
- WordID cur_src;
- WordID trg;
-};
-
-ostream& operator<<(ostream& os, const Bigram& b) {
- os << "( " << TD::Convert(b.trg) << " | " << TD::Convert(b.prev_src()) << " , " << TD::Convert(b.cur_src()) << " )";
- return os;
-}
-
-ostream& operator<<(ostream& os, const Unigram& u) {
- os << "( " << TD::Convert(u.trg) << " | " << TD::Convert(u.cur_src) << " )";
- return os;
-}
-
-bool operator==(const Bigram& a, const Bigram& b) {
- return a.trg == b.trg && a.cur_src() == b.cur_src() && a.prev_src() == b.prev_src();
-}
-
-bool operator==(const Unigram& a, const Unigram& b) {
- return a.trg == b.trg && a.cur_src == b.cur_src;
-}
-
-size_t hash_value(const Bigram& b) {
- size_t h = boost::hash_value(b.prev_src());
- boost::hash_combine(h, boost::hash_value(b.cur_src()));
- boost::hash_combine(h, boost::hash_value(b.trg));
- return h;
-}
-
-size_t hash_value(const Unigram& u) {
- size_t h = boost::hash_value(u.cur_src);
- boost::hash_combine(h, boost::hash_value(u.trg));
- return h;
-}
-
-void ReadParallelCorpus(const string& filename,
- vector<vector<WordID> >* f,
- vector<vector<WordID> >* e,
- set<WordID>* vocab_f,
- set<WordID>* vocab_e) {
- f->clear();
- e->clear();
- vocab_f->clear();
- vocab_e->clear();
- istream* in;
- if (filename == "-")
- in = &cin;
- else
- in = new ifstream(filename.c_str());
- assert(*in);
- string line;
- const WordID kDIV = TD::Convert("|||");
- vector<WordID> tmp;
- while(*in) {
- getline(*in, line);
- if (line.empty() && !*in) break;
- e->push_back(vector<int>());
- f->push_back(vector<int>());
- vector<int>& le = e->back();
- vector<int>& lf = f->back();
- tmp.clear();
- TD::ConvertSentence(line, &tmp);
- bool isf = true;
- for (unsigned i = 0; i < tmp.size(); ++i) {
- const int cur = tmp[i];
- if (isf) {
- if (kDIV == cur) { isf = false; } else {
- lf.push_back(cur);
- vocab_f->insert(cur);
- }
- } else {
- assert(cur != kDIV);
- le.push_back(cur);
- vocab_e->insert(cur);
- }
- }
- assert(isf == false);
- }
- if (in != &cin) delete in;
-}
-
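-// Input format, one sentence pair per line (as parsed by ReadParallelCorpus):
-//   f1 f2 f3 ||| e1 e2 e3
-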
-struct UnigramModel {
- UnigramModel(size_t src_voc_size, size_t trg_voc_size) :
- unigrams(TD::NumWords() + 1, CCRP_OneTable<WordID>(1,1,1,1)),
- p0(1.0 / trg_voc_size) {}
-
- void increment(const Bigram& b) {
- unigrams[b.cur_src()].increment(b.trg);
- }
-
- void decrement(const Bigram& b) {
- unigrams[b.cur_src()].decrement(b.trg);
- }
-
- double prob(const Bigram& b) const {
- const double q0 = unigrams[b.cur_src()].prob(b.trg, p0);
- return q0;
- }
-
- double LogLikelihood() const {
- double llh = 0;
- for (unsigned i = 0; i < unigrams.size(); ++i) {
- const CCRP_OneTable<WordID>& crp = unigrams[i];
- if (crp.num_customers() > 0) {
- llh += crp.log_crp_prob();
- llh += crp.num_tables() * log(p0);
- }
- }
- return llh;
- }
-
- void ResampleHyperparameters(MT19937* rng) {
- for (unsigned i = 0; i < unigrams.size(); ++i)
- unigrams[i].resample_hyperparameters(rng);
- }
-
- vector<CCRP_OneTable<WordID> > unigrams; // unigrams[src].prob(trg, p0) = p(trg|src)
-
- const double p0;
-};
-
-struct BigramModel {
- BigramModel(size_t src_voc_size, size_t trg_voc_size) :
- unigrams(TD::NumWords() + 1, CCRP_OneTable<WordID>(1,1,1,1)),
- p0(1.0 / trg_voc_size) {}
-
- void increment(const Bigram& b) {
- BigramMap::iterator it = bigrams.find(b.ConditioningPair());
- if (it == bigrams.end()) {
- it = bigrams.insert(make_pair(b.ConditioningPair(), CCRP_OneTable<WordID>(1,1,1,1))).first;
- }
- if (it->second.increment(b.trg))
- unigrams[b.cur_src()].increment(b.trg);
- }
-
- void decrement(const Bigram& b) {
- BigramMap::iterator it = bigrams.find(b.ConditioningPair());
- assert(it != bigrams.end());
- if (it->second.decrement(b.trg)) {
- unigrams[b.cur_src()].decrement(b.trg);
- if (it->second.num_customers() == 0)
- bigrams.erase(it);
- }
- }
-
- double prob(const Bigram& b) const {
- const double q0 = unigrams[b.cur_src()].prob(b.trg, p0);
- const BigramMap::const_iterator it = bigrams.find(b.ConditioningPair());
- if (it == bigrams.end()) return q0;
- return it->second.prob(b.trg, q0);
- }
-
- double LogLikelihood() const {
- double llh = 0;
- for (unsigned i = 0; i < unigrams.size(); ++i) {
- const CCRP_OneTable<WordID>& crp = unigrams[i];
- if (crp.num_customers() > 0) {
- llh += crp.log_crp_prob();
- llh += crp.num_tables() * log(p0);
- }
- }
- for (BigramMap::const_iterator it = bigrams.begin(); it != bigrams.end(); ++it) {
- const CCRP_OneTable<WordID>& crp = it->second;
- const WordID cur_src = it->first.second;
- llh += crp.log_crp_prob();
- for (CCRP_OneTable<WordID>::const_iterator bit = crp.begin(); bit != crp.end(); ++bit) {
- llh += log(unigrams[cur_src].prob(bit->second, p0));
- }
- }
- return llh;
- }
-
- void ResampleHyperparameters(MT19937* rng) {
- for (unsigned i = 0; i < unigrams.size(); ++i)
- unigrams[i].resample_hyperparameters(rng);
- for (BigramMap::iterator it = bigrams.begin(); it != bigrams.end(); ++it)
- it->second.resample_hyperparameters(rng);
- }
-
- typedef unordered_map<pair<WordID,WordID>, CCRP_OneTable<WordID>, boost::hash<pair<WordID,WordID> > > BigramMap;
- BigramMap bigrams; // bigrams[(src-1,src)].prob(trg, q0) = p(trg|src,src-1)
- vector<CCRP_OneTable<WordID> > unigrams; // unigrams[src].prob(trg, p0) = p(trg|src)
-
- const double p0;
-};
-
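-// Backoff structure used by BigramModel::prob():
-//   p(trg | src_prev, src) backs off to p(trg | src), which backs off to the
-//   uniform base p0 = 1/|target vocab|; each level is a CCRP over the next.
-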
-struct BigramAlignmentModel {
- BigramAlignmentModel(size_t src_voc_size, size_t trg_voc_size) : bigrams(TD::NumWords() + 1, CCRP_OneTable<WordID>(1,1,1,1)), p0(1.0 / src_voc_size) {}
- void increment(WordID prev, WordID next) {
- bigrams[prev].increment(next); // hierarchy?
- }
- void decrement(WordID prev, WordID next) {
- bigrams[prev].decrement(next); // hierarchy?
- }
- double prob(WordID prev, WordID next) {
- return bigrams[prev].prob(next, p0);
- }
- double LogLikelihood() const {
- double llh = 0;
- for (unsigned i = 0; i < bigrams.size(); ++i) {
- const CCRP_OneTable<WordID>& crp = bigrams[i];
- if (crp.num_customers() > 0) {
- llh += crp.log_crp_prob();
- llh += crp.num_tables() * log(p0);
- }
- }
- return llh;
- }
-
- vector<CCRP_OneTable<WordID> > bigrams; // bigrams[prev].prob(next, p0) = p(next|prev)
- const double p0;
-};
-
-struct Alignment {
- vector<unsigned char> a;
-};
-
-int main(int argc, char** argv) {
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
- const unsigned samples = conf["samples"].as<unsigned>();
-
- boost::shared_ptr<MT19937> prng;
- if (conf.count("random_seed"))
- prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
- else
- prng.reset(new MT19937);
- MT19937& rng = *prng;
-
- vector<vector<WordID> > corpuse, corpusf;
- set<WordID> vocabe, vocabf;
- cerr << "Reading corpus...\n";
- ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
- cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n";
- cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n";
- assert(corpusf.size() == corpuse.size());
- const size_t corpus_len = corpusf.size();
- const WordID kNULL = TD::Convert("<eps>");
- const WordID kBOS = TD::Convert("<s>");
- const WordID kEOS = TD::Convert("</s>");
- Bigram TT(kBOS, TD::Convert("我"), TD::Convert("i"));
- Bigram TT2(kBOS, TD::Convert("要"), TD::Convert("i"));
-
- UnigramModel model(vocabf.size(), vocabe.size());
- vector<Alignment> alignments(corpus_len);
- for (unsigned ci = 0; ci < corpus_len; ++ci) {
- const vector<WordID>& src = corpusf[ci];
- const vector<WordID>& trg = corpuse[ci];
- vector<unsigned char>& alg = alignments[ci].a;
- alg.resize(trg.size());
- int lenp1 = src.size() + 1;
- WordID prev_src = kBOS;
- for (int j = 0; j < trg.size(); ++j) {
- int samp = lenp1 * rng.next();
- --samp;
- if (samp < 0) samp = 255;
- alg[j] = samp;
- WordID cur_src = (samp == 255 ? kNULL : src[alg[j]]);
- Bigram b(prev_src, cur_src, trg[j]);
- model.increment(b);
- prev_src = cur_src;
- }
- Bigram b(prev_src, kEOS, kEOS);
- model.increment(b);
- }
- cerr << "Initial LLH: " << model.LogLikelihood() << endl;
-
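- // Gibbs sampling over alignments: for each target position j, decrement the
- // counts for the current a_j, score every candidate source position (255
- // encodes NULL), sample a new a_j in proportion, then increment the counts.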
- SampleSet<double> ss;
- for (unsigned si = 0; si < 50; ++si) {
- for (unsigned ci = 0; ci < corpus_len; ++ci) {
- const vector<WordID>& src = corpusf[ci];
- const vector<WordID>& trg = corpuse[ci];
- vector<unsigned char>& alg = alignments[ci].a;
- WordID prev_src = kBOS;
- for (unsigned j = 0; j < trg.size(); ++j) {
- unsigned char& a_j = alg[j];
- WordID cur_e_a_j = (a_j == 255 ? kNULL : src[a_j]);
- Bigram b(prev_src, cur_e_a_j, trg[j]);
- //cerr << "DEC: " << b << "\t" << nextb << endl;
- model.decrement(b);
- ss.clear();
- for (unsigned i = 0; i <= src.size(); ++i) {
- const WordID cur_src = (i ? src[i-1] : kNULL);
- b.cur_src() = cur_src;
- ss.add(model.prob(b));
- }
- int sampled_a_j = rng.SelectSample(ss);
- a_j = (sampled_a_j ? sampled_a_j - 1 : 255);
- cur_e_a_j = (a_j == 255 ? kNULL : src[a_j]);
- b.cur_src() = cur_e_a_j;
- //cerr << "INC: " << b << "\t" << nextb << endl;
- model.increment(b);
- prev_src = cur_e_a_j;
- }
- }
- cerr << '.' << flush;
- if (si % 10 == 9) {
- cerr << "[LLH prev=" << model.LogLikelihood();
- //model.ResampleHyperparameters(&rng);
- cerr << " new=" << model.LogLikelihood() << "]\n";
- //pair<WordID,WordID> xx = make_pair(kBOS, TD::Convert("我"));
- //PrintTopCustomers(model.bigrams.find(xx)->second);
- cerr << "p(" << TT << ") = " << model.prob(TT) << endl;
- cerr << "p(" << TT2 << ") = " << model.prob(TT2) << endl;
- PrintAlignment(corpusf[0], corpuse[0], alignments[0].a);
- }
- }
- {
- // MODEL 2
- BigramModel model(vocabf.size(), vocabe.size());
- BigramAlignmentModel amodel(vocabf.size(), vocabe.size());
- for (unsigned ci = 0; ci < corpus_len; ++ci) {
- const vector<WordID>& src = corpusf[ci];
- const vector<WordID>& trg = corpuse[ci];
- vector<unsigned char>& alg = alignments[ci].a;
- WordID prev_src = kBOS;
- for (int j = 0; j < trg.size(); ++j) {
- WordID cur_src = (alg[j] == 255 ? kNULL : src[alg[j]]);
- Bigram b(prev_src, cur_src, trg[j]);
- model.increment(b);
- amodel.increment(prev_src, cur_src);
- prev_src = cur_src;
- }
- amodel.increment(prev_src, kEOS);
- Bigram b(prev_src, kEOS, kEOS);
- model.increment(b);
- }
- cerr << "Initial LLH: " << model.LogLikelihood() << " " << amodel.LogLikelihood() << endl;
-
- SampleSet<double> ss;
- for (unsigned si = 0; si < samples; ++si) {
- for (unsigned ci = 0; ci < corpus_len; ++ci) {
- const vector<WordID>& src = corpusf[ci];
- const vector<WordID>& trg = corpuse[ci];
- vector<unsigned char>& alg = alignments[ci].a;
- WordID prev_src = kBOS;
- for (unsigned j = 0; j < trg.size(); ++j) {
- unsigned char& a_j = alg[j];
- WordID cur_e_a_j = (a_j == 255 ? kNULL : src[a_j]);
- Bigram b(prev_src, cur_e_a_j, trg[j]);
- WordID next_src = kEOS;
- WordID next_trg = kEOS;
- if (j < (trg.size() - 1)) {
- next_src = (alg[j+1] == 255 ? kNULL : src[alg[j + 1]]);
- next_trg = trg[j + 1];
- }
- Bigram nextb(cur_e_a_j, next_src, next_trg);
- //cerr << "DEC: " << b << "\t" << nextb << endl;
- model.decrement(b);
- model.decrement(nextb);
- amodel.decrement(prev_src, cur_e_a_j);
- amodel.decrement(cur_e_a_j, next_src);
- ss.clear();
- for (unsigned i = 0; i <= src.size(); ++i) {
- const WordID cur_src = (i ? src[i-1] : kNULL);
- b.cur_src() = cur_src;
- ss.add(model.prob(b) * model.prob(nextb) * amodel.prob(prev_src, cur_src) * amodel.prob(cur_src, next_src));
- //cerr << log(ss[ss.size() - 1]) << "\t" << b << endl;
- }
- int sampled_a_j = rng.SelectSample(ss);
- a_j = (sampled_a_j ? sampled_a_j - 1 : 255);
- cur_e_a_j = (a_j == 255 ? kNULL : src[a_j]);
- b.cur_src() = cur_e_a_j;
- nextb.prev_src() = cur_e_a_j;
- //cerr << "INC: " << b << "\t" << nextb << endl;
- //exit(1);
- model.increment(b);
- model.increment(nextb);
- amodel.increment(prev_src, cur_e_a_j);
- amodel.increment(cur_e_a_j, next_src);
- prev_src = cur_e_a_j;
- }
- }
- cerr << '.' << flush;
- if (si % 10 == 9) {
- cerr << "[LLH prev=" << (model.LogLikelihood() + amodel.LogLikelihood());
- //model.ResampleHyperparameters(&rng);
- cerr << " new=" << model.LogLikelihood() << "]\n";
- pair<WordID,WordID> xx = make_pair(kBOS, TD::Convert("我"));
- cerr << "p(" << TT << ") = " << model.prob(TT) << endl;
- cerr << "p(" << TT2 << ") = " << model.prob(TT2) << endl;
- pair<WordID,WordID> xx2 = make_pair(kBOS, TD::Convert("要"));
- PrintTopCustomers(model.bigrams.find(xx)->second);
- //PrintTopCustomers(amodel.bigrams[TD::Convert("<s>")]);
- //PrintTopCustomers(model.unigrams[TD::Convert("<eps>")]);
- PrintAlignment(corpusf[0], corpuse[0], alignments[0].a);
- }
- }
- }
- return 0;
-}
-
diff --git a/gi/morf-segmentation/filter_docs.pl b/gi/morf-segmentation/filter_docs.pl
deleted file mode 100755
index a78575da..00000000
--- a/gi/morf-segmentation/filter_docs.pl
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/perl
-
-#Filters the phrase&cluster document set to retain only documents that correspond to words or morphs, i.e. not crossing word boundaries.
-
-#Usage: filter_docs.pl [mark]
-# STDIN: data in the doc.txt format (i.e. phrase\t blahblah ), most likely from cdec extractor
-# STDOUT: the matching subset, same format
-
-use utf8;
-my $letter=qr/\p{L}\p{M}*/; # see http://www.regular-expressions.info/unicode.html
-
-my $morph=qr/$letter+/;
-
-my $m = "##"; # marker used to indicate morphemes
-if ((scalar @ARGV) >= 1) {
- $m = $ARGV[0];
- shift;
-}
-print STDERR "Using $m to filter for morphemes\n";
-
-my $expr = qr/^($morph\Q$m\E)? ?(\Q$m\E$morph\Q$m\E)* ?(\Q$m\E$morph)?\t/; #\Q and \E bounded sections are escaped
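-# e.g. a key "ver## ##wonder" (one word's morph sequence) passes the filter,
-# while a key "over de" (crossing a word boundary) does not -- illustrative keys.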
-while(<>) {
- /$expr/ && print;
-}
diff --git a/gi/morf-segmentation/invalid_vocab.patterns b/gi/morf-segmentation/invalid_vocab.patterns
deleted file mode 100644
index 473ce1b1..00000000
--- a/gi/morf-segmentation/invalid_vocab.patterns
+++ /dev/null
@@ -1,6 +0,0 @@
-[[:digit:]]
-[] !"#$%&()*+,./:;<=>?@[\^_`{|}~]
-^'$
--$
-^-
-^$
diff --git a/gi/morf-segmentation/linestripper.py b/gi/morf-segmentation/linestripper.py
deleted file mode 100755
index 04e9044a..00000000
--- a/gi/morf-segmentation/linestripper.py
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/usr/bin/python
-
-import sys
-
-#linestripper file file maxlen [numlines]
-
-if len(sys.argv) < 3:
- print "linestripper file1 file2 maxlen [numlines]"
- print " outputs subset of file1 to stdout, ..of file2 to stderr"
- sys.exit(1)
-
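-# e.g. "linestripper.py corpus.src corpus.trg 40 100000" keeps the first 100000
-# pairs in which both sides have at most 40 tokens (illustrative arguments)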
-
-f1 = open(sys.argv[1],'r')
-f2 = open(sys.argv[2],'r')
-
-maxlen=int(sys.argv[3])
-numlines = 0
-
-if len(sys.argv) > 4:
- numlines = int(sys.argv[4])
-
-count=0
-for line1 in f1:
- line2 = f2.readline()
-
- w1 = len(line1.strip().split())
- w2 = len(line2.strip().split())
-
- if w1 <= maxlen and w2 <= maxlen:
- count = count + 1
- sys.stdout.write(line1)
- sys.stderr.write(line2)
-
- if numlines > 0 and count >= numlines:
- break
-
-f1.close()
-f2.close()
-
-
diff --git a/gi/morf-segmentation/morf-pipeline.pl b/gi/morf-segmentation/morf-pipeline.pl
deleted file mode 100755
index 46eb5b46..00000000
--- a/gi/morf-segmentation/morf-pipeline.pl
+++ /dev/null
@@ -1,486 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-use File::Copy;
-
-
-# Preprocessing pipeline to take care of word segmentation
-# Learns a segmentation model for each/either side of the parallel corpus using all train/dev/test data
-# Applies the segmentation where necessary.
-# Learns word alignments on the preprocessed training data.
-# Outputs script files used later to score output.
-
-
-my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; }
-
-use Getopt::Long "GetOptions";
-
-my $GZIP = 'gzip';
-my $ZCAT = 'gunzip -c';
-my $SED = 'sed -e';
-
-my $MORF_TRAIN = "$SCRIPT_DIR/morftrain.sh";
-my $MORF_SEGMENT = "$SCRIPT_DIR/morfsegment.py";
-
-my $LINESTRIPPER = "$SCRIPT_DIR/linestripper.py";
-my $ALIGNER = "/export/ws10smt/software/berkeleyaligner/berkeleyaligner.jar";
-#java -d64 -Xmx10g -jar $ALIGNER ++word-align.conf >> aligner.log
-assert_exec($MORF_TRAIN, $LINESTRIPPER, $MORF_SEGMENT, $ALIGNER);
-
-my $OUTPUT = './morfwork';
-my $PPL_SRC = 50;
-my $PPL_TRG = 50;
-my $MARKER = "#";
-my $MAX_WORDS = 40;
-my $SENTENCES;# = 100000;
-my $SPLIT_TYPE = ""; #possible values: s, t, st, or (empty string)
-my $NAME_SHORTCUT;
-
-usage() unless &GetOptions('max_words=i' => \$MAX_WORDS,
- 'output=s' => \$OUTPUT,
- 'ppl_src=i' => \$PPL_SRC,
- 'ppl_trg=i' => \$PPL_TRG,
- 'sentences=i' => \$SENTENCES,
- 'marker=s' => \$MARKER,
- 'split=s' => \$SPLIT_TYPE,
- 'get_name_only' => \$NAME_SHORTCUT,
- );
-
-usage() unless scalar @ARGV >= 2;
-
-my %CORPUS; # for (src,trg) it has (orig, name, filtered, final)
-
-$CORPUS{'src'}{'orig'} = $ARGV[0];
-open F, "<$CORPUS{'src'}{'orig'}" or die "Can't read $CORPUS{'src'}{'orig'}: $!"; close F;
-$CORPUS{'src'}{'name'} = get_basename($CORPUS{'src'}{'orig'});
-
-$CORPUS{'trg'}{'orig'} = $ARGV[1];
-open F, "<$CORPUS{'trg'}{'orig'}" or die "Can't read $CORPUS{'trg'}{'orig'}: $!"; close F;
-$CORPUS{'trg'}{'name'} = get_basename($CORPUS{'trg'}{'orig'});
-
-my %DEV; # for (src,trg) has (orig, final.split final.unsplit
-if (@ARGV >= 4) {
- $DEV{'src'}{'orig'} = $ARGV[2];
- open F, "<$DEV{'src'}{'orig'}" or die "Can't read $DEV{'src'}{'orig'}: $!"; close F;
- $DEV{'src'}{'name'} = get_basename($DEV{'src'}{'orig'});
- $DEV{'trg'}{'orig'} = $ARGV[3];
- open F, "<$DEV{'trg'}{'orig'}" or die "Can't read $DEV{'trg'}{'orig'}: $!"; close F;
- $DEV{'trg'}{'name'} = get_basename($DEV{'trg'}{'orig'});
-}
-
-my %TEST; # for (src,trg) has (orig, name)
-if (@ARGV >= 6) {
- $TEST{'src'}{'orig'} = $ARGV[4];
- open F, "<$TEST{'src'}{'orig'}" or die "Can't read $TEST{'src'}{'orig'}: $!"; close F;
- $TEST{'src'}{'name'} = get_basename($TEST{'src'}{'orig'});
- $TEST{'trg'}{'orig'} = $ARGV[5];
- open F, "<$TEST{'trg'}{'orig'}" or die "Can't read $TEST{'trg'}{'orig'}: $!"; close F;
- $TEST{'trg'}{'name'} = get_basename($TEST{'trg'}{'orig'});
-}
-
-my $SPLIT_SRC; #use these to check whether that part is being split
-my $SPLIT_TRG;
-
-#OUTPUT WILL GO IN THESE
-my $CORPUS_DIR = $OUTPUT . '/' . corpus_dir(); #subsampled corpus
-my $MODEL_SRC_DIR = $OUTPUT . '/' . model_dir("src"); #splitting..
-my $MODEL_TRG_DIR = $OUTPUT . '/' . model_dir("trg"); # .. models
-my $PROCESSED_DIR = $OUTPUT . '/' . processed_dir(); #segmented corpora+alignments
-my $ALIGNMENT_DIR = $PROCESSED_DIR . '/alignments';
-
-$CORPUS{'src'}{'filtered'} = $CORPUS_DIR . "/$CORPUS{'src'}{'name'}";
-$CORPUS{'trg'}{'filtered'} = $CORPUS_DIR . "/$CORPUS{'trg'}{'name'}";
-
-print STDERR "Output: $OUTPUT\n";
-print STDERR "Corpus: $CORPUS_DIR\n";
-print STDERR "Model-src: $MODEL_SRC_DIR\n";
-print STDERR "Model-trg: $MODEL_TRG_DIR\n";
-print STDERR "Finaldir: $PROCESSED_DIR\n";
-
-safemkdir($OUTPUT) or die "Couldn't create output directory $OUTPUT: $!";
-safemkdir($CORPUS_DIR) or die "Couldn't create output directory $CORPUS_DIR: $!";
-filter_corpus();
-
-safemkdir($PROCESSED_DIR);
-safemkdir($ALIGNMENT_DIR);
-
-if ($SPLIT_SRC) {
- safemkdir($MODEL_SRC_DIR) or die "Couldn't create output directory $MODEL_SRC_DIR: $!";
- learn_segmentation("src");
- apply_segmentation_side("src", $MODEL_SRC_DIR);
-}
-
-#assume that unsplit hypotheses will be scored against an artificially split target test set; thus obtain a target splitting model
-#TODO: add a flag to override this behaviour
-safemkdir($MODEL_TRG_DIR) or die "Couldn't create output directory $MODEL_TRG_DIR: $!";
-learn_segmentation("trg");
-$TEST{'trg'}{'finalunsplit'} = "$PROCESSED_DIR/$TEST{'trg'}{'name'}";
-copy($TEST{'trg'}{'orig'}, $TEST{'trg'}{'finalunsplit'}) or die "Could not copy unsegmented test set";
-
-if ($SPLIT_TRG) {
- apply_segmentation_side("trg", $MODEL_TRG_DIR);
-} else {
- $TEST{'trg'}{'finalsplit'} = "$PROCESSED_DIR/$TEST{'trg'}{'name'}.split";
- apply_segmentation_any($MODEL_TRG_DIR, $TEST{'trg'}{'finalunsplit'}, $TEST{'trg'}{'finalsplit'});
-}
-
-write_eval_sh("$PROCESSED_DIR/eval-devtest.sh");
-
-#copy corpora if they haven't been put in place by splitting operations
-place_missing_data_side('src');
-place_missing_data_side('trg');
-
-do_align();
-
-if ($CORPUS{'src'}{'orig'} && $DEV{'src'}{'orig'} && $TEST{'src'}{'orig'}) {
- print STDERR "Putting the config file entry in $PROCESSED_DIR/exp.config\n";
-#format is:
- # nlfr100k_unsplit /export/ws10smt/jan/nlfr/morfwork/s100k.w40.sp_0 corpus.nl-fr.al fr-3.lm.gz dev.nl dev.fr test2008.nl eval-devtest.sh
- my $line = split_name() . " $PROCESSED_DIR corpus.src-trg.al LMFILE.lm.gz";
- $line = $line . " $DEV{'src'}{'name'} $DEV{'trg'}{'name'}";
- $line = $line . " " . get_basename($TEST{'src'}{$SPLIT_SRC ? "finalsplit" : "finalunsplit"}) . " eval-devtest.sh";
- safesystem("echo '$line' > $PROCESSED_DIR/exp.config");
-}
-
-system("date");
-print STDERR "All done. You now need to train a language model (if target split), put it in the right dir and update the config file.\n\n";
-
-############################## BILINGUAL ###################################
-
-sub filter_corpus {
- print STDERR "\n!!!FILTERING TRAINING COPRUS!!!\n";
- if ( -f $CORPUS{'src'}{'filtered'} && -f $CORPUS{'trg'}{'filtered'}) {
- print STDERR "$CORPUS{'src'}{'filtered'} and $CORPUS{'trg'}{'filtered'} exist, reusing...\n";
- return;
- }
- my $args = "$CORPUS{'src'}{'orig'} $CORPUS{'trg'}{'orig'} $MAX_WORDS";
- if ($SENTENCES) { $args = $args . " $SENTENCES"; }
- safesystem("$LINESTRIPPER $args 1> $CORPUS{'src'}{'filtered'} 2> $CORPUS{'trg'}{'filtered'}") or die "Failed to filter training corpus for length.";
-}
-
-sub learn_segmentation
-{
- my $WHICH = shift;
- my $corpus; my $dev; my $test; my $moddir; my $ppl;
-
- $corpus = $CORPUS{$WHICH}{'filtered'};
- $dev = $DEV{$WHICH}{'orig'};
- $test = $TEST{$WHICH}{'orig'};
-
- if ($WHICH eq "src") {
- $moddir = $MODEL_SRC_DIR;
- $ppl = $PPL_SRC;
- } else {
- $moddir = $MODEL_TRG_DIR;
- $ppl = $PPL_TRG;
- }
- my $cmd = "cat $corpus";
- if ($dev) { $cmd = "$cmd $dev"; }
- if ($test) { $cmd = "$cmd $test"; }
- my $tmpfile = "$CORPUS_DIR/all.tmp.gz";
- safesystem("$cmd | $GZIP > $tmpfile") or die "Failed to concatenate data for model learning..";
- assert_marker($tmpfile);
-
- learn_segmentation_side($tmpfile, $moddir, $ppl, $WHICH);
- safesystem("rm $tmpfile");
-}
-
-sub do_align {
- print STDERR "\n!!!WORD ALIGNMENT!!!\n";
- system("date");
-
- my $ALIGNMENTS = "$ALIGNMENT_DIR/training.align";
- if ( -f $ALIGNMENTS ) {
- print STDERR "$ALIGNMENTS exists, reusing...\n";
- return;
- }
- my $conf_file = "$ALIGNMENT_DIR/word-align.conf";
-
- #decorate training files with identifiers to stop the aligner from training on dev and test when rerun in the future.
- safesystem("cd $PROCESSED_DIR && ln -s $CORPUS{'src'}{'name'} corpus.src") or die "Failed to symlink: $!";
- safesystem("cd $PROCESSED_DIR && ln -s $CORPUS{'trg'}{'name'} corpus.trg") or die "Failed to symlink: $!";
-
- write_wconf($conf_file, $PROCESSED_DIR);
- system("java -d64 -Xmx24g -jar $ALIGNER ++$conf_file > $ALIGNMENT_DIR/aligner.log");
-
- if (! -f $ALIGNMENTS) { die "Failed to run word alignment.";}
-
- my $cmd = "paste $PROCESSED_DIR/corpus.src $PROCESSED_DIR/corpus.trg $ALIGNMENTS";
- $cmd = $cmd . " | sed 's/\\t/ \|\|\| /g' > $PROCESSED_DIR/corpus.src-trg.al";
- safesystem($cmd) or die "Failed to paste into aligned corpus file.";
-
-}
-
-############################# MONOLINGUAL #################################
-
-#copy the necessary data files that weren't placed by segmentation
-sub place_missing_data_side {
- my $side = shift;
-
- ifne_copy($CORPUS{$side}{'filtered'}, "$PROCESSED_DIR/$CORPUS{$side}{'name'}") ;
-
- if ($DEV{$side}{'orig'} && ! -f "$PROCESSED_DIR/$DEV{$side}{'name'}") {
- $DEV{$side}{'final'} = "$PROCESSED_DIR/$DEV{$side}{'name'}";
- copy($DEV{$side}{'orig'}, $DEV{$side}{'final'}) or die "Copy failed: $!";
- }
-
- if ($TEST{$side}{'orig'} && ! -f "$PROCESSED_DIR/$TEST{$side}{'name'}" && ! $TEST{$side}{'finalunsplit'}) {
- $TEST{$side}{'finalunsplit'} = "$PROCESSED_DIR/$TEST{$side}{'name'}";
- copy($TEST{$side}{'orig'}, $TEST{$side}{'finalunsplit'}) or die "Copy failed: $!";
- }
-
-}
-
-sub apply_segmentation_side {
- my ($side, $moddir) = @_;
-
- print STDERR "\n!!!APPLYING SEGMENTATION MODEL ($side)!!!\n";
- apply_segmentation_any($moddir, $CORPUS{$side}{'filtered'}, "$PROCESSED_DIR/$CORPUS{$side}{'name'}");
- if ($DEV{$side}{'orig'}) {
- $DEV{$side}{'final'} = "$PROCESSED_DIR/$DEV{$side}{'name'}";
- apply_segmentation_any($moddir, $DEV{$side}{'orig'}, "$DEV{$side}{'final'}");
- }
- if ($TEST{$side}{'orig'}) {
- $TEST{$side}{'finalsplit'} = "$PROCESSED_DIR/$TEST{$side}{'name'}.split";
- apply_segmentation_any($moddir, $TEST{$side}{'orig'}, $TEST{$side}{'finalsplit'} );
- }
-
-}
-
-sub learn_segmentation_side {
- my($INPUT_FILE, $SEGOUT_DIR, $PPL, $LANG) = @_;
-
- print STDERR "\n!!!LEARNING SEGMENTATION MODEL ($LANG)!!!\n";
- system("date");
- my $SEG_FILE = $SEGOUT_DIR . "/segmentation.ready";
- if ( -f $SEG_FILE) {
- print STDERR "$SEG_FILE exists, reusing...\n";
- return;
- }
- my $cmd = "$MORF_TRAIN $INPUT_FILE $SEGOUT_DIR $PPL \"$MARKER\"";
- safesystem($cmd) or die "Failed to learn segmentation model";
-}
-
-sub apply_segmentation_any {
- my($moddir, $datfile, $outfile) = @_;
- if ( -f $outfile) {
- print STDERR "$outfile exists, reusing...\n";
- return;
- }
-
- my $args = "$moddir/inputvocab.gz $moddir/segmentation.ready \"$MARKER\"";
- safesystem("cat $datfile | $MORF_SEGMENT $args &> $outfile") or die "Could not segment $datfile";
-}
-
-##################### PATH FUNCTIONS ##########################
-
-sub beautify_numlines {
- return ($SENTENCES ? $SENTENCES : "_all");
-}
-
-sub corpus_dir {
- return "s" . beautify_numlines() . ".w" . $MAX_WORDS;
-}
-
-sub model_dir {
- my $lang = shift;
- if ($lang eq "src") {
- return corpus_dir() . ".PPL" . $PPL_SRC . ".src";
- } elsif ($lang eq "trg") {
- return corpus_dir() . ".PPL" . $PPL_TRG . ".trg";
- } else {
- return "PPLundef";
- }
-}
-
-sub processed_dir {
- return corpus_dir() . "." . split_name();
-}
-
-########################## HELPER FUNCTIONS ############################
-
-sub ifne_copy {
- my ($src, $dest) = @_;
- if (! -f $dest) {
- copy($src, $dest) or die "Copy failed: $!";
- }
-}
-
-sub split_name {
- #parses SPLIT_TYPE, which can have the following values
- # t|s|ts|st (last 2 are equiv)
- # or is undefined when no splitting is done
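-  # e.g. --split st with --ppl_src 50 --ppl_trg 20 yields "sp_50_20";
-  # no --split yields "sp_0" (illustrative values)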
- my $name = "";
-
- if ($SPLIT_TYPE) {
- $SPLIT_SRC = lc($SPLIT_TYPE) =~ /s/;
- $SPLIT_TRG = lc($SPLIT_TYPE) =~ /t/;
- $name = $name . ($SPLIT_SRC ? $PPL_SRC : "0");
- $name = $name . "_" . ($SPLIT_TRG ? $PPL_TRG : "0");
- } else {
- #no splitting
- $name = "0";
- }
-
- return "sp_" . $name;
-
-}
-
-sub usage {
- print <<EOT;
-
-Usage: $0 [OPTIONS] corpus.src corpus.trg [dev.src dev.trg [test.src test.trg]]
-
-Learns a segmentation model and splits up corpora as necessary. Word alignments are trained on a specified subset of the training corpus.
-
-EOT
- exit 1;
-};
-
-sub safemkdir {
- my $dir = shift;
- if (-d $dir) { return 1; }
- return mkdir($dir);
-}
-
-sub assert_exec {
- my @files = @_;
- for my $file (@files) {
- die "Can't find $file - did you run make?\n" unless -e $file;
- die "Can't execute $file" unless -e $file;
- }
-};
-sub safesystem {
- print STDERR "Executing: @_\n";
- system(@_);
- if ($? == -1) {
- print STDERR "ERROR: Failed to execute: @_\n $!\n";
- exit(1);
- }
- elsif ($? & 127) {
- printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n",
- ($? & 127), ($? & 128) ? 'with' : 'without';
- exit(1);
- }
- else {
- my $exitcode = $? >> 8;
- print STDERR "Exit code: $exitcode\n" if $exitcode;
- return ! $exitcode;
- }
-}
-
-sub get_basename
-{
- my $x = shift;
- $x = `basename $x`;
- $x =~ s/\n//;
- return $x;
-}
-
-sub assert_marker {
- my $file = shift;
- my $result = `zcat $file | grep '$MARKER' | wc -l` or die "Cannot read $file: $!";
- chomp $result;
- print STDERR "Lines containing '$MARKER': $result\n";
- if ($result != 0) { die "Data contains marker '$MARKER'; use something else.";}
-}
-########################### Dynamic config files ##############################
-
-sub write_wconf {
- my ($filename, $train_dir) = @_;
- open WCONF, ">$filename" or die "Can't write $filename: $!";
-
- print WCONF <<EOT;
-## ----------------------
-## This is an example training script for the Berkeley
-## word aligner. In this configuration it uses two HMM
-## alignment models trained jointly and then decoded
-## using the competitive thresholding heuristic.
-
-##########################################
-# Training: Defines the training regimen
-##########################################
-forwardModels MODEL1 HMM
-reverseModels MODEL1 HMM
-mode JOINT JOINT
-iters 5 5
-
-###############################################
-# Execution: Controls output and program flow
-###############################################
-execDir $ALIGNMENT_DIR
-create
-overwriteExecDir
-saveParams true
-numThreads 1
-msPerLine 10000
-alignTraining
-
-#################
-# Language/Data
-#################
-foreignSuffix src
-englishSuffix trg
-
-# Choose the training sources, which can either be directories or files that list files/directories
-trainSources $train_dir/
-#trainSources $train_dir/sources
-testSources
-sentences MAX
-
-#################
-# 1-best output
-#################
-competitiveThresholding
-
-EOT
- close WCONF;
-}
-
-sub write_eval_sh
-{
- my ($filename) = @_;
- open EVALFILE, ">$filename" or die "Can't write $filename: $!";
-
- print EVALFILE <<EOT;
-#!/bin/bash
-
-EVAL_MAIN=/export/ws10smt/data/eval.sh
-marker="$MARKER"
-EOT
-
- if ($SPLIT_TRG) {
- print EVALFILE <<EOT;
-echo "OUTPUT EVALUATION"
-echo "-----------------"
-\$EVAL_MAIN "\$1" $TEST{'trg'}{'finalsplit'}
-
-echo "RECOMBINED OUTPUT EVALUATION"
-echo "----------------------------"
-cat "\$1" | sed -e "s/\$marker \$marker//g" -e "s/\$marker//g" > "\$1.recombined"
-
-\$EVAL_MAIN "\$1.recombined" $TEST{'trg'}{'finalunsplit'}
-EOT
-
- } else {
- print EVALFILE <<EOT;
-echo "ARTIFICIAL SPLIT EVALUATION"
-echo "--------------------------"
-
-#split the output translation
-cat "\$1" | $MORF_SEGMENT $MODEL_TRG_DIR/inputvocab.gz $MODEL_TRG_DIR/segmentation.ready "\$MARKER" > "\$1.split"
-
-\$EVAL_MAIN "\$1.split" $TEST{'trg'}{'finalsplit'}
-
-echo "DIRECT EVALUATION"
-echo "--------------------------"
-\$EVAL_MAIN "\$1" $TEST{'trg'}{'finalunsplit'}
-
-EOT
-
- }
- close EVALFILE;
-
-}
-
-
-
-
diff --git a/gi/morf-segmentation/morfsegment.py b/gi/morf-segmentation/morfsegment.py
deleted file mode 100755
index 85b9d4fb..00000000
--- a/gi/morf-segmentation/morfsegment.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/python
-
-import sys
-import gzip
-
-#usage: morfsegment.py inputvocab.gz segmentation.ready
-# stdin: the data to segment
-# stdout: the segmented data
-
-if len(sys.argv) < 3:
- print "usage: morfsegment.py inputvocab.gz segmentation.ready [marker]"
- print " stdin: the data to segment"
- print " stdout: the segmented data"
- sys.exit()
-
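-# Worked example (hypothetical entries, marker '#'):
-#   inputvocab.gz line:      "12 verwonder"
-#   segmentation.ready line: "ver# #wonder"
-#   stdin "ik verwonder mij" -> stdout "ik ver# #wonder mij"
-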
-#read index:
-split_index={}
-
-marker="##"
-
-if len(sys.argv) > 3:
- marker=sys.argv[3]
-
-word_vocab=gzip.open(sys.argv[1], 'rb') #inputvocab.gz
-seg_vocab=open(sys.argv[2], 'r') #segm.ready..
-
-for seg in seg_vocab:
- #seg = ver# #wonder\n
- #wordline = 1 verwonder\n
- word = word_vocab.readline().strip().split(' ')
- assert(len(word) == 2)
- word = word[1]
- seg=seg.strip()
-
- if seg != word:
- split_index[word] = seg
-
-word_vocab.close()
-seg_vocab.close()
-
-for line in sys.stdin:
- words = line.strip().split()
-
- newsent = []
- for word in words:
- splitword = split_index.get(word, word)
- newsent.append(splitword)
-
- print ' '.join(newsent)
-
diff --git a/gi/morf-segmentation/morftrain.sh b/gi/morf-segmentation/morftrain.sh
deleted file mode 100755
index 9004922f..00000000
--- a/gi/morf-segmentation/morftrain.sh
+++ /dev/null
@@ -1,110 +0,0 @@
-#!/bin/bash
-
-if [[ $# -lt 2 ]]; then
- echo "Trains a morfessor model and places the result in writedir"
- echo
- echo "Usage: `basename $0` corpus_input_file writedir [PPL] [marker] [lines]"
- echo -e "\tcorpus_input_file contains a sentence per line."
- exit 1
-fi
-
-MORFESSOR_DIR="/export/ws10smt/software/morfessor_catmap0.9.2"
-SCRIPT_DIR=$(dirname `readlink -f $0`)
-
-MORFBINDIR="$MORFESSOR_DIR/bin"
-MORFMAKEFILE_TRAIN="$MORFESSOR_DIR/train/Makefile"
-VOCABEXT="$SCRIPT_DIR/vocabextractor.sh"
-
-MARKER="#"
-
-if [[ ! -f $VOCABEXT ]]; then
- echo "$VOCABEXT doesn't exist!"
- exit 1
-fi
-if [[ ! -f $MORFMAKEFILE_TRAIN ]]; then
- echo "$MORFMAKEFILE_TRAIN doesn't exist!"
- exit 1
-fi
-
-
-CORPUS="$1"
-WRITETODIR=$2
-
-if [[ ! -f $CORPUS ]]; then
- echo "$CORPUS doesn't exist!"
- exit 1
-fi
-
-PPL=10
-LINES=0
-if [[ $# -gt 2 ]]; then
- PPL=$3
-fi
-if [[ $# -gt 3 ]]; then
- MARKER="$4"
-fi
-if [[ $# -gt 4 ]]; then
- LINES=$5
-fi
-
-mkdir -p $WRITETODIR
-
-#extract vocabulary to train on
-echo "Extracting vocabulary..."
-if [[ -f $WRITETODIR/inputvocab.gz ]]; then
- echo " ....$WRITETODIR/inputvocab.gz exists, reusing."
-else
- if [[ $LINES -gt 0 ]]; then
- $VOCABEXT $CORPUS $LINES | gzip > $WRITETODIR/inputvocab.gz
- else
- $VOCABEXT $CORPUS | gzip > $WRITETODIR/inputvocab.gz
- fi
-fi
-
-
-#train it
-echo "Training morf model..."
-if [[ -f $WRITETODIR/segmentation.final.gz ]]; then
- echo " ....$WRITETODIR/segmentation.final.gz exists, reusing.."
-else
- OLDPWD=`pwd`
- cd $WRITETODIR
-
- #put the training Makefile in place, with appropriate modifications
- sed -e "s/^GZIPPEDINPUTDATA = .*$/GZIPPEDINPUTDATA = inputvocab.gz/" \
- -e "s/^PPLTHRESH = .*$/PPLTHRESH = $PPL/" \
- -e "s;^BINDIR = .*$;BINDIR = $MORFBINDIR;" \
- $MORFMAKEFILE_TRAIN > ./Makefile
-
- date
- make > ./trainmorf.log 2>&1
- cd $OLDPWD
-
-
- echo "Post processing..."
- #remove comments, counts and morph types
- #mark morphs
-
- if [[ ! -f $WRITETODIR/segmentation.final.gz ]]; then
- echo "Failed to learn segmentation model: $WRITETODIR/segmentation.final.gz not written"
- exit 1
- fi
-
- zcat $WRITETODIR/segmentation.final.gz | \
- awk '$1 !~ /^#/ {print}' | \
- cut -d ' ' --complement -f 1 | \
- sed -e "s/\/...//g" -e "s/ + /$MARKER $MARKER/g" \
- > $WRITETODIR/segmentation.ready
-
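- # e.g. a (hypothetical) segmentation.final.gz entry "1 ver/STM + wonder/STM"
- # is rewritten to "ver# #wonder" in segmentation.ready (default marker "#")
-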
- if [[ ! -f $WRITETODIR/segmentation.ready ]]; then
- echo "Failed to learn segmentation model: $WRITETODIR/segmentation.final.gz not written"
- exit 1
- fi
-
-
-
- echo "Done training."
- date
-fi
-echo "Segmentation model is $WRITETODIR/segmentation.ready."
-
diff --git a/gi/morf-segmentation/vocabextractor.sh b/gi/morf-segmentation/vocabextractor.sh
deleted file mode 100755
index 00ae7109..00000000
--- a/gi/morf-segmentation/vocabextractor.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/bin/bash
-
-d=$(dirname `readlink -f $0`)
-if [ $# -lt 1 ]; then
- echo "Extracts unique words and their frequencies from a subset of a corpus."
- echo
- echo "Usage: `basename $0` input_file [number_of_lines] > output_file"
- echo -e "\tinput_file contains a sentence per line."
- echo
- echo "Script also removes words from the vocabulary if they contain a digit or a special character. Output is printed to stdout in a format suitable for use with Morfessor."
- echo
- exit
-fi
-
-srcname=$1
-reallen=0
-
-if [[ $# -gt 1 ]]; then
- reallen=$2
-fi
-
-pattern_file=$d/invalid_vocab.patterns
-
-if [[ ! -f $pattern_file ]]; then
- echo "Pattern file missing"
- exit 1
-fi
-
-#this awk strips entries from the vocabulary if they contain invalid characters
-#invalid characters are digits and punctuation marks, and words beginning or ending with a dash
-#uniq -c extracts the unique words and counts the occurrences
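-#e.g. the input line "de hond en de kat" contributes "2 de", "1 hond", "1 en",
-#"1 kat" to the output (counts aggregated over the selected subset)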
-
-if [[ $reallen -eq 0 ]]; then
- #when a zero is passed, use the whole file
- zcat -f $srcname | sed 's/ /\n/g' | egrep -v -f $pattern_file | sort | uniq -c | sed 's/^ *//'
-
-else
- zcat -f $srcname | head -n $reallen | sed 's/ /\n/g' | egrep -v -f $pattern_file | sort | uniq -c | sed 's/^ *//'
-fi
-
diff --git a/gi/pf/Makefile.am b/gi/pf/Makefile.am
deleted file mode 100644
index 86f8e07b..00000000
--- a/gi/pf/Makefile.am
+++ /dev/null
@@ -1,44 +0,0 @@
-bin_PROGRAMS = cbgi brat dpnaive pfbrat pfdist itg pfnaive condnaive align-lexonly-pyp learn_cfg pyp_lm nuisance_test align-tl pf_test bayes_lattice_score
-
-noinst_LIBRARIES = libpf.a
-
-libpf_a_SOURCES = base_distributions.cc reachability.cc cfg_wfst_composer.cc corpus.cc unigrams.cc ngram_base.cc transliterations.cc backward.cc hpyp_tm.cc pyp_tm.cc
-
-bayes_lattice_score_SOURCES = bayes_lattice_score.cc
-bayes_lattice_score_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
-
-pf_test_SOURCES = pf_test.cc
-pf_test_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
-
-nuisance_test_SOURCES = nuisance_test.cc
-nuisance_test_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
-
-align_lexonly_pyp_SOURCES = align-lexonly-pyp.cc
-align_lexonly_pyp_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
-
-align_tl_SOURCES = align-tl.cc
-align_tl_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
-
-itg_SOURCES = itg.cc
-
-pyp_lm_SOURCES = pyp_lm.cc
-
-learn_cfg_SOURCES = learn_cfg.cc
-
-condnaive_SOURCES = condnaive.cc
-
-dpnaive_SOURCES = dpnaive.cc
-
-pfdist_SOURCES = pfdist.cc
-
-pfnaive_SOURCES = pfnaive.cc
-
-cbgi_SOURCES = cbgi.cc
-
-brat_SOURCES = brat.cc
-
-pfbrat_SOURCES = pfbrat.cc
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I$(top_srcdir)/utils $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder -I$(top_srcdir)/klm
-
-AM_LDFLAGS = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a $(top_srcdir)/utils/libutils.a -lz
diff --git a/gi/pf/README b/gi/pf/README
deleted file mode 100644
index 62e47541..00000000
--- a/gi/pf/README
+++ /dev/null
@@ -1,2 +0,0 @@
-Experimental Bayesian alignment tools. Nothing to see here.
-
diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc
deleted file mode 100644
index e7509f57..00000000
--- a/gi/pf/align-lexonly-pyp.cc
+++ /dev/null
@@ -1,243 +0,0 @@
-#include <iostream>
-#include <queue>
-
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "tdict.h"
-#include "stringlib.h"
-#include "filelib.h"
-#include "array2d.h"
-#include "sampler.h"
-#include "corpus.h"
-#include "pyp_tm.h"
-#include "hpyp_tm.h"
-#include "quasi_model2.h"
-
-using namespace std;
-namespace po = boost::program_options;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples")
- ("infer_alignment_hyperparameters,I", "Infer alpha and p_null, otherwise fixed values will be assumed")
- ("p_null,0", po::value<double>()->default_value(0.08), "probability of aligning to null")
- ("align_alpha,a", po::value<double>()->default_value(4.0), "how 'tight' is the bias toward be along the diagonal?")
- ("input,i",po::value<string>(),"Read parallel data from")
- ("random_seed,S",po::value<uint32_t>(), "Random seed");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || (conf->count("input") == 0)) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-MT19937* prng;
-
-struct LexicalAlignment {
- unsigned char src_index;
- bool is_transliteration;
- vector<pair<short, short> > derivation;
-};
-
-struct AlignedSentencePair {
- vector<WordID> src;
- vector<WordID> trg;
- vector<LexicalAlignment> a;
- Array2D<short> posterior;
-};
-
-template <class LexicalTranslationModel>
-struct Aligner {
- Aligner(const vector<vector<WordID> >& lets,
- int vocab_size,
- int num_letters,
- const po::variables_map& conf,
- vector<AlignedSentencePair>* c) :
- corpus(*c),
- paj_model(conf["align_alpha"].as<double>(), conf["p_null"].as<double>()),
- infer_paj(conf.count("infer_alignment_hyperparameters") > 0),
- model(lets, vocab_size, num_letters),
- kNULL(TD::Convert("NULL")) {
- assert(lets[kNULL].size() == 0);
- }
-
- vector<AlignedSentencePair>& corpus;
- QuasiModel2 paj_model;
- const bool infer_paj;
- LexicalTranslationModel model;
- const WordID kNULL;
-
- void ResampleHyperparameters() {
- model.ResampleHyperparameters(prng);
- if (infer_paj) paj_model.ResampleHyperparameters(prng);
- }
-
- void InitializeRandom() {
- cerr << "Initializing with random alignments ...\n";
- for (unsigned i = 0; i < corpus.size(); ++i) {
- AlignedSentencePair& asp = corpus[i];
- asp.a.resize(asp.trg.size());
- for (unsigned j = 0; j < asp.trg.size(); ++j) {
- unsigned char& a_j = asp.a[j].src_index;
- a_j = prng->next() * (1 + asp.src.size());
- const WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL);
- model.Increment(f_a_j, asp.trg[j], &*prng);
- paj_model.Increment(a_j, j, asp.src.size(), asp.trg.size());
- }
- }
- cerr << "Corpus intialized randomly." << endl;
- cerr << "LLH = " << Likelihood() << " \t(Amodel=" << paj_model.Likelihood()
- << " TModel=" << model.Likelihood() << ") contexts=" << model.UniqueConditioningContexts() << endl;
- }
-
- void ResampleCorpus() {
- for (unsigned i = 0; i < corpus.size(); ++i) {
- AlignedSentencePair& asp = corpus[i];
- SampleSet<prob_t> ss; ss.resize(asp.src.size() + 1);
- for (unsigned j = 0; j < asp.trg.size(); ++j) {
- unsigned char& a_j = asp.a[j].src_index;
- const WordID e_j = asp.trg[j];
- WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL);
- model.Decrement(f_a_j, e_j, prng);
- paj_model.Decrement(a_j, j, asp.src.size(), asp.trg.size());
-
- for (unsigned prop_a_j = 0; prop_a_j <= asp.src.size(); ++prop_a_j) {
- const WordID prop_f = (prop_a_j ? asp.src[prop_a_j - 1] : kNULL);
- ss[prop_a_j] = model.Prob(prop_f, e_j);
- ss[prop_a_j] *= paj_model.Prob(prop_a_j, j, asp.src.size(), asp.trg.size());
- }
- a_j = prng->SelectSample(ss);
- f_a_j = (a_j ? asp.src[a_j - 1] : kNULL);
- model.Increment(f_a_j, e_j, prng);
- paj_model.Increment(a_j, j, asp.src.size(), asp.trg.size());
- }
- }
- }
-
- prob_t Likelihood() const {
- return model.Likelihood() * paj_model.Likelihood();
- }
-};
-
-void ExtractLetters(const set<WordID>& v, vector<vector<WordID> >* l, set<WordID>* letset = NULL) {
- for (set<WordID>::const_iterator it = v.begin(); it != v.end(); ++it) {
- vector<WordID>& letters = (*l)[*it];
- if (letters.size()) continue; // if e and f have the same word
-
- const string& w = TD::Convert(*it);
-
- size_t cur = 0;
- while (cur < w.size()) {
- const size_t len = UTF8Len(w[cur]);
- letters.push_back(TD::Convert(w.substr(cur, len)));
- if (letset) letset->insert(letters.back());
- cur += len;
- }
- }
-}
-
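-// e.g. a (hypothetical) word "dog" yields the letter sequence d o g; UTF8Len
-// keeps each multi-byte UTF-8 code point together as one "letter".
-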
-void Debug(const AlignedSentencePair& asp) {
- cerr << TD::GetString(asp.src) << endl << TD::GetString(asp.trg) << endl;
- Array2D<bool> a(asp.src.size(), asp.trg.size());
- for (unsigned j = 0; j < asp.trg.size(); ++j) {
- assert(asp.a[j].src_index <= asp.src.size());
- if (asp.a[j].src_index) a(asp.a[j].src_index - 1, j) = true;
- }
- cerr << a << endl;
-}
-
-void AddSample(AlignedSentencePair* asp) {
- for (unsigned j = 0; j < asp->trg.size(); ++j)
- asp->posterior(asp->a[j].src_index, j)++;
-}
-
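-// Posterior decoding: for each target word, pick the source index with the
-// most posterior samples; index 0 means NULL and emits no "i-j" pair.
-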
-void WriteAlignments(const AlignedSentencePair& asp) {
- bool first = true;
- for (unsigned j = 0; j < asp.trg.size(); ++j) {
- int src_index = -1;
- int mc = -1;
- for (unsigned i = 0; i <= asp.src.size(); ++i) {
- if (asp.posterior(i, j) > mc) {
- mc = asp.posterior(i, j);
- src_index = i;
- }
- }
-
- if (src_index) {
- if (first) first = false; else cout << ' ';
- cout << (src_index - 1) << '-' << j;
- }
- }
- cout << endl;
-}
-
-int main(int argc, char** argv) {
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
-
- if (conf.count("random_seed"))
- prng = new MT19937(conf["random_seed"].as<uint32_t>());
- else
- prng = new MT19937;
-
- vector<vector<int> > corpuse, corpusf;
- set<int> vocabe, vocabf;
- corpus::ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
- cerr << "f-Corpus size: " << corpusf.size() << " sentences\n";
- cerr << "f-Vocabulary size: " << vocabf.size() << " types\n";
- cerr << "f-Corpus size: " << corpuse.size() << " sentences\n";
- cerr << "f-Vocabulary size: " << vocabe.size() << " types\n";
- assert(corpusf.size() == corpuse.size());
-
- vector<AlignedSentencePair> corpus(corpuse.size());
- for (unsigned i = 0; i < corpuse.size(); ++i) {
- corpus[i].src.swap(corpusf[i]);
- corpus[i].trg.swap(corpuse[i]);
- corpus[i].posterior.resize(corpus[i].src.size() + 1, corpus[i].trg.size());
- }
- corpusf.clear(); corpuse.clear();
-
- vocabf.insert(TD::Convert("NULL"));
- vector<vector<WordID> > letters(TD::NumWords());
- set<WordID> letset;
- ExtractLetters(vocabe, &letters, &letset);
- ExtractLetters(vocabf, &letters, NULL);
- letters[TD::Convert("NULL")].clear();
-
- //Aligner<PYPLexicalTranslation> aligner(letters, vocabe.size(), letset.size(), conf, &corpus);
- Aligner<HPYPLexicalTranslation> aligner(letters, vocabe.size(), letset.size(), conf, &corpus);
- aligner.InitializeRandom();
-
- const unsigned samples = conf["samples"].as<unsigned>();
- for (int i = 0; i < samples; ++i) {
- for (int j = 65; j < 67; ++j) Debug(corpus[j]);
- if (i % 10 == 9) {
- aligner.ResampleHyperparameters();
- cerr << "LLH = " << aligner.Likelihood() << " \t(Amodel=" << aligner.paj_model.Likelihood()
- << " TModel=" << aligner.model.Likelihood() << ") contexts=" << aligner.model.UniqueConditioningContexts() << endl;
- }
- aligner.ResampleCorpus();
- if (i > (samples / 5) && (i % 6 == 5)) for (int j = 0; j < corpus.size(); ++j) AddSample(&corpus[j]);
- }
- for (unsigned i = 0; i < corpus.size(); ++i)
- WriteAlignments(corpus[i]);
- aligner.model.Summary();
-
- return 0;
-}
diff --git a/gi/pf/align-tl.cc b/gi/pf/align-tl.cc
deleted file mode 100644
index f6608f1d..00000000
--- a/gi/pf/align-tl.cc
+++ /dev/null
@@ -1,339 +0,0 @@
-#include <iostream>
-#include <tr1/memory>
-#include <queue>
-
-#include <boost/multi_array.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "backward.h"
-#include "array2d.h"
-#include "base_distributions.h"
-#include "monotonic_pseg.h"
-#include "conditional_pseg.h"
-#include "trule.h"
-#include "tdict.h"
-#include "stringlib.h"
-#include "filelib.h"
-#include "dict.h"
-#include "sampler.h"
-#include "mfcr.h"
-#include "corpus.h"
-#include "ngram_base.h"
-#include "transliterations.h"
-
-using namespace std;
-using namespace tr1;
-namespace po = boost::program_options;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples")
- ("input,i",po::value<string>(),"Read parallel data from")
- ("s2t", po::value<string>(), "character level source-to-target prior transliteration probabilities")
- ("t2s", po::value<string>(), "character level target-to-source prior transliteration probabilities")
- ("max_src_chunk", po::value<unsigned>()->default_value(4), "Maximum size of translitered chunk in source")
- ("max_trg_chunk", po::value<unsigned>()->default_value(4), "Maximum size of translitered chunk in target")
- ("expected_src_to_trg_ratio", po::value<double>()->default_value(1.0), "If a word is transliterated, what is the expected length ratio from source to target?")
- ("random_seed,S",po::value<uint32_t>(), "Random seed");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || (conf->count("input") == 0)) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-boost::shared_ptr<MT19937> prng;
-
-struct LexicalAlignment {
- unsigned char src_index;
- bool is_transliteration;
- vector<pair<short, short> > derivation;
-};
-
-struct AlignedSentencePair {
- vector<WordID> src;
- vector<WordID> trg;
- vector<LexicalAlignment> a;
- Array2D<short> posterior;
-};
-
-struct HierarchicalWordBase {
- explicit HierarchicalWordBase(const unsigned vocab_e_size) :
- base(prob_t::One()), r(1,1,1,1,0.66,50.0), u0(-log(vocab_e_size)), l(1,prob_t::One()), v(1, prob_t::Zero()) {}
-
- void ResampleHyperparameters(MT19937* rng) {
- r.resample_hyperparameters(rng);
- }
-
- inline double logp0(const vector<WordID>& s) const {
- return Md::log_poisson(s.size(), 7.5) + s.size() * u0;
- }
-
- // return p0 of rule.e_
- prob_t operator()(const TRule& rule) const {
- v[0].logeq(logp0(rule.e_));
- return r.prob(rule.e_, v.begin(), l.begin());
- }
-
- void Increment(const TRule& rule) {
- v[0].logeq(logp0(rule.e_));
- if (r.increment(rule.e_, v.begin(), l.begin(), &*prng).count) {
- base *= v[0] * l[0];
- }
- }
-
- void Decrement(const TRule& rule) {
- if (r.decrement(rule.e_, &*prng).count) {
- base /= prob_t(exp(logp0(rule.e_)));
- }
- }
-
- prob_t Likelihood() const {
- prob_t p; p.logeq(r.log_crp_prob());
- p *= base;
- return p;
- }
-
- void Summary() const {
- cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << " (d=" << r.discount() << ",s=" << r.strength() << ')' << endl;
- for (MFCR<1,vector<WordID> >::const_iterator it = r.begin(); it != r.end(); ++it)
- cerr << " " << it->second.total_dish_count_ << " (on " << it->second.table_counts_.size() << " tables) " << TD::GetString(it->first) << endl;
- }
-
- prob_t base;
- MFCR<1,vector<WordID> > r;
- const double u0;
- const vector<prob_t> l;
- mutable vector<prob_t> v;
-};
-
-struct BasicLexicalAlignment {
- explicit BasicLexicalAlignment(const vector<vector<WordID> >& lets,
- const unsigned words_e,
- const unsigned letters_e,
- vector<AlignedSentencePair>* corp) :
- letters(lets),
- corpus(*corp),
- //up0(words_e),
- //up0("en.chars.1gram", letters_e),
- //up0("en.words.1gram"),
- up0(letters_e),
- //up0("en.chars.2gram"),
- tmodel(up0) {
- }
-
- void InstantiateRule(const WordID src,
- const WordID trg,
- TRule* rule) const {
- static const WordID kX = TD::Convert("X") * -1;
- rule->lhs_ = kX;
- rule->e_ = letters[trg];
- rule->f_ = letters[src];
- }
-
- void InitializeRandom() {
- const WordID kNULL = TD::Convert("NULL");
- cerr << "Initializing with random alignments ...\n";
- for (unsigned i = 0; i < corpus.size(); ++i) {
- AlignedSentencePair& asp = corpus[i];
- asp.a.resize(asp.trg.size());
- for (unsigned j = 0; j < asp.trg.size(); ++j) {
- const unsigned char a_j = prng->next() * (1 + asp.src.size());
- const WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL);
- TRule r;
- InstantiateRule(f_a_j, asp.trg[j], &r);
- asp.a[j].is_transliteration = false;
- asp.a[j].src_index = a_j;
- if (tmodel.IncrementRule(r, &*prng))
- up0.Increment(r);
- }
- }
- cerr << " LLH = " << Likelihood() << endl;
- }
-
- prob_t Likelihood() const {
- prob_t p = tmodel.Likelihood();
- p *= up0.Likelihood();
- return p;
- }
-
- void ResampleHyperparemeters() {
- tmodel.ResampleHyperparameters(&*prng);
- up0.ResampleHyperparameters(&*prng);
- cerr << " (base d=" << up0.r.discount() << ",s=" << up0.r.strength() << ")\n";
- }
-
- void ResampleCorpus();
-
- const vector<vector<WordID> >& letters; // spelling dictionary
- vector<AlignedSentencePair>& corpus;
- //PhraseConditionalUninformativeBase up0;
- //PhraseConditionalUninformativeUnigramBase up0;
- //UnigramWordBase up0;
- //HierarchicalUnigramBase up0;
- HierarchicalWordBase up0;
- //CompletelyUniformBase up0;
- //FixedNgramBase up0;
- //ConditionalTranslationModel<PhraseConditionalUninformativeBase> tmodel;
- //ConditionalTranslationModel<PhraseConditionalUninformativeUnigramBase> tmodel;
- //ConditionalTranslationModel<UnigramWordBase> tmodel;
- //ConditionalTranslationModel<HierarchicalUnigramBase> tmodel;
- MConditionalTranslationModel<HierarchicalWordBase> tmodel;
- //ConditionalTranslationModel<FixedNgramBase> tmodel;
- //ConditionalTranslationModel<CompletelyUniformBase> tmodel;
-};
-
-void BasicLexicalAlignment::ResampleCorpus() {
- static const WordID kNULL = TD::Convert("NULL");
- for (unsigned i = 0; i < corpus.size(); ++i) {
- AlignedSentencePair& asp = corpus[i];
- SampleSet<prob_t> ss; ss.resize(asp.src.size() + 1);
- for (unsigned j = 0; j < asp.trg.size(); ++j) {
- TRule r;
- unsigned char& a_j = asp.a[j].src_index;
- WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL);
- InstantiateRule(f_a_j, asp.trg[j], &r);
- if (tmodel.DecrementRule(r, &*prng))
- up0.Decrement(r);
-
- for (unsigned prop_a_j = 0; prop_a_j <= asp.src.size(); ++prop_a_j) {
- const WordID prop_f = (prop_a_j ? asp.src[prop_a_j - 1] : kNULL);
- InstantiateRule(prop_f, asp.trg[j], &r);
- ss[prop_a_j] = tmodel.RuleProbability(r);
- }
- a_j = prng->SelectSample(ss);
- f_a_j = (a_j ? asp.src[a_j - 1] : kNULL);
- InstantiateRule(f_a_j, asp.trg[j], &r);
- if (tmodel.IncrementRule(r, &*prng))
- up0.Increment(r);
- }
- }
- cerr << " LLH = " << Likelihood() << endl;
-}
-
-void ExtractLetters(const set<WordID>& v, vector<vector<WordID> >* l, set<WordID>* letset = NULL) {
- for (set<WordID>::const_iterator it = v.begin(); it != v.end(); ++it) {
- vector<WordID>& letters = (*l)[*it];
- if (letters.size()) continue; // already extracted (the same word may occur in both e and f vocabularies)
-
- const string& w = TD::Convert(*it);
-
- size_t cur = 0;
- while (cur < w.size()) {
- const size_t len = UTF8Len(w[cur]);
- letters.push_back(TD::Convert(w.substr(cur, len)));
- if (letset) letset->insert(letters.back());
- cur += len;
- }
- }
-}
-
-void Debug(const AlignedSentencePair& asp) {
- cerr << TD::GetString(asp.src) << endl << TD::GetString(asp.trg) << endl;
- Array2D<bool> a(asp.src.size(), asp.trg.size());
- for (unsigned j = 0; j < asp.trg.size(); ++j)
- if (asp.a[j].src_index) a(asp.a[j].src_index - 1, j) = true;
- cerr << a << endl;
-}
-
-void AddSample(AlignedSentencePair* asp) {
- for (unsigned j = 0; j < asp->trg.size(); ++j)
- asp->posterior(asp->a[j].src_index, j)++;
-}
-
-void WriteAlignments(const AlignedSentencePair& asp) {
- bool first = true;
- for (unsigned j = 0; j < asp.trg.size(); ++j) {
- int src_index = -1;
- int mc = -1;
- for (unsigned i = 0; i <= asp.src.size(); ++i) {
- if (asp.posterior(i, j) > mc) {
- mc = asp.posterior(i, j);
- src_index = i;
- }
- }
-
- if (src_index) {
- if (first) first = false; else cout << ' ';
- cout << (src_index - 1) << '-' << j;
- }
- }
- cout << endl;
-}
-
-int main(int argc, char** argv) {
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
-
- if (conf.count("random_seed"))
- prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
- else
- prng.reset(new MT19937);
-// MT19937& rng = *prng;
-
- vector<vector<int> > corpuse, corpusf;
- set<int> vocabe, vocabf;
- corpus::ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
- cerr << "f-Corpus size: " << corpusf.size() << " sentences\n";
- cerr << "f-Vocabulary size: " << vocabf.size() << " types\n";
- cerr << "f-Corpus size: " << corpuse.size() << " sentences\n";
- cerr << "f-Vocabulary size: " << vocabe.size() << " types\n";
- assert(corpusf.size() == corpuse.size());
-
- vector<AlignedSentencePair> corpus(corpuse.size());
- for (unsigned i = 0; i < corpuse.size(); ++i) {
- corpus[i].src.swap(corpusf[i]);
- corpus[i].trg.swap(corpuse[i]);
- corpus[i].posterior.resize(corpus[i].src.size() + 1, corpus[i].trg.size());
- }
- corpusf.clear(); corpuse.clear();
-
- vocabf.insert(TD::Convert("NULL"));
- vector<vector<WordID> > letters(TD::NumWords() + 1);
- set<WordID> letset;
- ExtractLetters(vocabe, &letters, &letset);
- ExtractLetters(vocabf, &letters, NULL);
- letters[TD::Convert("NULL")].clear();
-
- // transliteration hyperparameters (set via command-line options)
- const int max_src_chunk = conf["max_src_chunk"].as<unsigned>();
- const int max_trg_chunk = conf["max_trg_chunk"].as<unsigned>();
- const double s2t_rat = conf["expected_src_to_trg_ratio"].as<double>();
- const BackwardEstimator be(conf["s2t"].as<string>(), conf["t2s"].as<string>());
- Transliterations tl(max_src_chunk, max_trg_chunk, s2t_rat, be);
-
- cerr << "Initializing transliteration graph structures ...\n";
- for (int i = 0; i < corpus.size(); ++i) {
- const vector<int>& src = corpus[i].src;
- const vector<int>& trg = corpus[i].trg;
- for (int j = 0; j < src.size(); ++j) {
- const vector<int>& src_let = letters[src[j]];
- for (int k = 0; k < trg.size(); ++k) {
- const vector<int>& trg_let = letters[trg[k]];
- tl.Initialize(src[j], src_let, trg[k], trg_let);
- //if (src_let.size() < min_trans_src)
- // tl.Forbid(src[j], src_let, trg[k], trg_let);
- }
- }
- }
- cerr << endl;
- tl.GraphSummary();
-
- return 0;
-}
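
The resampling loop in ResampleCorpus above is a standard collapsed Gibbs move: decrement the sufficient statistics for the current link, score every candidate alignment point under the predictive distribution, draw one in proportion to its score, and increment again. The draw itself is just categorical sampling from unnormalized weights; a minimal sketch of what prng->SelectSample(ss) does, assuming plain doubles and <random> in place of cdec's SampleSet and MT19937:

    #include <random>
    #include <vector>

    // Draw an index i with probability w[i] / sum(w); discrete_distribution
    // normalizes internally, so w may hold unnormalized predictive scores.
    int SelectSample(const std::vector<double>& w, std::mt19937& rng) {
      std::discrete_distribution<int> d(w.begin(), w.end());
      return d(rng);
    }
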
diff --git a/gi/pf/backward.cc b/gi/pf/backward.cc
deleted file mode 100644
index b92629fd..00000000
--- a/gi/pf/backward.cc
+++ /dev/null
@@ -1,89 +0,0 @@
-#include "backward.h"
-
-#include <queue>
-#include <utility>
-
-#include "array2d.h"
-#include "reachability.h"
-#include "base_distributions.h"
-
-using namespace std;
-
-BackwardEstimator::BackwardEstimator(const string& s2t,
- const string& t2s) : m1(new Model1(s2t)), m1inv(new Model1(t2s)) {}
-
-BackwardEstimator::~BackwardEstimator() {
- delete m1; m1 = NULL;
- delete m1inv; m1inv = NULL;
-}
-
-float BackwardEstimator::ComputeBackwardProb(const std::vector<WordID>& src,
- const std::vector<WordID>& trg,
- unsigned src_covered,
- unsigned trg_covered,
- double s2t_ratio) const {
- if (src_covered == src.size() || trg_covered == trg.size()) {
- assert(src_covered == src.size());
- assert(trg_covered == trg.size());
- return 0;
- }
- static const WordID kNULL = TD::Convert("<eps>");
- const prob_t uniform_alignment(1.0 / (src.size() - src_covered + 1));
- // TODO factor in expected length ratio
- prob_t e; e.logeq(Md::log_poisson(trg.size() - trg_covered, (src.size() - src_covered) * s2t_ratio)); // p(trg len remaining | src len remaining)
- for (unsigned j = trg_covered; j < trg.size(); ++j) {
- prob_t p = (*m1)(kNULL, trg[j]) + prob_t(1e-12);
- for (unsigned i = src_covered; i < src.size(); ++i)
- p += (*m1)(src[i], trg[j]);
- if (p.is_0()) {
- cerr << "ERROR: p(" << TD::Convert(trg[j]) << " | " << TD::GetString(src) << ") = 0!\n";
- assert(!"failed");
- }
- p *= uniform_alignment;
- e *= p;
- }
- // TODO factor in expected length ratio
- const prob_t inv_uniform(1.0 / (trg.size() - trg_covered + 1.0));
- prob_t inv;
- inv.logeq(Md::log_poisson(src.size() - src_covered, (trg.size() - trg_covered) / s2t_ratio));
- for (unsigned i = src_covered; i < src.size(); ++i) {
- prob_t p = (*m1inv)(kNULL, src[i]) + prob_t(1e-12);
- for (unsigned j = trg_covered; j < trg.size(); ++j)
- p += (*m1inv)(trg[j], src[i]);
- if (p.is_0()) {
- cerr << "ERROR: p_inv(" << TD::Convert(src[i]) << " | " << TD::GetString(trg) << ") = 0!\n";
- assert(!"failed");
- }
- p *= inv_uniform;
- inv *= p;
- }
- return (log(e) + log(inv)) / 2;
-}
-
-void BackwardEstimator::InitializeGrid(const vector<WordID>& src,
- const vector<WordID>& trg,
- const Reachability& r,
- double s2t_ratio,
- float* grid) const {
- queue<pair<int,int> > q;
- q.push(make_pair(0,0));
- Array2D<bool> done(src.size()+1, trg.size()+1, false);
- //cerr << TD::GetString(src) << " ||| " << TD::GetString(trg) << endl;
- while(!q.empty()) {
- const pair<int,int> n = q.front();
- q.pop();
- if (done(n.first,n.second)) continue;
- done(n.first,n.second) = true;
-
- float lp = ComputeBackwardProb(src, trg, n.first, n.second, s2t_ratio);
- if (n.first == 0 && n.second == 0) grid[0] = lp;
- //cerr << " " << n.first << "," << n.second << "\t" << lp << endl;
-
- if (n.first == src.size() || n.second == trg.size()) continue;
- const vector<pair<short,short> >& edges = r.valid_deltas[n.first][n.second];
- for (int i = 0; i < edges.size(); ++i)
- q.push(make_pair(n.first + edges[i].first, n.second + edges[i].second));
- }
- //static int cc = 0; ++cc; if (cc == 80) exit(1);
-}
-
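
ComputeBackwardProb above estimates the cost of the uncovered suffix pair in both Model 1 directions and returns (log(e) + log(inv)) / 2, which is the log of the geometric mean sqrt(e * inv); the estimate is therefore symmetric in the two directions and never more optimistic than the better of the two. The combination step in isolation, sketched with plain doubles rather than prob_t:

    #include <cmath>

    // Log of the geometric mean of two probability estimates given in log space.
    inline double CombineBidirectional(double log_e, double log_inv) {
      return 0.5 * (log_e + log_inv);  // == log(sqrt(exp(log_e) * exp(log_inv)))
    }
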
diff --git a/gi/pf/backward.h b/gi/pf/backward.h
deleted file mode 100644
index e67eff0c..00000000
--- a/gi/pf/backward.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef _BACKWARD_H_
-#define _BACKWARD_H_
-
-#include <vector>
-#include <string>
-#include "wordid.h"
-
-struct Reachability;
-struct Model1;
-
-struct BackwardEstimator {
- BackwardEstimator(const std::string& s2t,
- const std::string& t2s);
- ~BackwardEstimator();
-
- void InitializeGrid(const std::vector<WordID>& src,
- const std::vector<WordID>& trg,
- const Reachability& r,
- double src2trg_ratio,
- float* grid) const;
-
- private:
- float ComputeBackwardProb(const std::vector<WordID>& src,
- const std::vector<WordID>& trg,
- unsigned src_covered,
- unsigned trg_covered,
- double src2trg_ratio) const;
-
- Model1* m1;
- Model1* m1inv;
-};
-
-#endif
diff --git a/gi/pf/base_distributions.cc b/gi/pf/base_distributions.cc
deleted file mode 100644
index d9761005..00000000
--- a/gi/pf/base_distributions.cc
+++ /dev/null
@@ -1,241 +0,0 @@
-#include "base_distributions.h"
-
-#include <iostream>
-
-#include "filelib.h"
-
-using namespace std;
-
-TableLookupBase::TableLookupBase(const string& fname) {
- cerr << "TableLookupBase reading from " << fname << " ..." << endl;
- ReadFile rf(fname);
- istream& in = *rf.stream();
- string line;
- unsigned lc = 0;
- const WordID kDIV = TD::Convert("|||");
- vector<WordID> tmp;
- vector<int> le, lf;
- TRule x;
- x.lhs_ = -TD::Convert("X");
- bool flag = false;
- while(getline(in, line)) {
- ++lc;
- if (lc % 1000000 == 0) { cerr << " [" << lc << ']' << endl; flag = false; }
- else if (lc % 25000 == 0) { cerr << '.' << flush; flag = true; }
- tmp.clear();
- TD::ConvertSentence(line, &tmp);
- x.f_.clear();
- x.e_.clear();
- size_t pos = 0;
- int cc = 0;
- while(pos < tmp.size()) {
- const WordID cur = tmp[pos++];
- if (cur == kDIV) {
- ++cc;
- } else if (cc == 0) {
- x.f_.push_back(cur);
- } else if (cc == 1) {
- x.e_.push_back(cur);
- } else if (cc == 2) {
- table[x].logeq(atof(TD::Convert(cur)));
- ++cc;
- } else {
- if (flag) cerr << endl;
- cerr << "Bad format in " << lc << ": " << line << endl; abort();
- }
- }
- if (cc != 3) {
- if (flag) cerr << endl;
- cerr << "Bad format in " << lc << ": " << line << endl; abort();
- }
- }
- if (flag) cerr << endl;
- cerr << " read " << lc << " entries\n";
-}
-
-prob_t PhraseConditionalUninformativeUnigramBase::p0(const vector<WordID>& vsrc,
- const vector<WordID>& vtrg,
- int start_src, int start_trg) const {
- const int flen = vsrc.size() - start_src;
- const int elen = vtrg.size() - start_trg;
- prob_t p;
- p.logeq(Md::log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01)
- //p.logeq(log_poisson(elen, 1)); // alternative: elen ~Pois(1)
- for (int i = 0; i < elen; ++i)
- p *= u(vtrg[i + start_trg]); // draw e_i ~Unigram
- return p;
-}
-
-prob_t PhraseConditionalUninformativeBase::p0(const vector<WordID>& vsrc,
- const vector<WordID>& vtrg,
- int start_src, int start_trg) const {
- const int flen = vsrc.size() - start_src;
- const int elen = vtrg.size() - start_trg;
- prob_t p;
- //p.logeq(log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01)
- p.logeq(Md::log_poisson(elen, 1)); // elen ~Pois(1)
- for (int i = 0; i < elen; ++i)
- p *= kUNIFORM_TARGET; // draw e_i ~Uniform
- return p;
-}
-
-void Model1::LoadModel1(const string& fname) {
- cerr << "Loading Model 1 parameters from " << fname << " ..." << endl;
- ReadFile rf(fname);
- istream& in = *rf.stream();
- string line;
- unsigned lc = 0;
- while(getline(in, line)) {
- ++lc;
- int cur = 0;
- int start = 0;
- while(cur < line.size() && line[cur] != ' ') { ++cur; }
- assert(cur != line.size());
- line[cur] = 0;
- const WordID src = TD::Convert(&line[0]);
- ++cur;
- start = cur;
- while(cur < line.size() && line[cur] != ' ') { ++cur; }
- assert(cur != line.size());
- line[cur] = 0;
- WordID trg = TD::Convert(&line[start]);
- const double logprob = strtod(&line[cur + 1], NULL);
- if (src >= ttable.size()) ttable.resize(src + 1);
- ttable[src][trg].logeq(logprob);
- }
- cerr << " read " << lc << " parameters.\n";
-}
-
-prob_t PhraseConditionalBase::p0(const vector<WordID>& vsrc,
- const vector<WordID>& vtrg,
- int start_src, int start_trg) const {
- const int flen = vsrc.size() - start_src;
- const int elen = vtrg.size() - start_trg;
- prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
- prob_t p;
- p.logeq(Md::log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01)
- for (int i = 0; i < elen; ++i) { // for each position i in e-RHS
- const WordID trg = vtrg[i + start_trg];
- prob_t tp = prob_t::Zero();
- for (int j = -1; j < flen; ++j) {
- const WordID src = j < 0 ? 0 : vsrc[j + start_src];
- tp += kM1MIXTURE * model1(src, trg);
- tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
- }
- tp *= uniform_src_alignment; // draw a_i ~uniform
- p *= tp; // draw e_i ~Model1(f_a_i) / uniform
- }
- if (p.is_0()) {
- cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
- abort();
- }
- return p;
-}
-
-prob_t PhraseJointBase::p0(const vector<WordID>& vsrc,
- const vector<WordID>& vtrg,
- int start_src, int start_trg) const {
- const int flen = vsrc.size() - start_src;
- const int elen = vtrg.size() - start_trg;
- prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
- prob_t p;
- p.logeq(Md::log_poisson(flen, 1.0)); // flen ~Pois(1)
- // elen | flen ~Pois(flen + 0.01)
- prob_t ptrglen; ptrglen.logeq(Md::log_poisson(elen, flen + 0.01));
- p *= ptrglen;
- p *= kUNIFORM_SOURCE.pow(flen); // each f in F ~Uniform
- for (int i = 0; i < elen; ++i) { // for each position i in E
- const WordID trg = vtrg[i + start_trg];
- prob_t tp = prob_t::Zero();
- for (int j = -1; j < flen; ++j) {
- const WordID src = j < 0 ? 0 : vsrc[j + start_src];
- tp += kM1MIXTURE * model1(src, trg);
- tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
- }
- tp *= uniform_src_alignment; // draw a_i ~uniform
- p *= tp; // draw e_i ~Model1(f_a_i) / uniform
- }
- if (p.is_0()) {
- cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
- abort();
- }
- return p;
-}
-
-prob_t PhraseJointBase_BiDir::p0(const vector<WordID>& vsrc,
- const vector<WordID>& vtrg,
- int start_src, int start_trg) const {
- const int flen = vsrc.size() - start_src;
- const int elen = vtrg.size() - start_trg;
- prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
- prob_t uniform_trg_alignment; uniform_trg_alignment.logeq(-log(elen + 1));
-
- prob_t p1;
- p1.logeq(Md::log_poisson(flen, 1.0)); // flen ~Pois(1)
- // elen | flen ~Pois(flen + 0.01)
- prob_t ptrglen; ptrglen.logeq(Md::log_poisson(elen, flen + 0.01));
- p1 *= ptrglen;
- p1 *= kUNIFORM_SOURCE.pow(flen); // each f in F ~Uniform
- for (int i = 0; i < elen; ++i) { // for each position i in E
- const WordID trg = vtrg[i + start_trg];
- prob_t tp = prob_t::Zero();
- for (int j = -1; j < flen; ++j) {
- const WordID src = j < 0 ? 0 : vsrc[j + start_src];
- tp += kM1MIXTURE * model1(src, trg);
- tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
- }
- tp *= uniform_src_alignment; // draw a_i ~uniform
- p1 *= tp; // draw e_i ~Model1(f_a_i) / uniform
- }
- if (p1.is_0()) {
- cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
- abort();
- }
-
- prob_t p2;
- p2.logeq(Md::log_poisson(elen, 1.0)); // elen ~Pois(1)
- // flen | elen ~Pois(elen + 0.01)
- prob_t psrclen; psrclen.logeq(Md::log_poisson(flen, elen + 0.01));
- p2 *= psrclen;
- p2 *= kUNIFORM_TARGET.pow(elen); // each e in E ~Uniform
- for (int i = 0; i < flen; ++i) { // for each position i in F
- const WordID src = vsrc[i + start_src];
- prob_t tp = prob_t::Zero();
- for (int j = -1; j < elen; ++j) {
- const WordID trg = j < 0 ? 0 : vtrg[j + start_trg];
- tp += kM1MIXTURE * invmodel1(trg, src);
- tp += kUNIFORM_MIXTURE * kUNIFORM_SOURCE;
- }
- tp *= uniform_trg_alignment; // draw a_i ~uniform
- p2 *= tp; // draw f_i ~invModel1(e_a_i) / uniform
- }
- if (p2.is_0()) {
- cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
- abort();
- }
-
- static const prob_t kHALF(0.5);
- return (p1 + p2) * kHALF;
-}
-
-JumpBase::JumpBase() : p(200) {
- for (unsigned src_len = 1; src_len < 200; ++src_len) {
- map<int, prob_t>& cpd = p[src_len];
- int min_jump = 1 - src_len;
- int max_jump = src_len;
- prob_t z;
- for (int j = min_jump; j <= max_jump; ++j) {
- prob_t& cp = cpd[j];
- if (j < 0)
- cp.logeq(Md::log_poisson(1.5-j, 1));
- else if (j > 0)
- cp.logeq(Md::log_poisson(j, 1));
- cp.poweq(0.2);
- z += cp;
- }
- for (int j = min_jump; j <= max_jump; ++j) {
- cpd[j] /= z;
- }
- }
-}
-
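
The recurring pattern in the p0 functions above is a per-target-word mixture: each e_i is scored by averaging, over all alignment points including NULL, a convex combination of the Model 1 translation probability and a uniform distribution over the target vocabulary. A condensed sketch with plain doubles (the function pointer model1 stands in for the ttable lookup; the real code accumulates in prob_t log space):

    #include <vector>

    // Score one target word e_i against source phrase f under the
    // Model 1 / uniform mixture, averaged over alignment points.
    double WordScore(double (*model1)(int src, int trg),
                     const std::vector<int>& f, int e_i,
                     double lambda, double uniform_target) {
      double tp = 0.0;
      for (int j = -1; j < static_cast<int>(f.size()); ++j) {
        const int src = (j < 0) ? 0 : f[j];  // j == -1 is the NULL word
        tp += lambda * model1(src, e_i) + (1.0 - lambda) * uniform_target;
      }
      return tp / (f.size() + 1);  // uniform alignment: p(a_i) = 1 / (|f| + 1)
    }
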
diff --git a/gi/pf/base_distributions.h b/gi/pf/base_distributions.h
deleted file mode 100644
index 41b513f8..00000000
--- a/gi/pf/base_distributions.h
+++ /dev/null
@@ -1,238 +0,0 @@
-#ifndef _BASE_MEASURES_H_
-#define _BASE_MEASURES_H_
-
-#include <vector>
-#include <map>
-#include <string>
-#include <cmath>
-#include <iostream>
-#include <cassert>
-
-#include "unigrams.h"
-#include "trule.h"
-#include "prob.h"
-#include "tdict.h"
-#include "sampler.h"
-#include "m.h"
-#include "os_phrase.h"
-
-struct Model1 {
- explicit Model1(const std::string& fname) :
- kNULL(TD::Convert("<eps>")),
- kZERO() {
- LoadModel1(fname);
- }
-
- void LoadModel1(const std::string& fname);
-
- // returns prob 0 if src or trg is not found
- const prob_t& operator()(WordID src, WordID trg) const {
- if (src == 0) src = kNULL;
- if (src < ttable.size()) {
- const std::map<WordID, prob_t>& cpd = ttable[src];
- const std::map<WordID, prob_t>::const_iterator it = cpd.find(trg);
- if (it != cpd.end())
- return it->second;
- }
- return kZERO;
- }
-
- const WordID kNULL;
- const prob_t kZERO;
- std::vector<std::map<WordID, prob_t> > ttable;
-};
-
-struct PoissonUniformUninformativeBase {
- explicit PoissonUniformUninformativeBase(const unsigned ves) : kUNIFORM(1.0 / ves) {}
- prob_t operator()(const TRule& r) const {
- prob_t p; p.logeq(Md::log_poisson(r.e_.size(), 1.0));
- prob_t q = kUNIFORM; q.poweq(r.e_.size());
- p *= q;
- return p;
- }
- void Summary() const {}
- void ResampleHyperparameters(MT19937*) {}
- void Increment(const TRule&) {}
- void Decrement(const TRule&) {}
- prob_t Likelihood() const { return prob_t::One(); }
- const prob_t kUNIFORM;
-};
-
-struct CompletelyUniformBase {
- explicit CompletelyUniformBase(const unsigned ves) : kUNIFORM(1.0 / ves) {}
- prob_t operator()(const TRule&) const {
- return kUNIFORM;
- }
- void Summary() const {}
- void ResampleHyperparameters(MT19937*) {}
- void Increment(const TRule&) {}
- void Decrement(const TRule&) {}
- prob_t Likelihood() const { return prob_t::One(); }
- const prob_t kUNIFORM;
-};
-
-struct UnigramWordBase {
- explicit UnigramWordBase(const std::string& fname) : un(fname) {}
- prob_t operator()(const TRule& r) const {
- return un(r.e_);
- }
- const UnigramWordModel un;
-};
-
-struct RuleHasher {
- size_t operator()(const TRule& r) const {
- return hash_value(r);
- }
-};
-
-struct TableLookupBase {
- TableLookupBase(const std::string& fname);
-
- prob_t operator()(const TRule& rule) const {
- const std::tr1::unordered_map<TRule,prob_t,RuleHasher>::const_iterator it = table.find(rule);
- if (it == table.end()) {
- std::cerr << rule << " not found\n";
- abort();
- }
- return it->second;
- }
-
- void ResampleHyperparameters(MT19937*) {}
- void Increment(const TRule&) {}
- void Decrement(const TRule&) {}
- prob_t Likelihood() const { return prob_t::One(); }
- void Summary() const {}
-
- std::tr1::unordered_map<TRule,prob_t,RuleHasher> table;
-};
-
-struct PhraseConditionalUninformativeBase {
- explicit PhraseConditionalUninformativeBase(const unsigned vocab_e_size) :
- kUNIFORM_TARGET(1.0 / vocab_e_size) {
- assert(vocab_e_size > 0);
- }
-
- // return p0 of rule.e_ | rule.f_
- prob_t operator()(const TRule& rule) const {
- return p0(rule.f_, rule.e_, 0, 0);
- }
-
- prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
-
- void Summary() const {}
- void ResampleHyperparameters(MT19937*) {}
- void Increment(const TRule&) {}
- void Decrement(const TRule&) {}
- prob_t Likelihood() const { return prob_t::One(); }
- const prob_t kUNIFORM_TARGET;
-};
-
-struct PhraseConditionalUninformativeUnigramBase {
- explicit PhraseConditionalUninformativeUnigramBase(const std::string& file, const unsigned vocab_e_size) : u(file, vocab_e_size) {}
-
- // return p0 of rule.e_ | rule.f_
- prob_t operator()(const TRule& rule) const {
- return p0(rule.f_, rule.e_, 0, 0);
- }
-
- prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
-
- const UnigramModel u;
-};
-
-struct PhraseConditionalBase {
- explicit PhraseConditionalBase(const Model1& m1, const double m1mixture, const unsigned vocab_e_size) :
- model1(m1),
- kM1MIXTURE(m1mixture),
- kUNIFORM_MIXTURE(1.0 - m1mixture),
- kUNIFORM_TARGET(1.0 / vocab_e_size) {
- assert(m1mixture >= 0.0 && m1mixture <= 1.0);
- assert(vocab_e_size > 0);
- }
-
- // return p0 of rule.e_ | rule.f_
- prob_t operator()(const TRule& rule) const {
- return p0(rule.f_, rule.e_, 0, 0);
- }
-
- prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
-
- const Model1& model1;
- const prob_t kM1MIXTURE; // Model 1 mixture component
- const prob_t kUNIFORM_MIXTURE; // uniform mixture component
- const prob_t kUNIFORM_TARGET;
-};
-
-struct PhraseJointBase {
- explicit PhraseJointBase(const Model1& m1, const double m1mixture, const unsigned vocab_e_size, const unsigned vocab_f_size) :
- model1(m1),
- kM1MIXTURE(m1mixture),
- kUNIFORM_MIXTURE(1.0 - m1mixture),
- kUNIFORM_SOURCE(1.0 / vocab_f_size),
- kUNIFORM_TARGET(1.0 / vocab_e_size) {
- assert(m1mixture >= 0.0 && m1mixture <= 1.0);
- assert(vocab_e_size > 0);
- }
-
- // return p0 of rule.e_ , rule.f_
- prob_t operator()(const TRule& rule) const {
- return p0(rule.f_, rule.e_, 0, 0);
- }
-
- prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
-
- const Model1& model1;
- const prob_t kM1MIXTURE; // Model 1 mixture component
- const prob_t kUNIFORM_MIXTURE; // uniform mixture component
- const prob_t kUNIFORM_SOURCE;
- const prob_t kUNIFORM_TARGET;
-};
-
-struct PhraseJointBase_BiDir {
- explicit PhraseJointBase_BiDir(const Model1& m1,
- const Model1& im1,
- const double m1mixture,
- const unsigned vocab_e_size,
- const unsigned vocab_f_size) :
- model1(m1),
- invmodel1(im1),
- kM1MIXTURE(m1mixture),
- kUNIFORM_MIXTURE(1.0 - m1mixture),
- kUNIFORM_SOURCE(1.0 / vocab_f_size),
- kUNIFORM_TARGET(1.0 / vocab_e_size) {
- assert(m1mixture >= 0.0 && m1mixture <= 1.0);
- assert(vocab_e_size > 0);
- }
-
- // return p0 of rule.e_ , rule.f_
- prob_t operator()(const TRule& rule) const {
- return p0(rule.f_, rule.e_, 0, 0);
- }
-
- prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
-
- const Model1& model1;
- const Model1& invmodel1;
- const prob_t kM1MIXTURE; // Model 1 mixture component
- const prob_t kUNIFORM_MIXTURE; // uniform mixture component
- const prob_t kUNIFORM_SOURCE;
- const prob_t kUNIFORM_TARGET;
-};
-
- // base distribution for jump size multinomials:
- // p(0) = 0, p(1) is the mode, and probability decays as |jump| grows
- // toward the maximum jump distance (backward jumps are penalized more)
-struct JumpBase {
- JumpBase();
-
- const prob_t& operator()(int jump, unsigned src_len) const {
- assert(jump != 0);
- const std::map<int, prob_t>::const_iterator it = p[src_len].find(jump);
- assert(it != p[src_len].end());
- return it->second;
- }
- std::vector<std::map<int, prob_t> > p;
-};
-
-
-#endif
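
The JumpBase declared above is filled in base_distributions.cc: for every source length, each nonzero jump receives a Poisson(1)-shaped score (backward jumps are mapped to 1.5 - j, so they are penalized more than forward jumps of the same size), the scores are flattened by raising them to the 0.2 power, and the row is normalized. A standalone sketch of one row, using doubles rather than prob_t:

    #include <cmath>
    #include <map>

    std::map<int, double> JumpRow(int src_len) {
      std::map<int, double> cpd;
      double z = 0.0;
      for (int j = 1 - src_len; j <= src_len; ++j) {
        if (j == 0) continue;                    // p(0) = 0 by construction
        const double x = (j < 0) ? 1.5 - j : j;  // backward jumps use 1.5 - j
        const double log_pois = -std::lgamma(x + 1.0) - 1.0;  // log Pois(x; 1)
        cpd[j] = std::exp(0.2 * log_pois);       // the poweq(0.2) flattening
        z += cpd[j];
      }
      for (std::map<int, double>::iterator it = cpd.begin(); it != cpd.end(); ++it)
        it->second /= z;                         // normalize the multinomial
      return cpd;
    }
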
diff --git a/gi/pf/bayes_lattice_score.cc b/gi/pf/bayes_lattice_score.cc
deleted file mode 100644
index 70cb8dc2..00000000
--- a/gi/pf/bayes_lattice_score.cc
+++ /dev/null
@@ -1,309 +0,0 @@
-#include <iostream>
-#include <queue>
-
-#include <boost/functional.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "inside_outside.h"
-#include "hg.h"
-#include "hg_io.h"
-#include "bottom_up_parser.h"
-#include "fdict.h"
-#include "grammar.h"
-#include "m.h"
-#include "trule.h"
-#include "tdict.h"
-#include "filelib.h"
-#include "dict.h"
-#include "sampler.h"
-#include "ccrp.h"
-#include "ccrp_onetable.h"
-
-using namespace std;
-using namespace tr1;
-namespace po = boost::program_options;
-
-boost::shared_ptr<MT19937> prng;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples")
- ("input,i",po::value<string>(),"Read parallel data from")
- ("random_seed,S",po::value<uint32_t>(), "Random seed");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || (conf->count("input") == 0)) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-unsigned ReadCorpus(const string& filename,
- vector<Lattice>* e,
- set<WordID>* vocab_e) {
- e->clear();
- vocab_e->clear();
- ReadFile rf(filename);
- istream* in = rf.stream();
- assert(*in);
- string line;
- unsigned toks = 0;
- while(*in) {
- getline(*in, line);
- if (line.empty() && !*in) break;
- e->push_back(Lattice());
- Lattice& le = e->back();
- LatticeTools::ConvertTextOrPLF(line, &le);
- for (unsigned i = 0; i < le.size(); ++i)
- for (unsigned j = 0; j < le[i].size(); ++j)
- vocab_e->insert(le[i][j].label);
- toks += le.size();
- }
- return toks;
-}
-
-struct BaseModel {
- explicit BaseModel(unsigned tc) :
- unif(1.0 / tc), p(prob_t::One()) {}
- prob_t prob(const TRule& r) const {
- return unif;
- }
- void increment(const TRule& r, MT19937* rng) {
- p *= prob(r);
- }
- void decrement(const TRule& r, MT19937* rng) {
- p /= prob(r);
- }
- prob_t Likelihood() const {
- return p;
- }
- const prob_t unif;
- prob_t p;
-};
-
-struct UnigramModel {
- explicit UnigramModel(unsigned tc) : base(tc), crp(1,1,1,1), glue(1,1,1,1) {}
- BaseModel base;
- CCRP<TRule> crp;
- CCRP<TRule> glue;
-
- prob_t Prob(const TRule& r) const {
- if (r.Arity() != 0) {
- return glue.prob(r, prob_t(0.5));
- }
- return crp.prob(r, base.prob(r));
- }
-
- int Increment(const TRule& r, MT19937* rng) {
- if (r.Arity() != 0) {
- glue.increment(r, 0.5, rng);
- return 0;
- } else {
- if (crp.increment(r, base.prob(r), rng)) {
- base.increment(r, rng);
- return 1;
- }
- return 0;
- }
- }
-
- int Decrement(const TRule& r, MT19937* rng) {
- if (r.Arity() != 0) {
- glue.decrement(r, rng);
- return 0;
- } else {
- if (crp.decrement(r, rng)) {
- base.decrement(r, rng);
- return -1;
- }
- return 0;
- }
- }
-
- prob_t Likelihood() const {
- prob_t p;
- p.logeq(crp.log_crp_prob() + glue.log_crp_prob());
- p *= base.Likelihood();
- return p;
- }
-
- void ResampleHyperparameters(MT19937* rng) {
- crp.resample_hyperparameters(rng);
- glue.resample_hyperparameters(rng);
- cerr << " d=" << crp.discount() << ", s=" << crp.strength() << "\t STOP d=" << glue.discount() << ", s=" << glue.strength() << endl;
- }
-};
-
-UnigramModel* plm;
-
-void SampleDerivation(const Hypergraph& hg, MT19937* rng, vector<unsigned>* sampled_deriv) {
- vector<prob_t> node_probs;
- Inside<prob_t, EdgeProb>(hg, &node_probs);
- queue<unsigned> q;
- q.push(hg.nodes_.size() - 2);
- while(!q.empty()) {
- unsigned cur_node_id = q.front();
-// cerr << "NODE=" << cur_node_id << endl;
- q.pop();
- const Hypergraph::Node& node = hg.nodes_[cur_node_id];
- const unsigned num_in_edges = node.in_edges_.size();
- unsigned sampled_edge = 0;
- if (num_in_edges == 1) {
- sampled_edge = node.in_edges_[0];
- } else {
- //prob_t z;
- assert(num_in_edges > 1);
- SampleSet<prob_t> ss;
- for (unsigned j = 0; j < num_in_edges; ++j) {
- const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]];
- prob_t p = edge.edge_prob_;
- for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k)
- p *= node_probs[edge.tail_nodes_[k]];
- ss.add(p);
-// cerr << log(ss[j]) << " ||| " << edge.rule_->AsString() << endl;
- //z += p;
- }
-// for (unsigned j = 0; j < num_in_edges; ++j) {
-// const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]];
-// cerr << exp(log(ss[j] / z)) << " ||| " << edge.rule_->AsString() << endl;
-// }
-// cerr << " --- \n";
- sampled_edge = node.in_edges_[rng->SelectSample(ss)];
- }
- sampled_deriv->push_back(sampled_edge);
- const Hypergraph::Edge& edge = hg.edges_[sampled_edge];
- for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) {
- q.push(edge.tail_nodes_[j]);
- }
- }
-// for (unsigned i = 0; i < sampled_deriv->size(); ++i) {
-// cerr << *hg.edges_[(*sampled_deriv)[i]].rule_ << endl;
-// }
-}
-
-void IncrementDerivation(const Hypergraph& hg, const vector<unsigned>& d, UnigramModel* plm, MT19937* rng) {
- for (unsigned i = 0; i < d.size(); ++i)
- plm->Increment(*hg.edges_[d[i]].rule_, rng);
-}
-
-void DecrementDerivation(const Hypergraph& hg, const vector<unsigned>& d, UnigramModel* plm, MT19937* rng) {
- for (unsigned i = 0; i < d.size(); ++i)
- plm->Decrement(*hg.edges_[d[i]].rule_, rng);
-}
-
-prob_t TotalProb(const Hypergraph& hg) {
- return Inside<prob_t, EdgeProb>(hg);
-}
-
-void IncrementLatticePath(const Hypergraph& hg, const vector<unsigned>& d, Lattice* pl) {
- Lattice& lat = *pl;
- for (int i = 0; i < d.size(); ++i) {
- const Hypergraph::Edge& edge = hg.edges_[d[i]];
- if (edge.rule_->Arity() != 0) continue;
- WordID sym = edge.rule_->e_[0];
- vector<LatticeArc>& las = lat[edge.i_];
- int dist = edge.j_ - edge.i_;
- assert(dist > 0);
- for (int j = 0; j < las.size(); ++j) {
- if (las[j].dist2next == dist &&
- las[j].label == sym) {
- las[j].cost += 1;
- }
- }
- }
-}
-
-int main(int argc, char** argv) {
- po::variables_map conf;
-
- InitCommandLine(argc, argv, &conf);
- vector<GrammarPtr> grammars(2);
- grammars[0].reset(new GlueGrammar("S","X"));
- const unsigned samples = conf["samples"].as<unsigned>();
-
- if (conf.count("random_seed"))
- prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
- else
- prng.reset(new MT19937);
- MT19937& rng = *prng;
- vector<Lattice> corpuse;
- set<WordID> vocabe;
- cerr << "Reading corpus...\n";
- const unsigned toks = ReadCorpus(conf["input"].as<string>(), &corpuse, &vocabe);
- cerr << "E-corpus size: " << corpuse.size() << " lattices\t (" << vocabe.size() << " word types)\n";
- UnigramModel lm(vocabe.size());
- vector<Hypergraph> hgs(corpuse.size());
- vector<vector<unsigned> > derivs(corpuse.size());
- for (int i = 0; i < corpuse.size(); ++i) {
- grammars[1].reset(new PassThroughGrammar(corpuse[i], "X"));
- ExhaustiveBottomUpParser parser("S", grammars);
- bool res = parser.Parse(corpuse[i], &hgs[i]); // exhaustive parse
- assert(res);
- }
-
- double csamples = 0;
- for (int SS=0; SS < samples; ++SS) {
- const bool is_last = ((samples - 1) == SS);
- prob_t dlh = prob_t::One();
- bool record_sample = (SS > (samples * 1 / 3) && (SS % 5 == 3));
- if (record_sample) csamples++;
- for (int ci = 0; ci < corpuse.size(); ++ci) {
- Lattice& lat = corpuse[ci];
- Hypergraph& hg = hgs[ci];
- vector<unsigned>& d = derivs[ci];
- if (!is_last) DecrementDerivation(hg, d, &lm, &rng);
- for (unsigned i = 0; i < hg.edges_.size(); ++i) {
- TRule& r = *hg.edges_[i].rule_;
- if (r.Arity() != 0)
- hg.edges_[i].edge_prob_ = prob_t::One();
- else
- hg.edges_[i].edge_prob_ = lm.Prob(r);
- }
- if (!is_last) {
- d.clear();
- SampleDerivation(hg, &rng, &d);
- IncrementDerivation(hg, derivs[ci], &lm, &rng);
- } else {
- prob_t p = TotalProb(hg);
- dlh *= p;
- cerr << " p(sentence) = " << log(p) << "\t" << log(dlh) << endl;
- }
- if (record_sample) IncrementLatticePath(hg, derivs[ci], &lat);
- }
- double llh = log(lm.Likelihood());
- cerr << "LLH=" << llh << "\tENTROPY=" << (-llh / log(2) / toks) << "\tPPL=" << pow(2, -llh / log(2) / toks) << endl;
- if (SS % 10 == 9) lm.ResampleHyperparameters(&rng);
- if (is_last) {
- double z = log(dlh);
- cerr << "TOTAL_PROB=" << z << "\tENTROPY=" << (-z / log(2) / toks) << "\tPPL=" << pow(2, -z / log(2) / toks) << endl;
- }
- }
- cerr << lm.crp << endl;
- cerr << lm.glue << endl;
- for (int i = 0; i < corpuse.size(); ++i) {
- for (int j = 0; j < corpuse[i].size(); ++j)
- for (int k = 0; k < corpuse[i][j].size(); ++k) {
- corpuse[i][j][k].cost /= csamples;
- corpuse[i][j][k].cost += 1e-3;
- corpuse[i][j][k].cost = log(corpuse[i][j][k].cost);
- }
- cout << HypergraphIO::AsPLF(corpuse[i]) << endl;
- }
- return 0;
-}
-
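
SampleDerivation in bayes_lattice_score.cc above draws an exact sample from the distribution over derivations defined by the current edge probabilities: it runs the Inside algorithm once, then walks top-down from the goal, choosing each incoming edge with probability proportional to its edge probability times the inside scores of its tail nodes. A sketch of the inside recursion it relies on, assuming (for this sketch) that nodes are stored in topological order with the goal node last:

    #include <vector>

    struct Edge { double prob; std::vector<int> tails; };
    struct Node { std::vector<int> in_edges; };

    // I(v) = sum over incoming edges e of p(e) * prod_{u in tails(e)} I(u);
    // nodes with no incoming edges are leaves with inside score 1.
    double Inside(const std::vector<Node>& nodes, const std::vector<Edge>& edges,
                  std::vector<double>* inside) {
      inside->assign(nodes.size(), 0.0);
      for (size_t v = 0; v < nodes.size(); ++v) {
        if (nodes[v].in_edges.empty()) { (*inside)[v] = 1.0; continue; }
        for (size_t i = 0; i < nodes[v].in_edges.size(); ++i) {
          const Edge& e = edges[nodes[v].in_edges[i]];
          double p = e.prob;
          for (size_t k = 0; k < e.tails.size(); ++k) p *= (*inside)[e.tails[k]];
          (*inside)[v] += p;
        }
      }
      return inside->back();  // inside score of the goal node
    }
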
diff --git a/gi/pf/brat.cc b/gi/pf/brat.cc
deleted file mode 100644
index 832f22cf..00000000
--- a/gi/pf/brat.cc
+++ /dev/null
@@ -1,543 +0,0 @@
-#include <iostream>
-#include <tr1/memory>
-#include <queue>
-
-#include <boost/functional.hpp>
-#include <boost/multi_array.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "viterbi.h"
-#include "hg.h"
-#include "trule.h"
-#include "tdict.h"
-#include "filelib.h"
-#include "dict.h"
-#include "sampler.h"
-#include "ccrp_nt.h"
-#include "cfg_wfst_composer.h"
-
-using namespace std;
-using namespace tr1;
-namespace po = boost::program_options;
-
-static unsigned kMAX_SRC_PHRASE;
-static unsigned kMAX_TRG_PHRASE;
-struct FSTState;
-
-double log_poisson(unsigned x, const double& lambda) {
- assert(lambda > 0.0);
- return log(lambda) * x - lgamma(x + 1) - lambda;
-}
-
-struct ConditionalBase {
- explicit ConditionalBase(const double m1mixture, const unsigned vocab_e_size, const string& model1fname) :
- kM1MIXTURE(m1mixture),
- kUNIFORM_MIXTURE(1.0 - m1mixture),
- kUNIFORM_TARGET(1.0 / vocab_e_size),
- kNULL(TD::Convert("<eps>")) {
- assert(m1mixture >= 0.0 && m1mixture <= 1.0);
- assert(vocab_e_size > 0);
- LoadModel1(model1fname);
- }
-
- void LoadModel1(const string& fname) {
- cerr << "Loading Model 1 parameters from " << fname << " ..." << endl;
- ReadFile rf(fname);
- istream& in = *rf.stream();
- string line;
- unsigned lc = 0;
- while(getline(in, line)) {
- ++lc;
- int cur = 0;
- int start = 0;
- while(cur < line.size() && line[cur] != ' ') { ++cur; }
- assert(cur != line.size());
- line[cur] = 0;
- const WordID src = TD::Convert(&line[0]);
- ++cur;
- start = cur;
- while(cur < line.size() && line[cur] != ' ') { ++cur; }
- assert(cur != line.size());
- line[cur] = 0;
- WordID trg = TD::Convert(&line[start]);
- const double logprob = strtod(&line[cur + 1], NULL);
- if (src >= ttable.size()) ttable.resize(src + 1);
- ttable[src][trg].logeq(logprob);
- }
- cerr << " read " << lc << " parameters.\n";
- }
-
- // return p0 of rule.e_ | rule.f_
- prob_t operator()(const TRule& rule) const {
- const int flen = rule.f_.size();
- const int elen = rule.e_.size();
- prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
- prob_t p;
- p.logeq(log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01)
- for (int i = 0; i < elen; ++i) { // for each position i in e-RHS
- const WordID trg = rule.e_[i];
- prob_t tp = prob_t::Zero();
- for (int j = -1; j < flen; ++j) {
- const WordID src = j < 0 ? kNULL : rule.f_[j];
- const map<WordID, prob_t>::const_iterator it = ttable[src].find(trg);
- if (it != ttable[src].end()) {
- tp += kM1MIXTURE * it->second;
- }
- tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
- }
- tp *= uniform_src_alignment; // draw a_i ~uniform
- p *= tp; // draw e_i ~Model1(f_a_i) / uniform
- }
- return p;
- }
-
- const prob_t kM1MIXTURE; // Model 1 mixture component
- const prob_t kUNIFORM_MIXTURE; // uniform mixture component
- const prob_t kUNIFORM_TARGET;
- const WordID kNULL;
- vector<map<WordID, prob_t> > ttable;
-};
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples")
- ("input,i",po::value<string>(),"Read parallel data from")
- ("max_src_phrase",po::value<unsigned>()->default_value(3),"Maximum length of source language phrases")
- ("max_trg_phrase",po::value<unsigned>()->default_value(3),"Maximum length of target language phrases")
- ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)")
- ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution")
- ("random_seed,S",po::value<uint32_t>(), "Random seed");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || (conf->count("input") == 0)) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-void ReadParallelCorpus(const string& filename,
- vector<vector<WordID> >* f,
- vector<vector<int> >* e,
- set<int>* vocab_f,
- set<int>* vocab_e) {
- f->clear();
- e->clear();
- vocab_f->clear();
- vocab_e->clear();
- istream* in;
- if (filename == "-")
- in = &cin;
- else
- in = new ifstream(filename.c_str());
- assert(*in);
- string line;
- const WordID kDIV = TD::Convert("|||");
- vector<WordID> tmp;
- while(*in) {
- getline(*in, line);
- if (line.empty() && !*in) break;
- e->push_back(vector<int>());
- f->push_back(vector<int>());
- vector<int>& le = e->back();
- vector<int>& lf = f->back();
- tmp.clear();
- TD::ConvertSentence(line, &tmp);
- bool isf = true;
- for (unsigned i = 0; i < tmp.size(); ++i) {
- const int cur = tmp[i];
- if (isf) {
- if (kDIV == cur) { isf = false; } else {
- lf.push_back(cur);
- vocab_f->insert(cur);
- }
- } else {
- assert(cur != kDIV);
- le.push_back(cur);
- vocab_e->insert(cur);
- }
- }
- assert(isf == false);
- }
- if (in != &cin) delete in;
-}
-
-struct UniphraseLM {
- UniphraseLM(const vector<vector<int> >& corpus,
- const set<int>& vocab,
- const po::variables_map& conf) :
- phrases_(1,1),
- gen_(1,1),
- corpus_(corpus),
- uniform_word_(1.0 / vocab.size()),
- gen_p0_(0.5),
- p_end_(0.5),
- use_poisson_(conf.count("poisson_length") > 0) {}
-
- void ResampleHyperparameters(MT19937* rng) {
- phrases_.resample_hyperparameters(rng);
- gen_.resample_hyperparameters(rng);
- cerr << " " << phrases_.alpha();
- }
-
- CCRP_NoTable<vector<int> > phrases_;
- CCRP_NoTable<bool> gen_;
- vector<vector<bool> > z_; // z_[i][j]: is there a phrase boundary after the jth word of sentence i?
- const vector<vector<int> >& corpus_;
- const double uniform_word_;
- const double gen_p0_;
- const double p_end_; // in base length distribution, p of the end of a phrase
- const bool use_poisson_;
-};
-
-struct Reachability {
- boost::multi_array<bool, 4> edges; // edges[src_covered][trg_covered][src_delta][trg_delta]: is this edge worth exploring?
- boost::multi_array<short, 2> max_src_delta; // msd[src_covered][trg_covered] -- the largest src delta that's valid
-
- Reachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) :
- edges(boost::extents[srclen][trglen][src_max_phrase_len+1][trg_max_phrase_len+1]),
- max_src_delta(boost::extents[srclen][trglen]) {
- ComputeReachability(srclen, trglen, src_max_phrase_len, trg_max_phrase_len);
- }
-
- private:
- struct SState {
- SState() : prev_src_covered(), prev_trg_covered() {}
- SState(int i, int j) : prev_src_covered(i), prev_trg_covered(j) {}
- int prev_src_covered;
- int prev_trg_covered;
- };
-
- struct NState {
- NState() : next_src_covered(), next_trg_covered() {}
- NState(int i, int j) : next_src_covered(i), next_trg_covered(j) {}
- int next_src_covered;
- int next_trg_covered;
- };
-
- void ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) {
- typedef boost::multi_array<vector<SState>, 2> array_type;
- array_type a(boost::extents[srclen + 1][trglen + 1]);
- a[0][0].push_back(SState());
- for (int i = 0; i < srclen; ++i) {
- for (int j = 0; j < trglen; ++j) {
- if (a[i][j].size() == 0) continue;
- const SState prev(i,j);
- for (int k = 1; k <= src_max_phrase_len; ++k) {
- if ((i + k) > srclen) continue;
- for (int l = 1; l <= trg_max_phrase_len; ++l) {
- if ((j + l) > trglen) continue;
- a[i + k][j + l].push_back(prev);
- }
- }
- }
- }
- a[0][0].clear();
- cerr << "Final cell contains " << a[srclen][trglen].size() << " back pointers\n";
- assert(a[srclen][trglen].size() > 0);
-
- typedef boost::multi_array<bool, 2> rarray_type;
- rarray_type r(boost::extents[srclen + 1][trglen + 1]);
-// typedef boost::multi_array<vector<NState>, 2> narray_type;
-// narray_type b(boost::extents[srclen + 1][trglen + 1]);
- r[srclen][trglen] = true;
- for (int i = srclen; i >= 0; --i) {
- for (int j = trglen; j >= 0; --j) {
- vector<SState>& prevs = a[i][j];
- if (!r[i][j]) { prevs.clear(); }
-// const NState nstate(i,j);
- for (int k = 0; k < prevs.size(); ++k) {
- r[prevs[k].prev_src_covered][prevs[k].prev_trg_covered] = true;
- int src_delta = i - prevs[k].prev_src_covered;
- edges[prevs[k].prev_src_covered][prevs[k].prev_trg_covered][src_delta][j - prevs[k].prev_trg_covered] = true;
- short &msd = max_src_delta[prevs[k].prev_src_covered][prevs[k].prev_trg_covered];
- if (src_delta > msd) msd = src_delta;
-// b[prevs[k].prev_src_covered][prevs[k].prev_trg_covered].push_back(nstate);
- }
- }
- }
- assert(!edges[0][0][1][0]);
- assert(!edges[0][0][0][1]);
- assert(!edges[0][0][0][0]);
- cerr << " MAX SRC DELTA[0][0] = " << max_src_delta[0][0] << endl;
- assert(max_src_delta[0][0] > 0);
- //cerr << "First cell contains " << b[0][0].size() << " forward pointers\n";
- //for (int i = 0; i < b[0][0].size(); ++i) {
- // cerr << " -> (" << b[0][0][i].next_src_covered << "," << b[0][0][i].next_trg_covered << ")\n";
- //}
- }
-};
-
-ostream& operator<<(ostream& os, const FSTState& q);
-struct FSTState {
- explicit FSTState(int src_size) :
- trg_covered_(),
- src_covered_(),
- src_coverage_(src_size) {}
-
- FSTState(short trg_covered, short src_covered, const vector<bool>& src_coverage, const vector<short>& src_prefix) :
- trg_covered_(trg_covered),
- src_covered_(src_covered),
- src_coverage_(src_coverage),
- src_prefix_(src_prefix) {
- if (src_coverage_.size() == src_covered) {
- assert(src_prefix.size() == 0);
- }
- }
-
- // if we extend by the word at src_position, what are
- // the next states that are reachable and lie on a valid
- // path to the final state?
- vector<FSTState> Extensions(int src_position, int src_len, int trg_len, const Reachability& r) const {
- assert(src_position < src_coverage_.size());
- if (src_coverage_[src_position]) {
- cerr << "Trying to extend " << *this << " with position " << src_position << endl;
- abort();
- }
- vector<bool> ncvg = src_coverage_;
- ncvg[src_position] = true;
-
- vector<FSTState> res;
- const int trg_remaining = trg_len - trg_covered_;
- if (trg_remaining <= 0) {
- cerr << "Target appears to have been covered: " << *this << " (trg_len=" << trg_len << ",trg_covered=" << trg_covered_ << ")" << endl;
- abort();
- }
- const int src_remaining = src_len - src_covered_;
- if (src_remaining <= 0) {
- cerr << "Source appears to have been covered: " << *this << endl;
- abort();
- }
-
- for (int tc = 1; tc <= kMAX_TRG_PHRASE; ++tc) {
- if (r.edges[src_covered_][trg_covered_][src_prefix_.size() + 1][tc]) {
- int nc = src_prefix_.size() + 1 + src_covered_;
- res.push_back(FSTState(trg_covered_ + tc, nc, ncvg, vector<short>()));
- }
- }
-
- if ((src_prefix_.size() + 1) < r.max_src_delta[src_covered_][trg_covered_]) {
- vector<short> nsp = src_prefix_;
- nsp.push_back(src_position);
- res.push_back(FSTState(trg_covered_, src_covered_, ncvg, nsp));
- }
-
- if (res.size() == 0) {
- cerr << *this << " can't be extended!\n";
- abort();
- }
- return res;
- }
-
- short trg_covered_, src_covered_;
- vector<bool> src_coverage_;
- vector<short> src_prefix_;
-};
-bool operator<(const FSTState& q, const FSTState& r) {
- if (q.trg_covered_ != r.trg_covered_) return q.trg_covered_ < r.trg_covered_;
- if (q.src_covered_!= r.src_covered_) return q.src_covered_ < r.src_covered_;
- if (q.src_coverage_ != r.src_coverage_) return q.src_coverage_ < r.src_coverage_;
- return q.src_prefix_ < r.src_prefix_;
-}
-
-ostream& operator<<(ostream& os, const FSTState& q) {
- os << "[" << q.trg_covered_ << " : ";
- for (int i = 0; i < q.src_coverage_.size(); ++i)
- os << q.src_coverage_[i];
- os << " : <";
- for (int i = 0; i < q.src_prefix_.size(); ++i) {
- if (i != 0) os << ' ';
- os << q.src_prefix_[i];
- }
- return os << ">]";
-}
-
-struct MyModel {
- MyModel(ConditionalBase& rcp0) : rp0(rcp0) {}
- typedef unordered_map<vector<WordID>, CCRP_NoTable<TRule>, boost::hash<vector<WordID> > > SrcToRuleCRPMap;
-
- void DecrementRule(const TRule& rule) {
- SrcToRuleCRPMap::iterator it = rules.find(rule.f_);
- assert(it != rules.end());
- it->second.decrement(rule);
- if (it->second.num_customers() == 0) rules.erase(it);
- }
-
- void IncrementRule(const TRule& rule) {
- SrcToRuleCRPMap::iterator it = rules.find(rule.f_);
- if (it == rules.end()) {
- CCRP_NoTable<TRule> crp(1,1);
- it = rules.insert(make_pair(rule.f_, crp)).first;
- }
- it->second.increment(rule);
- }
-
- // conditioned on rule.f_
- prob_t RuleConditionalProbability(const TRule& rule) const {
- const prob_t base = rp0(rule);
- SrcToRuleCRPMap::const_iterator it = rules.find(rule.f_);
- if (it == rules.end()) {
- return base;
- } else {
- const double lp = it->second.logprob(rule, log(base));
- prob_t q; q.logeq(lp);
- return q;
- }
- }
-
- const ConditionalBase& rp0;
- SrcToRuleCRPMap rules;
-};
-
-struct MyFST : public WFST {
- MyFST(const vector<WordID>& ssrc, const vector<WordID>& strg, MyModel* m) :
- src(ssrc), trg(strg),
- r(src.size(),trg.size(),kMAX_SRC_PHRASE, kMAX_TRG_PHRASE),
- model(m) {
- FSTState in(src.size());
- cerr << " INIT: " << in << endl;
- init = GetNode(in);
- for (int i = 0; i < in.src_coverage_.size(); ++i) in.src_coverage_[i] = true;
- in.src_covered_ = src.size();
- in.trg_covered_ = trg.size();
- cerr << "FINAL: " << in << endl;
- final = GetNode(in);
- }
- virtual const WFSTNode* Final() const;
- virtual const WFSTNode* Initial() const;
-
- const WFSTNode* GetNode(const FSTState& q);
- map<FSTState, boost::shared_ptr<WFSTNode> > m;
- const vector<WordID>& src;
- const vector<WordID>& trg;
- Reachability r;
- const WFSTNode* init;
- const WFSTNode* final;
- MyModel* model;
-};
-
-struct MyNode : public WFSTNode {
- MyNode(const FSTState& q, MyFST* fst) : state(q), container(fst) {}
- virtual vector<pair<const WFSTNode*, TRulePtr> > ExtendInput(unsigned srcindex) const;
- const FSTState state;
- mutable MyFST* container;
-};
-
-vector<pair<const WFSTNode*, TRulePtr> > MyNode::ExtendInput(unsigned srcindex) const {
- cerr << "EXTEND " << state << " with " << srcindex << endl;
- vector<FSTState> ext = state.Extensions(srcindex, container->src.size(), container->trg.size(), container->r);
- vector<pair<const WFSTNode*,TRulePtr> > res(ext.size());
- for (unsigned i = 0; i < ext.size(); ++i) {
- res[i].first = container->GetNode(ext[i]);
- if (ext[i].src_prefix_.size() == 0) {
- const unsigned trg_from = state.trg_covered_;
- const unsigned trg_to = ext[i].trg_covered_;
- const unsigned prev_prfx_size = state.src_prefix_.size();
- res[i].second.reset(new TRule);
- res[i].second->lhs_ = -TD::Convert("X");
- vector<WordID>& src = res[i].second->f_;
- vector<WordID>& trg = res[i].second->e_;
- src.resize(prev_prfx_size + 1);
- for (unsigned j = 0; j < prev_prfx_size; ++j)
- src[j] = container->src[state.src_prefix_[j]];
- src[prev_prfx_size] = container->src[srcindex];
- for (unsigned j = trg_from; j < trg_to; ++j)
- trg.push_back(container->trg[j]);
- res[i].second->scores_.set_value(FD::Convert("Proposal"), log(container->model->RuleConditionalProbability(*res[i].second)));
- }
- }
- return res;
-}
-
-const WFSTNode* MyFST::GetNode(const FSTState& q) {
- boost::shared_ptr<WFSTNode>& res = m[q];
- if (!res) {
- res.reset(new MyNode(q, this));
- }
- return &*res;
-}
-
-const WFSTNode* MyFST::Final() const {
- return final;
-}
-
-const WFSTNode* MyFST::Initial() const {
- return init;
-}
-
-int main(int argc, char** argv) {
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
- kMAX_TRG_PHRASE = conf["max_trg_phrase"].as<unsigned>();
- kMAX_SRC_PHRASE = conf["max_src_phrase"].as<unsigned>();
-
- if (!conf.count("model1")) {
- cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n";
- return 1;
- }
- boost::shared_ptr<MT19937> prng;
- if (conf.count("random_seed"))
- prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
- else
- prng.reset(new MT19937);
- MT19937& rng = *prng;
-
- vector<vector<int> > corpuse, corpusf;
- set<int> vocabe, vocabf;
- ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
- cerr << "f-Corpus size: " << corpusf.size() << " sentences\n";
- cerr << "f-Vocabulary size: " << vocabf.size() << " types\n";
- cerr << "f-Corpus size: " << corpuse.size() << " sentences\n";
- cerr << "f-Vocabulary size: " << vocabe.size() << " types\n";
- assert(corpusf.size() == corpuse.size());
-
- ConditionalBase lp0(conf["model1_interpolation_weight"].as<double>(),
- vocabe.size(),
- conf["model1"].as<string>());
- MyModel m(lp0);
-
- TRule x("[X] ||| kAnwntR myN ||| at the convent ||| 0");
- m.IncrementRule(x);
- TRule y("[X] ||| nY dyN ||| gave ||| 0");
- m.IncrementRule(y);
-
-
- MyFST fst(corpusf[0], corpuse[0], &m);
- ifstream in("./kimura.g");
- assert(in);
- CFG_WFSTComposer comp(fst);
- Hypergraph hg;
- bool succeed = comp.Compose(&in, &hg);
- hg.PrintGraphviz();
- if (succeed) { cerr << "SUCCESS.\n"; } else { cerr << "FAILURE REPORTED.\n"; }
-
-#if 0
- ifstream in2("./amnabooks.g");
- assert(in2);
- MyFST fst2(corpusf[1], corpuse[1], &m);
- CFG_WFSTComposer comp2(fst2);
- Hypergraph hg2;
- bool succeed2 = comp2.Compose(&in2, &hg2);
- if (succeed2) { cerr << "SUCCESS.\n"; } else { cerr << "FAILURE REPORTED.\n"; }
-#endif
-
- SparseVector<double> w; w.set_value(FD::Convert("Proposal"), 1.0);
- hg.Reweight(w);
- cerr << ViterbiFTree(hg) << endl;
- return 0;
-}
-
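
The Reachability structure in brat.cc above is the pruning core of the composer: a forward pass marks every (src_covered, trg_covered) pair reachable from (0,0) via phrase pairs of bounded length, and a backward pass keeps only the pairs from which (srclen, trglen) is still reachable. A condensed sketch that computes just the boolean on-path grid (the real code additionally records the valid deltas and max_src_delta per cell):

    #include <vector>

    typedef std::vector<std::vector<bool> > Grid;

    Grid OnPath(int srclen, int trglen, int max_f, int max_e) {
      Grid fwd(srclen + 1, std::vector<bool>(trglen + 1, false));
      fwd[0][0] = true;  // forward: reachable from (0,0)
      for (int i = 0; i <= srclen; ++i)
        for (int j = 0; j <= trglen; ++j) {
          if (!fwd[i][j]) continue;
          for (int k = 1; k <= max_f && i + k <= srclen; ++k)
            for (int l = 1; l <= max_e && j + l <= trglen; ++l)
              fwd[i + k][j + l] = true;
        }
      Grid bwd(srclen + 1, std::vector<bool>(trglen + 1, false));
      bwd[srclen][trglen] = true;  // backward: can still reach the goal
      for (int i = srclen; i >= 0; --i)
        for (int j = trglen; j >= 0; --j)
          for (int k = 1; !bwd[i][j] && k <= max_f && i + k <= srclen; ++k)
            for (int l = 1; l <= max_e && j + l <= trglen; ++l)
              if (bwd[i + k][j + l]) { bwd[i][j] = true; break; }
      Grid on_path(srclen + 1, std::vector<bool>(trglen + 1, false));
      for (int i = 0; i <= srclen; ++i)
        for (int j = 0; j <= trglen; ++j)
          on_path[i][j] = fwd[i][j] && bwd[i][j];
      return on_path;
    }
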
diff --git a/gi/pf/cbgi.cc b/gi/pf/cbgi.cc
deleted file mode 100644
index 97f1ba34..00000000
--- a/gi/pf/cbgi.cc
+++ /dev/null
@@ -1,330 +0,0 @@
-#include <queue>
-#include <sstream>
-#include <iostream>
-
-#include <boost/unordered_map.hpp>
-#include <boost/functional/hash.hpp>
-
-#include "sampler.h"
-#include "filelib.h"
-#include "hg_io.h"
-#include "hg.h"
-#include "ccrp_nt.h"
-#include "trule.h"
-#include "inside_outside.h"
-
-using namespace std;
-using namespace std::tr1;
-
-double log_poisson(unsigned x, const double& lambda) {
- assert(lambda > 0.0);
- return log(lambda) * x - lgamma(x + 1) - lambda;
-}
-
-double log_decay(unsigned x, const double& b) {
- assert(b > 1.0);
- assert(x > 0);
- return log(b - 1) - x * log(b);
-}
-
-struct SimpleBase {
- SimpleBase(unsigned esize, unsigned fsize, unsigned ntsize = 144) :
- uniform_e(-log(esize)),
- uniform_f(-log(fsize)),
- uniform_nt(-log(ntsize)) {
- }
-
- // binomial coefficient
- static double choose(unsigned n, unsigned k) {
- return exp(lgamma(n + 1) - lgamma(k + 1) - lgamma(n - k + 1));
- }
-
- // count the number of patterns of terminals and NTs in the rule, given elen and flen
- static double log_number_of_patterns(const unsigned flen, const unsigned elen) {
- static vector<vector<double> > counts;
- if (elen >= counts.size()) counts.resize(elen + 1);
- if (flen >= counts[elen].size()) counts[elen].resize(flen + 1);
- double& count = counts[elen][flen];
- if (count) return log(count);
- const unsigned max_arity = min(elen, flen);
- for (unsigned a = 0; a <= max_arity; ++a)
- count += choose(elen, a) * choose(flen, a);
- return log(count);
- }
-
- // return logp0 of rule | LHS
- double operator()(const TRule& rule) const {
- const unsigned flen = rule.f_.size();
- const unsigned elen = rule.e_.size();
-#if 0
- double p = 0;
- p += log_poisson(flen, 0.5); // flen ~Pois(0.5)
- p += log_poisson(elen, flen); // elen | flen ~Pois(flen)
- p -= log_number_of_patterns(flen, elen); // pattern | flen,elen ~Uniform
- for (unsigned i = 0; i < flen; ++i) { // for each position in f-RHS
- if (rule.f_[i] <= 0) // according to pattern
- p += uniform_nt; // draw NT ~Uniform
- else
- p += uniform_f; // draw f terminal ~Uniform
- }
- p -= lgamma(rule.Arity() + 1); // draw permutation ~Uniform
- for (unsigned i = 0; i < elen; ++i) { // for each position in e-RHS
- if (rule.e_[i] > 0) // according to pattern
- p += uniform_e; // draw e|f term ~Uniform
- // TODO this should probably be Model 1
- }
-#else
- double p = 0;
- bool is_abstract = rule.f_[0] <= 0;
- p += log(0.5);
- if (is_abstract) {
- if (flen == 2) p += log(0.99); else p += log(0.01);
- } else {
- p += log_decay(flen, 3);
- }
-
- for (unsigned i = 0; i < flen; ++i) { // for each position in f-RHS
- if (rule.f_[i] <= 0) // according to pattern
- p += uniform_nt; // draw NT ~Uniform
- else
- p += uniform_f; // draw f terminal ~Uniform
- }
-#endif
- return p;
- }
- const double uniform_e;
- const double uniform_f;
- const double uniform_nt;
- vector<double> arities;
-};
-
-MT19937* rng = NULL;
-
-template <typename Base>
-struct MHSamplerEdgeProb {
- MHSamplerEdgeProb(const Hypergraph& hg,
- const map<int, CCRP_NoTable<TRule> >& rdp,
- const Base& logp0,
- const bool exclude_multiword_terminals) : edge_probs(hg.edges_.size()) {
- for (int i = 0; i < edge_probs.size(); ++i) {
- const TRule& rule = *hg.edges_[i].rule_;
- const map<int, CCRP_NoTable<TRule> >::const_iterator it = rdp.find(rule.lhs_);
- assert(it != rdp.end());
- const CCRP_NoTable<TRule>& crp = it->second;
- edge_probs[i].logeq(crp.logprob(rule, logp0(rule)));
- if (exclude_multiword_terminals && rule.f_[0] > 0 && rule.f_.size() > 1)
- edge_probs[i] = prob_t::Zero();
- }
- }
- inline prob_t operator()(const Hypergraph::Edge& e) const {
- return edge_probs[e.id_];
- }
- prob_t DerivationProb(const vector<int>& d) const {
- prob_t p = prob_t::One();
- for (unsigned i = 0; i < d.size(); ++i)
- p *= edge_probs[d[i]];
- return p;
- }
- vector<prob_t> edge_probs;
-};
-
-template <typename Base>
-struct ModelAndData {
- ModelAndData() :
- base_lh(prob_t::One()),
- logp0(10000, 10000),
- mh_samples(),
- mh_rejects() {}
-
- void SampleCorpus(const string& hgpath, int i);
- void ResampleHyperparameters() {
- for (map<int, CCRP_NoTable<TRule> >::iterator it = rules.begin(); it != rules.end(); ++it)
- it->second.resample_hyperparameters(rng);
- }
-
- CCRP_NoTable<TRule>& RuleCRP(int lhs) {
- map<int, CCRP_NoTable<TRule> >::iterator it = rules.find(lhs);
- if (it == rules.end()) {
- rules.insert(make_pair(lhs, CCRP_NoTable<TRule>(1,1)));
- it = rules.find(lhs);
- }
- return it->second;
- }
-
- void IncrementRule(const TRule& rule) {
- CCRP_NoTable<TRule>& crp = RuleCRP(rule.lhs_);
- if (crp.increment(rule)) {
- prob_t p; p.logeq(logp0(rule));
- base_lh *= p;
- }
- }
-
- void DecrementRule(const TRule& rule) {
- CCRP_NoTable<TRule>& crp = RuleCRP(rule.lhs_);
- if (crp.decrement(rule)) {
- prob_t p; p.logeq(logp0(rule));
- base_lh /= p;
- }
- }
-
- void DecrementDerivation(const Hypergraph& hg, const vector<int>& d) {
- for (unsigned i = 0; i < d.size(); ++i) {
- const TRule& rule = *hg.edges_[d[i]].rule_;
- DecrementRule(rule);
- }
- }
-
- void IncrementDerivation(const Hypergraph& hg, const vector<int>& d) {
- for (unsigned i = 0; i < d.size(); ++i) {
- const TRule& rule = *hg.edges_[d[i]].rule_;
- IncrementRule(rule);
- }
- }
-
- prob_t Likelihood() const {
- prob_t p = prob_t::One();
- for (map<int, CCRP_NoTable<TRule> >::const_iterator it = rules.begin(); it != rules.end(); ++it) {
- prob_t q; q.logeq(it->second.log_crp_prob());
- p *= q;
- }
- p *= base_lh;
- return p;
- }
-
- void ResampleDerivation(const Hypergraph& hg, vector<int>* sampled_derivation);
-
- map<int, CCRP_NoTable<TRule> > rules; // [lhs] -> distribution over RHSs
- prob_t base_lh;
- SimpleBase logp0;
- vector<vector<int> > samples; // sampled derivations
- unsigned int mh_samples;
- unsigned int mh_rejects;
-};
-
-template <typename Base>
-void ModelAndData<Base>::SampleCorpus(const string& hgpath, int n) {
- vector<Hypergraph> hgs; hgs.reserve(n);
- boost::unordered_map<TRule, unsigned> acc;
- map<int, unsigned> tot;
- for (int i = 0; i < n; ++i) {
- ostringstream os;
- os << hgpath << '/' << i << ".json.gz";
- if (!FileExists(os.str())) continue;
- hgs.push_back(Hypergraph());
- ReadFile rf(os.str());
- HypergraphIO::ReadFromJSON(rf.stream(), &hgs.back());
- }
- cerr << "Read " << hgs.size() << " alignment hypergraphs.\n";
- samples.resize(hgs.size());
- const unsigned SAMPLES = 2000;
- const unsigned burnin = 3 * SAMPLES / 4;
- const unsigned every = 20;
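-  // Accumulate rule counts only after burn-in, and then only every 20th
-  // sweep, to reduce autocorrelation between the collected samples.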
- for (unsigned s = 0; s < SAMPLES; ++s) {
- if (s % 10 == 0) {
- if (s > 0) { cerr << endl; ResampleHyperparameters(); }
- cerr << "[" << s << " LLH=" << log(Likelihood()) << " REJECTS=" << ((double)mh_rejects / mh_samples) << " LHS's=" << rules.size() << " base=" << log(base_lh) << "] ";
- }
- cerr << '.';
- for (unsigned i = 0; i < hgs.size(); ++i) {
- ResampleDerivation(hgs[i], &samples[i]);
- if (s > burnin && s % every == 0) {
- for (unsigned j = 0; j < samples[i].size(); ++j) {
- const TRule& rule = *hgs[i].edges_[samples[i][j]].rule_;
- ++acc[rule];
- ++tot[rule.lhs_];
- }
- }
- }
- }
- cerr << endl;
- for (boost::unordered_map<TRule,unsigned>::iterator it = acc.begin(); it != acc.end(); ++it) {
- cout << it->first << " MyProb=" << log(it->second)-log(tot[it->first.lhs_]) << endl;
- }
-}
-
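-// Draw a new derivation for one sentence pair. The current derivation's
-// counts are removed, edges are rescored under the frozen CRPs, the inside
-// algorithm is run, and a fresh derivation is sampled top-down; because the
-// proposal ignores how the CRP state changes within a derivation, a
-// Metropolis-Hastings accept/reject step corrects the difference.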
-template <typename Base>
-void ModelAndData<Base>::ResampleDerivation(const Hypergraph& hg, vector<int>* sampled_deriv) {
- vector<int> cur;
- cur.swap(*sampled_deriv);
-
- const prob_t p_cur = Likelihood();
- DecrementDerivation(hg, cur);
- if (cur.empty()) {
- // first iteration, create restaurants
- for (int i = 0; i < hg.edges_.size(); ++i)
- RuleCRP(hg.edges_[i].rule_->lhs_);
- }
- MHSamplerEdgeProb<SimpleBase> wf(hg, rules, logp0, cur.empty());
-// MHSamplerEdgeProb<SimpleBase> wf(hg, rules, logp0, false);
- const prob_t q_cur = wf.DerivationProb(cur);
- vector<prob_t> node_probs;
- Inside<prob_t, MHSamplerEdgeProb<SimpleBase> >(hg, &node_probs, wf);
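-  // Sample top-down, starting from the root (taken to be node
-  // hg.nodes_.size() - 3 by construction of these alignment hypergraphs):
-  // at each node, pick an incoming edge with probability proportional to
-  // its proposal score times the inside scores of its tail nodes.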
- queue<unsigned> q;
- q.push(hg.nodes_.size() - 3);
- while(!q.empty()) {
- unsigned cur_node_id = q.front();
-// cerr << "NODE=" << cur_node_id << endl;
- q.pop();
- const Hypergraph::Node& node = hg.nodes_[cur_node_id];
- const unsigned num_in_edges = node.in_edges_.size();
- unsigned sampled_edge = 0;
- if (num_in_edges == 1) {
- sampled_edge = node.in_edges_[0];
- } else {
- prob_t z;
- assert(num_in_edges > 1);
- SampleSet<prob_t> ss;
- for (unsigned j = 0; j < num_in_edges; ++j) {
- const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]];
- prob_t p = wf.edge_probs[edge.id_]; // edge proposal prob
- for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k)
- p *= node_probs[edge.tail_nodes_[k]];
- ss.add(p);
-// cerr << log(ss[j]) << " ||| " << edge.rule_->AsString() << endl;
- z += p;
- }
-// for (unsigned j = 0; j < num_in_edges; ++j) {
-// const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]];
-// cerr << exp(log(ss[j] / z)) << " ||| " << edge.rule_->AsString() << endl;
-// }
-// cerr << " --- \n";
- sampled_edge = node.in_edges_[rng->SelectSample(ss)];
- }
- sampled_deriv->push_back(sampled_edge);
- const Hypergraph::Edge& edge = hg.edges_[sampled_edge];
- for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) {
- q.push(edge.tail_nodes_[j]);
- }
- }
- IncrementDerivation(hg, *sampled_deriv);
-
-// cerr << "sampled derivation contains " << sampled_deriv->size() << " edges\n";
-// cerr << "DERIV:\n";
-// for (int i = 0; i < sampled_deriv->size(); ++i) {
-// cerr << " " << hg.edges_[(*sampled_deriv)[i]].rule_->AsString() << endl;
-// }
-
- if (cur.empty()) return; // accept first sample
-
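-  // Metropolis-Hastings correction: the proposal is accepted with the usual
-  // ratio min(1, (p_prop * q_cur) / (p_cur * q_prop)); on rejection the old
-  // derivation's counts are restored and it is kept.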
- ++mh_samples;
-  // only need to do MH if the proposal is different from the current state
- if (cur != *sampled_deriv) {
- const prob_t q_prop = wf.DerivationProb(*sampled_deriv);
- const prob_t p_prop = Likelihood();
- if (!rng->AcceptMetropolisHastings(p_prop, p_cur, q_prop, q_cur)) {
- ++mh_rejects;
- DecrementDerivation(hg, *sampled_deriv);
- IncrementDerivation(hg, cur);
- swap(cur, *sampled_deriv);
- }
- }
-}
-
-int main(int argc, char** argv) {
- rng = new MT19937;
- ModelAndData<SimpleBase> m;
- m.SampleCorpus("./hgs", 50);
- // m.SampleCorpus("./btec/hgs", 5000);
- return 0;
-}
-
diff --git a/gi/pf/cfg_wfst_composer.cc b/gi/pf/cfg_wfst_composer.cc
deleted file mode 100644
index 19c0875d..00000000
--- a/gi/pf/cfg_wfst_composer.cc
+++ /dev/null
@@ -1,731 +0,0 @@
-#include "cfg_wfst_composer.h"
-
-#include <iostream>
-#include <fstream>
-#include <map>
-#include <queue>
-#include <tr1/unordered_map>
-#include <tr1/unordered_set>
-
-#include <boost/shared_ptr.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-#include "fast_lexical_cast.hpp"
-
-#include "phrasetable_fst.h"
-#include "sparse_vector.h"
-#include "tdict.h"
-#include "hg.h"
-#include "hg_remove_eps.h"
-
-namespace po = boost::program_options;
-using namespace std;
-using namespace std::tr1;
-
-WFSTNode::~WFSTNode() {}
-WFST::~WFST() {}
-
-// Define the following macro if you want to see lots of debugging output
-// when you run the chart parser
-#undef DEBUG_CHART_PARSER
-
-// A few constants used by the chart parser ///////////////
-static const int kMAX_NODES = 2000000;
-static const string kPHRASE_STRING = "X";
-static bool constants_need_init = true;
-static WordID kUNIQUE_START;
-static WordID kPHRASE;
-static TRulePtr kX1X2;
-static TRulePtr kX1;
-static WordID kEPS;
-static TRulePtr kEPSRule;
-
-static void InitializeConstants() {
- if (constants_need_init) {
- kPHRASE = TD::Convert(kPHRASE_STRING) * -1;
- kUNIQUE_START = TD::Convert("S") * -1;
- kX1X2.reset(new TRule("[X] ||| [X,1] [X,2] ||| [X,1] [X,2]"));
- kX1.reset(new TRule("[X] ||| [X,1] ||| [X,1]"));
- kEPSRule.reset(new TRule("[X] ||| <eps> ||| <eps>"));
- kEPS = TD::Convert("<eps>");
- constants_need_init = false;
- }
-}
-////////////////////////////////////////////////////////////
-
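-// A node in a trie over rule right-hand sides: tptr branches on terminals,
-// ntptr on non-terminals, and is_some_rule_complete marks that a full RHS
-// ends here.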
-class EGrammarNode {
- friend bool CFG_WFSTComposer::Compose(const Hypergraph& src_forest, Hypergraph* trg_forest);
- friend void AddGrammarRule(const string& r, map<WordID, EGrammarNode>* g);
- public:
-#ifdef DEBUG_CHART_PARSER
- string hint;
-#endif
- EGrammarNode() : is_some_rule_complete(false), is_root(false) {}
- const map<WordID, EGrammarNode>& GetTerminals() const { return tptr; }
- const map<WordID, EGrammarNode>& GetNonTerminals() const { return ntptr; }
- bool HasNonTerminals() const { return (!ntptr.empty()); }
- bool HasTerminals() const { return (!tptr.empty()); }
- bool RuleCompletes() const {
- return (is_some_rule_complete || (ntptr.empty() && tptr.empty()));
- }
- bool GrammarContinues() const {
- return !(ntptr.empty() && tptr.empty());
- }
- bool IsRoot() const {
- return is_root;
- }
- // these are the features associated with the rule from the start
- // node up to this point. If you use these features, you must
- // not Extend() this rule.
- const SparseVector<double>& GetCFGProductionFeatures() const {
- return input_features;
- }
-
- const EGrammarNode* Extend(const WordID& t) const {
- if (t < 0) {
- map<WordID, EGrammarNode>::const_iterator it = ntptr.find(t);
- if (it == ntptr.end()) return NULL;
- return &it->second;
- } else {
- map<WordID, EGrammarNode>::const_iterator it = tptr.find(t);
- if (it == tptr.end()) return NULL;
- return &it->second;
- }
- }
-
- private:
- map<WordID, EGrammarNode> tptr;
- map<WordID, EGrammarNode> ntptr;
- SparseVector<double> input_features;
- bool is_some_rule_complete;
- bool is_root;
-};
-typedef map<WordID, EGrammarNode> EGrammar; // indexed by the rule LHS
-
-// edges are immutable once created
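-// An Edge is an Earley-style item for the CFG/WFST intersection: cat is the
-// LHS category being proved, dot is the current position in the grammar
-// trie, and q..r is the span of WFST states covered so far.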
-struct Edge {
-#ifdef DEBUG_CHART_PARSER
- static int id_count;
- const int id;
-#endif
- const WordID cat; // lhs side of rule proved/being proved
- const EGrammarNode* const dot; // dot position
- const WFSTNode* const q; // start of span
- const WFSTNode* const r; // end of span
- const Edge* const active_parent; // back pointer, NULL for PREDICT items
- const Edge* const passive_parent; // back pointer, NULL for SCAN and PREDICT items
- TRulePtr tps; // translations
- boost::shared_ptr<SparseVector<double> > features; // features from CFG rule
-
- bool IsPassive() const {
- // when a rule is completed, this value will be set
- return static_cast<bool>(features);
- }
- bool IsActive() const { return !IsPassive(); }
- bool IsInitial() const {
- return !(active_parent || passive_parent);
- }
- bool IsCreatedByScan() const {
- return active_parent && !passive_parent && !dot->IsRoot();
- }
- bool IsCreatedByPredict() const {
- return dot->IsRoot();
- }
- bool IsCreatedByComplete() const {
- return active_parent && passive_parent;
- }
-
- // constructor for PREDICT
- Edge(WordID c, const EGrammarNode* d, const WFSTNode* q_and_r) :
-#ifdef DEBUG_CHART_PARSER
- id(++id_count),
-#endif
- cat(c), dot(d), q(q_and_r), r(q_and_r), active_parent(NULL), passive_parent(NULL), tps() {}
- Edge(WordID c, const EGrammarNode* d, const WFSTNode* q_and_r, const Edge* act_parent) :
-#ifdef DEBUG_CHART_PARSER
- id(++id_count),
-#endif
- cat(c), dot(d), q(q_and_r), r(q_and_r), active_parent(act_parent), passive_parent(NULL), tps() {}
-
- // constructors for SCAN
- Edge(WordID c, const EGrammarNode* d, const WFSTNode* i, const WFSTNode* j,
- const Edge* act_par, const TRulePtr& translations) :
-#ifdef DEBUG_CHART_PARSER
- id(++id_count),
-#endif
- cat(c), dot(d), q(i), r(j), active_parent(act_par), passive_parent(NULL), tps(translations) {}
-
- Edge(WordID c, const EGrammarNode* d, const WFSTNode* i, const WFSTNode* j,
- const Edge* act_par, const TRulePtr& translations,
- const SparseVector<double>& feats) :
-#ifdef DEBUG_CHART_PARSER
- id(++id_count),
-#endif
- cat(c), dot(d), q(i), r(j), active_parent(act_par), passive_parent(NULL), tps(translations),
- features(new SparseVector<double>(feats)) {}
-
- // constructors for COMPLETE
- Edge(WordID c, const EGrammarNode* d, const WFSTNode* i, const WFSTNode* j,
- const Edge* act_par, const Edge *pas_par) :
-#ifdef DEBUG_CHART_PARSER
- id(++id_count),
-#endif
- cat(c), dot(d), q(i), r(j), active_parent(act_par), passive_parent(pas_par), tps() {
- assert(pas_par->IsPassive());
- assert(act_par->IsActive());
- }
-
- Edge(WordID c, const EGrammarNode* d, const WFSTNode* i, const WFSTNode* j,
- const Edge* act_par, const Edge *pas_par, const SparseVector<double>& feats) :
-#ifdef DEBUG_CHART_PARSER
- id(++id_count),
-#endif
- cat(c), dot(d), q(i), r(j), active_parent(act_par), passive_parent(pas_par), tps(),
- features(new SparseVector<double>(feats)) {
- assert(pas_par->IsPassive());
- assert(act_par->IsActive());
- }
-
- // constructor for COMPLETE query
- Edge(const WFSTNode* _r) :
-#ifdef DEBUG_CHART_PARSER
- id(0),
-#endif
- cat(0), dot(NULL), q(NULL),
- r(_r), active_parent(NULL), passive_parent(NULL), tps() {}
-  // constructor for MERGE query
- Edge(const WFSTNode* _q, int) :
-#ifdef DEBUG_CHART_PARSER
- id(0),
-#endif
- cat(0), dot(NULL), q(_q),
- r(NULL), active_parent(NULL), passive_parent(NULL), tps() {}
-};
-#ifdef DEBUG_CHART_PARSER
-int Edge::id_count = 0;
-#endif
-
-ostream& operator<<(ostream& os, const Edge& e) {
- string type = "PREDICT";
- if (e.IsCreatedByScan())
- type = "SCAN";
- else if (e.IsCreatedByComplete())
- type = "COMPLETE";
- os << "["
-#ifdef DEBUG_CHART_PARSER
- << '(' << e.id << ") "
-#else
- << '(' << &e << ") "
-#endif
- << "q=" << e.q << ", r=" << e.r
- << ", cat="<< TD::Convert(e.cat*-1) << ", dot="
- << e.dot
-#ifdef DEBUG_CHART_PARSER
- << e.dot->hint
-#endif
- << (e.IsActive() ? ", Active" : ", Passive")
- << ", " << type;
-#ifdef DEBUG_CHART_PARSER
- if (e.active_parent) { os << ", act.parent=(" << e.active_parent->id << ')'; }
- if (e.passive_parent) { os << ", psv.parent=(" << e.passive_parent->id << ')'; }
-#endif
- if (e.tps) { os << ", tps=" << e.tps->AsString(); }
- return os << ']';
-}
-
-struct Traversal {
- const Edge* const edge; // result from the active / passive combination
- const Edge* const active;
- const Edge* const passive;
- Traversal(const Edge* me, const Edge* a, const Edge* p) : edge(me), active(a), passive(p) {}
-};
-
-struct UniqueTraversalHash {
- size_t operator()(const Traversal* t) const {
- size_t x = 5381;
- x = ((x << 5) + x) ^ reinterpret_cast<size_t>(t->active);
- x = ((x << 5) + x) ^ reinterpret_cast<size_t>(t->passive);
- x = ((x << 5) + x) ^ t->edge->IsActive();
- return x;
- }
-};
-
-struct UniqueTraversalEquals {
- size_t operator()(const Traversal* a, const Traversal* b) const {
- return (a->passive == b->passive && a->active == b->active && a->edge->IsActive() == b->edge->IsActive());
- }
-};
-
-struct UniqueEdgeHash {
- size_t operator()(const Edge* e) const {
- size_t x = 5381;
- if (e->IsActive()) {
- x = ((x << 5) + x) ^ reinterpret_cast<size_t>(e->dot);
- x = ((x << 5) + x) ^ reinterpret_cast<size_t>(e->q);
- x = ((x << 5) + x) ^ reinterpret_cast<size_t>(e->r);
- x = ((x << 5) + x) ^ static_cast<size_t>(e->cat);
- x += 13;
- } else { // with passive edges, we don't care about the dot
- x = ((x << 5) + x) ^ reinterpret_cast<size_t>(e->q);
- x = ((x << 5) + x) ^ reinterpret_cast<size_t>(e->r);
- x = ((x << 5) + x) ^ static_cast<size_t>(e->cat);
- }
- return x;
- }
-};
-
-struct UniqueEdgeEquals {
- bool operator()(const Edge* a, const Edge* b) const {
- if (a->IsActive() != b->IsActive()) return false;
- if (a->IsActive()) {
- return (a->cat == b->cat) && (a->dot == b->dot) && (a->q == b->q) && (a->r == b->r);
- } else {
- return (a->cat == b->cat) && (a->q == b->q) && (a->r == b->r);
- }
- }
-};
-
-struct REdgeHash {
- size_t operator()(const Edge* e) const {
- size_t x = 5381;
- x = ((x << 5) + x) ^ reinterpret_cast<size_t>(e->r);
- return x;
- }
-};
-
-struct REdgeEquals {
- bool operator()(const Edge* a, const Edge* b) const {
- return (a->r == b->r);
- }
-};
-
-struct QEdgeHash {
- size_t operator()(const Edge* e) const {
- size_t x = 5381;
- x = ((x << 5) + x) ^ reinterpret_cast<size_t>(e->q);
- return x;
- }
-};
-
-struct QEdgeEquals {
- bool operator()(const Edge* a, const Edge* b) const {
- return (a->q == b->q);
- }
-};
-
-struct EdgeQueue {
- queue<const Edge*> q;
- EdgeQueue() {}
- void clear() { while(!q.empty()) q.pop(); }
- bool HasWork() const { return !q.empty(); }
- const Edge* Next() { const Edge* res = q.front(); q.pop(); return res; }
- void AddEdge(const Edge* s) { q.push(s); }
-};
-
-class CFG_WFSTComposerImpl {
- public:
- CFG_WFSTComposerImpl(WordID start_cat,
- const WFSTNode* q_0,
- const WFSTNode* q_final) : start_cat_(start_cat), q_0_(q_0), q_final_(q_final) {}
-
- // returns false if the intersection is empty
- bool Compose(const EGrammar& g, Hypergraph* forest) {
- goal_node = NULL;
- EGrammar::const_iterator sit = g.find(start_cat_);
- forest->ReserveNodes(kMAX_NODES);
- assert(sit != g.end());
- Edge* init = new Edge(start_cat_, &sit->second, q_0_);
-    const bool init_ok = IncorporateNewEdge(init);
-    assert(init_ok);
- while (exp_agenda.HasWork() || agenda.HasWork()) {
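-      // Two-stage agenda: exp_agenda buffers edges that have not yet been
-      // checked for duplicates; FinishEdge adds genuinely new ones to the
-      // main agenda, where SCAN/PREDICT/COMPLETE are applied.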
- while(exp_agenda.HasWork()) {
- const Edge* edge = exp_agenda.Next();
- FinishEdge(edge, forest);
- }
- if (agenda.HasWork()) {
- const Edge* edge = agenda.Next();
-#ifdef DEBUG_CHART_PARSER
- cerr << "processing (" << edge->id << ')' << endl;
-#endif
- if (edge->IsActive()) {
- if (edge->dot->HasTerminals())
- DoScan(edge);
- if (edge->dot->HasNonTerminals()) {
- DoMergeWithPassives(edge);
- DoPredict(edge, g);
- }
- } else {
- DoComplete(edge);
- }
- }
- }
- if (goal_node) {
- forest->PruneUnreachable(goal_node->id_);
- RemoveEpsilons(forest, kEPS);
- }
- FreeAll();
-    return goal_node != NULL;
- }
-
- void FreeAll() {
- for (int i = 0; i < free_list_.size(); ++i)
- delete free_list_[i];
- free_list_.clear();
- for (int i = 0; i < traversal_free_list_.size(); ++i)
- delete traversal_free_list_[i];
- traversal_free_list_.clear();
- all_traversals.clear();
- exp_agenda.clear();
- agenda.clear();
- tps2node.clear();
- edge2node.clear();
- all_edges.clear();
- passive_edges.clear();
- active_edges.clear();
- }
-
- ~CFG_WFSTComposerImpl() {
- FreeAll();
- }
-
- // returns the total number of edges created during composition
- int EdgesCreated() const {
- return free_list_.size();
- }
-
- private:
- void DoScan(const Edge* edge) {
- // here, we assume that the FST will potentially have many more outgoing
- // edges than the grammar, which will be just a couple. If you want to
- // efficiently handle the case where both are relatively large, this code
- // will need to change how the intersection is done. The best general
- // solution would probably be the Baeza-Yates double binary search.
-
- const EGrammarNode* dot = edge->dot;
- const WFSTNode* r = edge->r;
- const map<WordID, EGrammarNode>& terms = dot->GetTerminals();
- for (map<WordID, EGrammarNode>::const_iterator git = terms.begin();
- git != terms.end(); ++git) {
-
- if (!(TD::Convert(git->first)[0] >= '0' && TD::Convert(git->first)[0] <= '9')) {
- std::cerr << "TERMINAL SYMBOL: " << TD::Convert(git->first) << endl;
- abort();
- }
- std::vector<std::pair<const WFSTNode*, TRulePtr> > extensions = r->ExtendInput(atoi(TD::Convert(git->first)));
- for (unsigned nsi = 0; nsi < extensions.size(); ++nsi) {
- const WFSTNode* next_r = extensions[nsi].first;
- const EGrammarNode* next_dot = &git->second;
- const bool grammar_continues = next_dot->GrammarContinues();
- const bool rule_completes = next_dot->RuleCompletes();
- if (extensions[nsi].second)
- cerr << "!!! " << extensions[nsi].second->AsString() << endl;
- // cerr << " rule completes: " << rule_completes << " after consuming " << TD::Convert(git->first) << endl;
- assert(grammar_continues || rule_completes);
- const SparseVector<double>& input_features = next_dot->GetCFGProductionFeatures();
- if (rule_completes)
- IncorporateNewEdge(new Edge(edge->cat, next_dot, edge->q, next_r, edge, extensions[nsi].second, input_features));
- if (grammar_continues)
- IncorporateNewEdge(new Edge(edge->cat, next_dot, edge->q, next_r, edge, extensions[nsi].second));
- }
- }
- }
-
- void DoPredict(const Edge* edge, const EGrammar& g) {
- const EGrammarNode* dot = edge->dot;
- const map<WordID, EGrammarNode>& non_terms = dot->GetNonTerminals();
- for (map<WordID, EGrammarNode>::const_iterator git = non_terms.begin();
- git != non_terms.end(); ++git) {
- const WordID nt_to_predict = git->first;
- //cerr << edge->id << " -- " << TD::Convert(nt_to_predict*-1) << endl;
- EGrammar::const_iterator egi = g.find(nt_to_predict);
- if (egi == g.end()) {
- cerr << "[ERROR] Can't find any grammar rules with a LHS of type "
- << TD::Convert(-1*nt_to_predict) << '!' << endl;
- continue;
- }
- assert(edge->IsActive());
- const EGrammarNode* new_dot = &egi->second;
- Edge* new_edge = new Edge(nt_to_predict, new_dot, edge->r, edge);
- IncorporateNewEdge(new_edge);
- }
- }
-
- void DoComplete(const Edge* passive) {
-#ifdef DEBUG_CHART_PARSER
- cerr << " complete: " << *passive << endl;
-#endif
- const WordID completed_nt = passive->cat;
- const WFSTNode* q = passive->q;
- const WFSTNode* next_r = passive->r;
- const Edge query(q);
- const pair<unordered_multiset<const Edge*, REdgeHash, REdgeEquals>::iterator,
- unordered_multiset<const Edge*, REdgeHash, REdgeEquals>::iterator > p =
- active_edges.equal_range(&query);
- for (unordered_multiset<const Edge*, REdgeHash, REdgeEquals>::iterator it = p.first;
- it != p.second; ++it) {
- const Edge* active = *it;
-#ifdef DEBUG_CHART_PARSER
- cerr << " pos: " << *active << endl;
-#endif
- const EGrammarNode* next_dot = active->dot->Extend(completed_nt);
- if (!next_dot) continue;
- const SparseVector<double>& input_features = next_dot->GetCFGProductionFeatures();
- // add up to 2 rules
- if (next_dot->RuleCompletes())
- IncorporateNewEdge(new Edge(active->cat, next_dot, active->q, next_r, active, passive, input_features));
- if (next_dot->GrammarContinues())
- IncorporateNewEdge(new Edge(active->cat, next_dot, active->q, next_r, active, passive));
- }
- }
-
- void DoMergeWithPassives(const Edge* active) {
- // edge is active, has non-terminals, we need to find the passives that can extend it
- assert(active->IsActive());
- assert(active->dot->HasNonTerminals());
-#ifdef DEBUG_CHART_PARSER
- cerr << " merge active with passives: ACT=" << *active << endl;
-#endif
- const Edge query(active->r, 1);
- const pair<unordered_multiset<const Edge*, QEdgeHash, QEdgeEquals>::iterator,
- unordered_multiset<const Edge*, QEdgeHash, QEdgeEquals>::iterator > p =
- passive_edges.equal_range(&query);
- for (unordered_multiset<const Edge*, QEdgeHash, QEdgeEquals>::iterator it = p.first;
- it != p.second; ++it) {
- const Edge* passive = *it;
- const EGrammarNode* next_dot = active->dot->Extend(passive->cat);
- if (!next_dot) continue;
- const WFSTNode* next_r = passive->r;
- const SparseVector<double>& input_features = next_dot->GetCFGProductionFeatures();
- if (next_dot->RuleCompletes())
- IncorporateNewEdge(new Edge(active->cat, next_dot, active->q, next_r, active, passive, input_features));
- if (next_dot->GrammarContinues())
- IncorporateNewEdge(new Edge(active->cat, next_dot, active->q, next_r, active, passive));
- }
- }
-
- // take ownership of edge memory, add to various indexes, etc
- // returns true if this edge is new
- bool IncorporateNewEdge(Edge* edge) {
- free_list_.push_back(edge);
- if (edge->passive_parent && edge->active_parent) {
- Traversal* t = new Traversal(edge, edge->active_parent, edge->passive_parent);
- traversal_free_list_.push_back(t);
- if (all_traversals.find(t) != all_traversals.end()) {
- return false;
- } else {
- all_traversals.insert(t);
- }
- }
- exp_agenda.AddEdge(edge);
- return true;
- }
-
- bool FinishEdge(const Edge* edge, Hypergraph* hg) {
- bool is_new = false;
- if (all_edges.find(edge) == all_edges.end()) {
-#ifdef DEBUG_CHART_PARSER
- cerr << *edge << " is NEW\n";
-#endif
- all_edges.insert(edge);
- is_new = true;
- if (edge->IsPassive()) passive_edges.insert(edge);
- if (edge->IsActive()) active_edges.insert(edge);
- agenda.AddEdge(edge);
- } else {
-#ifdef DEBUG_CHART_PARSER
- cerr << *edge << " is NOT NEW.\n";
-#endif
- }
- AddEdgeToTranslationForest(edge, hg);
- return is_new;
- }
-
- // build the translation forest
- void AddEdgeToTranslationForest(const Edge* edge, Hypergraph* hg) {
- assert(hg->nodes_.size() < kMAX_NODES);
- Hypergraph::Node* tps = NULL;
- // first add any target language rules
- if (edge->tps) {
- Hypergraph::Node*& node = tps2node[(size_t)edge->tps.get()];
- if (!node) {
- // cerr << "Creating phrases for " << edge->tps << endl;
- const TRulePtr& rule = edge->tps;
- node = hg->AddNode(kPHRASE);
- Hypergraph::Edge* hg_edge = hg->AddEdge(rule, Hypergraph::TailNodeVector());
- hg_edge->feature_values_ += rule->GetFeatureValues();
- hg->ConnectEdgeToHeadNode(hg_edge, node);
- }
- tps = node;
- }
- Hypergraph::Node*& head_node = edge2node[edge];
- if (!head_node)
- head_node = hg->AddNode(kPHRASE);
- if (edge->cat == start_cat_ && edge->q == q_0_ && edge->r == q_final_ && edge->IsPassive()) {
- assert(goal_node == NULL || goal_node == head_node);
- goal_node = head_node;
- }
- Hypergraph::TailNodeVector tail;
- SparseVector<double> extra;
- if (edge->IsCreatedByPredict()) {
- // extra.set_value(FD::Convert("predict"), 1);
- } else if (edge->IsCreatedByScan()) {
- tail.push_back(edge2node[edge->active_parent]->id_);
- if (tps) {
- tail.push_back(tps->id_);
- }
- //extra.set_value(FD::Convert("scan"), 1);
- } else if (edge->IsCreatedByComplete()) {
- tail.push_back(edge2node[edge->active_parent]->id_);
- tail.push_back(edge2node[edge->passive_parent]->id_);
- //extra.set_value(FD::Convert("complete"), 1);
- } else {
- assert(!"unexpected edge type!");
- }
- //cerr << head_node->id_ << "<--" << *edge << endl;
-
-#ifdef DEBUG_CHART_PARSER
- for (int i = 0; i < tail.size(); ++i)
- if (tail[i] == head_node->id_) {
- cerr << "ERROR: " << *edge << "\n i=" << i << endl;
- if (i == 1) { cerr << "\tP: " << *edge->passive_parent << endl; }
- if (i == 0) { cerr << "\tA: " << *edge->active_parent << endl; }
- assert(!"self-loop found!");
- }
-#endif
- Hypergraph::Edge* hg_edge = NULL;
- if (tail.size() == 0) {
- hg_edge = hg->AddEdge(kEPSRule, tail);
- } else if (tail.size() == 1) {
- hg_edge = hg->AddEdge(kX1, tail);
- } else if (tail.size() == 2) {
- hg_edge = hg->AddEdge(kX1X2, tail);
- }
- if (edge->features)
- hg_edge->feature_values_ += *edge->features;
- hg_edge->feature_values_ += extra;
- hg->ConnectEdgeToHeadNode(hg_edge, head_node);
- }
-
- Hypergraph::Node* goal_node;
- EdgeQueue exp_agenda;
- EdgeQueue agenda;
- unordered_map<size_t, Hypergraph::Node*> tps2node;
- unordered_map<const Edge*, Hypergraph::Node*, UniqueEdgeHash, UniqueEdgeEquals> edge2node;
- unordered_set<const Traversal*, UniqueTraversalHash, UniqueTraversalEquals> all_traversals;
- unordered_set<const Edge*, UniqueEdgeHash, UniqueEdgeEquals> all_edges;
- unordered_multiset<const Edge*, QEdgeHash, QEdgeEquals> passive_edges;
- unordered_multiset<const Edge*, REdgeHash, REdgeEquals> active_edges;
- vector<Edge*> free_list_;
- vector<Traversal*> traversal_free_list_;
- const WordID start_cat_;
- const WFSTNode* const q_0_;
- const WFSTNode* const q_final_;
-};
-
-#ifdef DEBUG_CHART_PARSER
-static string TrimRule(const string& r) {
- size_t start = r.find(" |||") + 5;
- size_t end = r.rfind(" |||");
- return r.substr(start, end - start);
-}
-#endif
-
-void AddGrammarRule(const string& r, EGrammar* g) {
- const size_t pos = r.find(" ||| ");
- if (pos == string::npos || r[0] != '[') {
- cerr << "Bad rule: " << r << endl;
- return;
- }
- const size_t rpos = r.rfind(" ||| ");
- string feats;
- string rs = r;
- if (rpos != pos) {
- feats = r.substr(rpos + 5);
- rs = r.substr(0, rpos);
- }
- string rhs = rs.substr(pos + 5);
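-  // The input rule is monolingual, so the RHS is duplicated to build a
-  // synchronous rule with identical source and target sides.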
- string trule = rs + " ||| " + rhs + " ||| " + feats;
- TRule tr(trule);
- cerr << "X: " << tr.e_[0] << endl;
-#ifdef DEBUG_CHART_PARSER
- string hint_last_rule;
-#endif
- EGrammarNode* cur = &(*g)[tr.GetLHS()];
- cur->is_root = true;
- for (int i = 0; i < tr.FLength(); ++i) {
- WordID sym = tr.f()[i];
-#ifdef DEBUG_CHART_PARSER
- hint_last_rule = TD::Convert(sym < 0 ? -sym : sym);
- cur->hint += " <@@> (*" + hint_last_rule + ") " + TrimRule(tr.AsString());
-#endif
- if (sym < 0)
- cur = &cur->ntptr[sym];
- else
- cur = &cur->tptr[sym];
- }
-#ifdef DEBUG_CHART_PARSER
- cur->hint += " <@@> (" + hint_last_rule + "*) " + TrimRule(tr.AsString());
-#endif
- cur->is_some_rule_complete = true;
- cur->input_features = tr.GetFeatureValues();
-}
-
-CFG_WFSTComposer::~CFG_WFSTComposer() {
- delete pimpl_;
-}
-
-CFG_WFSTComposer::CFG_WFSTComposer(const WFST& wfst) {
- InitializeConstants();
- pimpl_ = new CFG_WFSTComposerImpl(kUNIQUE_START, wfst.Initial(), wfst.Final());
-}
-
-bool CFG_WFSTComposer::Compose(const Hypergraph& src_forest, Hypergraph* trg_forest) {
- // first, convert the src forest into an EGrammar
- EGrammar g;
- const int nedges = src_forest.edges_.size();
- const int nnodes = src_forest.nodes_.size();
- vector<int> cats(nnodes);
- bool assign_cats = false;
- for (int i = 0; i < nnodes; ++i)
- if (assign_cats) {
- cats[i] = TD::Convert("CAT_" + boost::lexical_cast<string>(i)) * -1;
- } else {
- cats[i] = src_forest.nodes_[i].cat_;
- }
- // construct the grammar
- for (int i = 0; i < nedges; ++i) {
- const Hypergraph::Edge& edge = src_forest.edges_[i];
- const vector<WordID>& src = edge.rule_->f();
- EGrammarNode* cur = &g[cats[edge.head_node_]];
- cur->is_root = true;
- int ntc = 0;
- for (int j = 0; j < src.size(); ++j) {
- WordID sym = src[j];
- if (sym <= 0) {
- sym = cats[edge.tail_nodes_[ntc]];
- ++ntc;
- cur = &cur->ntptr[sym];
- } else {
- cur = &cur->tptr[sym];
- }
- }
- cur->is_some_rule_complete = true;
- cur->input_features = edge.feature_values_;
- }
- EGrammarNode& goal_rule = g[kUNIQUE_START];
- assert((goal_rule.ntptr.size() == 1 && goal_rule.tptr.size() == 0) ||
- (goal_rule.ntptr.size() == 0 && goal_rule.tptr.size() == 1));
-
- return pimpl_->Compose(g, trg_forest);
-}
-
-bool CFG_WFSTComposer::Compose(istream* in, Hypergraph* trg_forest) {
- EGrammar g;
- while(*in) {
- string line;
- getline(*in, line);
- if (line.empty()) continue;
- AddGrammarRule(line, &g);
- }
-
- return pimpl_->Compose(g, trg_forest);
-}
diff --git a/gi/pf/cfg_wfst_composer.h b/gi/pf/cfg_wfst_composer.h
deleted file mode 100644
index cf47f459..00000000
--- a/gi/pf/cfg_wfst_composer.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#ifndef _CFG_WFST_COMPOSER_H_
-#define _CFG_WFST_COMPOSER_H_
-
-#include <iostream>
-#include <vector>
-#include <utility>
-
-#include "trule.h"
-#include "wordid.h"
-
-class CFG_WFSTComposerImpl;
-class Hypergraph;
-
-struct WFSTNode {
- virtual ~WFSTNode();
- // returns the next states reachable by consuming srcindex (which identifies a word)
- // paired with the output string generated by taking that transition.
- virtual std::vector<std::pair<const WFSTNode*,TRulePtr> > ExtendInput(unsigned srcindex) const = 0;
-};
-
-struct WFST {
- virtual ~WFST();
- virtual const WFSTNode* Final() const = 0;
- virtual const WFSTNode* Initial() const = 0;
-};
-
-class CFG_WFSTComposer {
- public:
- ~CFG_WFSTComposer();
- explicit CFG_WFSTComposer(const WFST& wfst);
- bool Compose(const Hypergraph& in_forest, Hypergraph* trg_forest);
-
- // reads the grammar from a file. There must be a single top-level
- // S -> X rule. Anything else is possible. Format is:
- // [S] ||| [SS,1]
- // [SS] ||| [NP,1] [VP,2] ||| Feature1=0.2 Feature2=-2.3
- // [SS] ||| [VP,1] [NP,2] ||| Feature1=0.8
- // [NP] ||| [DET,1] [N,2] ||| Feature3=2
- // ...
- bool Compose(std::istream* grammar_file, Hypergraph* trg_forest);
-
- private:
- CFG_WFSTComposerImpl* pimpl_;
-};
-
-#endif
diff --git a/gi/pf/conditional_pseg.h b/gi/pf/conditional_pseg.h
deleted file mode 100644
index 81ddb206..00000000
--- a/gi/pf/conditional_pseg.h
+++ /dev/null
@@ -1,275 +0,0 @@
-#ifndef _CONDITIONAL_PSEG_H_
-#define _CONDITIONAL_PSEG_H_
-
-#include <vector>
-#include <tr1/unordered_map>
-#include <boost/functional/hash.hpp>
-#include <iostream>
-
-#include "m.h"
-#include "prob.h"
-#include "ccrp_nt.h"
-#include "mfcr.h"
-#include "trule.h"
-#include "base_distributions.h"
-#include "tdict.h"
-
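-// Translation model with one multi-floor Chinese restaurant (MFCR) per
-// source phrase f_; table labels are drawn from the conditional base
-// measure rp0.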
-template <typename ConditionalBaseMeasure>
-struct MConditionalTranslationModel {
- explicit MConditionalTranslationModel(ConditionalBaseMeasure& rcp0) :
- rp0(rcp0), d(0.5), strength(1.0), lambdas(1, prob_t::One()), p0s(1) {}
-
- void Summary() const {
- std::cerr << "Number of conditioning contexts: " << r.size() << std::endl;
- for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) {
- std::cerr << TD::GetString(it->first) << " \t(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << std::endl;
- for (MFCR<1,TRule>::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2)
- std::cerr << " " << i2->second.total_dish_count_ << '\t' << i2->first << std::endl;
- }
- }
-
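-  // Log posterior (up to a constant) of the shared discount/strength
-  // hyperparameters: a Beta(1,1) prior on the discount dd, a Gamma(1,1)
-  // prior on the total dd + aa, plus the seating likelihood of every
-  // restaurant under (dd, aa).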
- double log_likelihood(const double& dd, const double& aa) const {
- if (aa <= -dd) return -std::numeric_limits<double>::infinity();
- //double llh = Md::log_beta_density(dd, 10, 3) + Md::log_gamma_density(aa, 1, 1);
- double llh = Md::log_beta_density(dd, 1, 1) +
- Md::log_gamma_density(dd + aa, 1, 1);
- typename std::tr1::unordered_map<std::vector<WordID>, MFCR<1,TRule>, boost::hash<std::vector<WordID> > >::const_iterator it;
- for (it = r.begin(); it != r.end(); ++it)
- llh += it->second.log_crp_prob(dd, aa);
- return llh;
- }
-
- struct DiscountResampler {
- DiscountResampler(const MConditionalTranslationModel& m) : m_(m) {}
- const MConditionalTranslationModel& m_;
- double operator()(const double& proposed_discount) const {
- return m_.log_likelihood(proposed_discount, m_.strength);
- }
- };
-
- struct AlphaResampler {
- AlphaResampler(const MConditionalTranslationModel& m) : m_(m) {}
- const MConditionalTranslationModel& m_;
- double operator()(const double& proposed_strength) const {
- return m_.log_likelihood(m_.d, proposed_strength);
- }
- };
-
- void ResampleHyperparameters(MT19937* rng) {
- typename std::tr1::unordered_map<std::vector<WordID>, MFCR<1,TRule>, boost::hash<std::vector<WordID> > >::iterator it;
-#if 1
- for (it = r.begin(); it != r.end(); ++it) {
- it->second.resample_hyperparameters(rng);
- }
-#else
- const unsigned nloop = 5;
- const unsigned niterations = 10;
- DiscountResampler dr(*this);
- AlphaResampler ar(*this);
- for (int iter = 0; iter < nloop; ++iter) {
- strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits<double>::min(),
- std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
- double min_discount = std::numeric_limits<double>::min();
- if (strength < 0.0) min_discount -= strength;
- d = slice_sampler1d(dr, d, *rng, min_discount,
- 1.0, 0.0, niterations, 100*niterations);
- }
- strength = slice_sampler1d(ar, strength, *rng, -d,
- std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
- std::cerr << "MConditionalTranslationModel(d=" << d << ",s=" << strength << ") = " << log_likelihood(d, strength) << std::endl;
- for (it = r.begin(); it != r.end(); ++it) {
- it->second.set_discount(d);
- it->second.set_strength(strength);
- }
-#endif
- }
-
- int DecrementRule(const TRule& rule, MT19937* rng) {
- RuleModelHash::iterator it = r.find(rule.f_);
- assert(it != r.end());
- const TableCount delta = it->second.decrement(rule, rng);
- if (delta.count) {
- if (it->second.num_customers() == 0) r.erase(it);
- }
- return delta.count;
- }
-
- int IncrementRule(const TRule& rule, MT19937* rng) {
- RuleModelHash::iterator it = r.find(rule.f_);
- if (it == r.end()) {
- //it = r.insert(make_pair(rule.f_, MFCR<1,TRule>(d, strength))).first;
- it = r.insert(make_pair(rule.f_, MFCR<1,TRule>(1,1,1,1,0.6, -0.12))).first;
- }
- p0s[0] = rp0(rule);
- TableCount delta = it->second.increment(rule, p0s.begin(), lambdas.begin(), rng);
- return delta.count;
- }
-
- prob_t RuleProbability(const TRule& rule) const {
- prob_t p;
- RuleModelHash::const_iterator it = r.find(rule.f_);
- if (it == r.end()) {
- p = rp0(rule);
- } else {
- p0s[0] = rp0(rule);
- p = it->second.prob(rule, p0s.begin(), lambdas.begin());
- }
- return p;
- }
-
- prob_t Likelihood() const {
- prob_t p; p.logeq(log_likelihood(d, strength));
- return p;
- }
-
- const ConditionalBaseMeasure& rp0;
- typedef std::tr1::unordered_map<std::vector<WordID>,
- MFCR<1, TRule>,
- boost::hash<std::vector<WordID> > > RuleModelHash;
- RuleModelHash r;
- double d, strength;
- std::vector<prob_t> lambdas;
- mutable std::vector<prob_t> p0s;
-};
-
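-// Simpler variant built on CCRP_NoTable: one restaurant per source phrase,
-// with the base measure mixed in through logprob rather than explicit
-// table labels.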
-template <typename ConditionalBaseMeasure>
-struct ConditionalTranslationModel {
- explicit ConditionalTranslationModel(ConditionalBaseMeasure& rcp0) :
- rp0(rcp0) {}
-
- void Summary() const {
- std::cerr << "Number of conditioning contexts: " << r.size() << std::endl;
- for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) {
- std::cerr << TD::GetString(it->first) << " \t(\\alpha = " << it->second.alpha() << ") --------------------------" << std::endl;
- for (CCRP_NoTable<TRule>::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2)
- std::cerr << " " << i2->second << '\t' << i2->first << std::endl;
- }
- }
-
- void ResampleHyperparameters(MT19937* rng) {
- for (RuleModelHash::iterator it = r.begin(); it != r.end(); ++it)
- it->second.resample_hyperparameters(rng);
- }
-
- int DecrementRule(const TRule& rule) {
- RuleModelHash::iterator it = r.find(rule.f_);
- assert(it != r.end());
- int count = it->second.decrement(rule);
- if (count) {
- if (it->second.num_customers() == 0) r.erase(it);
- }
- return count;
- }
-
- int IncrementRule(const TRule& rule) {
- RuleModelHash::iterator it = r.find(rule.f_);
- if (it == r.end()) {
- it = r.insert(make_pair(rule.f_, CCRP_NoTable<TRule>(1.0, 1.0, 8.0))).first;
- }
- int count = it->second.increment(rule);
- return count;
- }
-
- void IncrementRules(const std::vector<TRulePtr>& rules) {
- for (int i = 0; i < rules.size(); ++i)
- IncrementRule(*rules[i]);
- }
-
- void DecrementRules(const std::vector<TRulePtr>& rules) {
- for (int i = 0; i < rules.size(); ++i)
- DecrementRule(*rules[i]);
- }
-
- prob_t RuleProbability(const TRule& rule) const {
- prob_t p;
- RuleModelHash::const_iterator it = r.find(rule.f_);
- if (it == r.end()) {
- p.logeq(log(rp0(rule)));
- } else {
- p.logeq(it->second.logprob(rule, log(rp0(rule))));
- }
- return p;
- }
-
- prob_t Likelihood() const {
- prob_t p = prob_t::One();
- for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) {
- prob_t q; q.logeq(it->second.log_crp_prob());
- p *= q;
- for (CCRP_NoTable<TRule>::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2)
- p *= rp0(i2->first);
- }
- return p;
- }
-
- const ConditionalBaseMeasure& rp0;
- typedef std::tr1::unordered_map<std::vector<WordID>,
- CCRP_NoTable<TRule>,
- boost::hash<std::vector<WordID> > > RuleModelHash;
- RuleModelHash r;
-};
-
-template <typename ConditionalBaseMeasure>
-struct ConditionalParallelSegementationModel {
- explicit ConditionalParallelSegementationModel(ConditionalBaseMeasure& rcp0) :
- tmodel(rcp0), base(prob_t::One()), aligns(1,1) {}
-
- ConditionalTranslationModel<ConditionalBaseMeasure> tmodel;
-
- void DecrementRule(const TRule& rule) {
- tmodel.DecrementRule(rule);
- }
-
- void IncrementRule(const TRule& rule) {
- tmodel.IncrementRule(rule);
- }
-
- void IncrementRulesAndAlignments(const std::vector<TRulePtr>& rules) {
- tmodel.IncrementRules(rules);
- for (int i = 0; i < rules.size(); ++i) {
- IncrementAlign(rules[i]->f_.size());
- }
- }
-
- void DecrementRulesAndAlignments(const std::vector<TRulePtr>& rules) {
- tmodel.DecrementRules(rules);
- for (int i = 0; i < rules.size(); ++i) {
- DecrementAlign(rules[i]->f_.size());
- }
- }
-
- prob_t RuleProbability(const TRule& rule) const {
- return tmodel.RuleProbability(rule);
- }
-
- void IncrementAlign(unsigned span) {
- if (aligns.increment(span)) {
- // TODO
- }
- }
-
- void DecrementAlign(unsigned span) {
- if (aligns.decrement(span)) {
- // TODO
- }
- }
-
- prob_t AlignProbability(unsigned span) const {
- prob_t p;
- p.logeq(aligns.logprob(span, Md::log_poisson(span, 1.0)));
- return p;
- }
-
- prob_t Likelihood() const {
- prob_t p; p.logeq(aligns.log_crp_prob());
- p *= base;
- p *= tmodel.Likelihood();
- return p;
- }
-
- prob_t base;
- CCRP_NoTable<unsigned> aligns;
-};
-
-#endif
-
diff --git a/gi/pf/condnaive.cc b/gi/pf/condnaive.cc
deleted file mode 100644
index 419731ac..00000000
--- a/gi/pf/condnaive.cc
+++ /dev/null
@@ -1,298 +0,0 @@
-#include <iostream>
-#include <tr1/memory>
-#include <queue>
-
-#include <boost/multi_array.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "base_distributions.h"
-#include "monotonic_pseg.h"
-#include "conditional_pseg.h"
-#include "trule.h"
-#include "tdict.h"
-#include "filelib.h"
-#include "dict.h"
-#include "sampler.h"
-#include "ccrp_nt.h"
-#include "corpus.h"
-
-using namespace std;
-using namespace std::tr1;
-namespace po = boost::program_options;
-
-static unsigned kMAX_SRC_PHRASE;
-static unsigned kMAX_TRG_PHRASE;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples")
- ("input,i",po::value<string>(),"Read parallel data from")
- ("max_src_phrase",po::value<unsigned>()->default_value(4),"Maximum length of source language phrases")
- ("max_trg_phrase",po::value<unsigned>()->default_value(4),"Maximum length of target language phrases")
- ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)")
- ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution")
- ("random_seed,S",po::value<uint32_t>(), "Random seed");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || (conf->count("input") == 0)) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-boost::shared_ptr<MT19937> prng;
-
-struct ModelAndData {
- explicit ModelAndData(ConditionalParallelSegementationModel<PhraseConditionalBase>& m, const vector<vector<int> >& ce, const vector<vector<int> >& cf, const set<int>& ve, const set<int>& vf) :
- model(m),
- rng(&*prng),
- corpuse(ce),
- corpusf(cf),
- vocabe(ve),
- vocabf(vf),
- mh_samples(),
- mh_rejects(),
- kX(-TD::Convert("X")),
- derivations(corpuse.size()) {}
-
- void ResampleHyperparameters() {
- }
-
- void InstantiateRule(const pair<short,short>& from,
- const pair<short,short>& to,
- const vector<int>& sentf,
- const vector<int>& sente,
- TRule* rule) const {
- rule->f_.clear();
- rule->e_.clear();
- rule->lhs_ = kX;
- for (short i = from.first; i < to.first; ++i)
- rule->f_.push_back(sentf[i]);
- for (short i = from.second; i < to.second; ++i)
- rule->e_.push_back(sente[i]);
- }
-
- void DecrementDerivation(const vector<pair<short,short> >& d, const vector<int>& sentf, const vector<int>& sente) {
- if (d.size() < 2) return;
- TRule x;
- for (int i = 1; i < d.size(); ++i) {
- InstantiateRule(d[i], d[i-1], sentf, sente, &x);
- model.DecrementRule(x);
- model.DecrementAlign(x.f_.size());
- }
- }
-
- void PrintDerivation(const vector<pair<short,short> >& d, const vector<int>& sentf, const vector<int>& sente) {
- if (d.size() < 2) return;
- TRule x;
- for (int i = 1; i < d.size(); ++i) {
- InstantiateRule(d[i], d[i-1], sentf, sente, &x);
- cerr << i << '/' << (d.size() - 1) << ": " << x << endl;
- }
- }
-
- void IncrementDerivation(const vector<pair<short,short> >& d, const vector<int>& sentf, const vector<int>& sente) {
- if (d.size() < 2) return;
- TRule x;
- for (int i = 1; i < d.size(); ++i) {
- InstantiateRule(d[i], d[i-1], sentf, sente, &x);
- model.IncrementRule(x);
- model.IncrementAlign(x.f_.size());
- }
- }
-
- prob_t Likelihood() const {
- return model.Likelihood();
- }
-
- prob_t DerivationProposalProbability(const vector<pair<short,short> >& d, const vector<int>& sentf, const vector<int>& sente) const {
- prob_t p = prob_t::One();
- TRule x;
- for (int i = 1; i < d.size(); ++i) {
- InstantiateRule(d[i], d[i-1], sentf, sente, &x);
- p *= model.RuleProbability(x);
- p *= model.AlignProbability(x.f_.size());
- }
- return p;
- }
-
- void Sample();
-
- ConditionalParallelSegementationModel<PhraseConditionalBase>& model;
- MT19937* rng;
- const vector<vector<int> >& corpuse, corpusf;
- const set<int>& vocabe, vocabf;
- unsigned mh_samples, mh_rejects;
- const int kX;
- vector<vector<pair<short, short> > > derivations;
-};
-
-void ModelAndData::Sample() {
- unsigned MAXK = kMAX_SRC_PHRASE;
- unsigned MAXL = kMAX_TRG_PHRASE;
- TRule x;
- x.lhs_ = -TD::Convert("X");
-
- for (int samples = 0; samples < 1000; ++samples) {
- if (samples % 1 == 0 && samples > 0) {
- //ResampleHyperparameters();
- cerr << " [" << samples << " LLH=" << log(Likelihood()) << " MH=" << ((double)mh_rejects / mh_samples) << "]\n";
- for (int i = 0; i < 10; ++i) {
- cerr << "SENTENCE: " << TD::GetString(corpusf[i]) << " ||| " << TD::GetString(corpuse[i]) << endl;
- PrintDerivation(derivations[i], corpusf[i], corpuse[i]);
- }
- static TRule xx("[X] ||| w n ||| s h ||| X=0");
- const CCRP_NoTable<TRule>& dcrp = model.tmodel.r.find(xx.f_)->second;
- for (CCRP_NoTable<TRule>::const_iterator it = dcrp.begin(); it != dcrp.end(); ++it) {
- cerr << "\t" << it->second << "\t" << it->first << endl;
- }
- }
- cerr << '.' << flush;
- for (int s = 0; s < corpuse.size(); ++s) {
- const vector<int>& sentf = corpusf[s];
- const vector<int>& sente = corpuse[s];
-// cerr << " CUSTOMERS: " << rules.num_customers() << endl;
-// cerr << "SENTENCE: " << TD::GetString(sentf) << " ||| " << TD::GetString(sente) << endl;
-
- vector<pair<short, short> >& deriv = derivations[s];
- const prob_t p_cur = Likelihood();
- DecrementDerivation(deriv, sentf, sente);
-
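-      // Forward (inside) chart over segmentations: a[i][j] is the total
-      // probability of generating the first i source and j target words;
-      // trans[i][j][k-1][l-1] caches the score of the phrase pair covering
-      // source span [i, i+k) and target span [j, j+l).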
- boost::multi_array<prob_t, 2> a(boost::extents[sentf.size() + 1][sente.size() + 1]);
- boost::multi_array<prob_t, 4> trans(boost::extents[sentf.size() + 1][sente.size() + 1][MAXK][MAXL]);
- a[0][0] = prob_t::One();
- for (int i = 0; i < sentf.size(); ++i) {
- for (int j = 0; j < sente.size(); ++j) {
- const prob_t src_a = a[i][j];
- x.f_.clear();
- for (int k = 1; k <= MAXK; ++k) {
- if (i + k > sentf.size()) break;
- x.f_.push_back(sentf[i + k - 1]);
- x.e_.clear();
- const prob_t p_span = model.AlignProbability(k); // prob of consuming this much source
- for (int l = 1; l <= MAXL; ++l) {
- if (j + l > sente.size()) break;
- x.e_.push_back(sente[j + l - 1]);
- trans[i][j][k - 1][l - 1] = model.RuleProbability(x) * p_span;
- a[i + k][j + l] += src_a * trans[i][j][k - 1][l - 1];
- }
- }
- }
- }
-// cerr << "Inside: " << log(a[sentf.size()][sente.size()]) << endl;
- const prob_t q_cur = DerivationProposalProbability(deriv, sentf, sente);
-
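-      // Sample a segmentation by walking backwards from the full-sentence
-      // cell to (0,0), picking each predecessor cell in proportion to its
-      // inside score times the connecting phrase-pair score.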
- vector<pair<short,short> > newderiv;
- int cur_i = sentf.size();
- int cur_j = sente.size();
- while(cur_i > 0 && cur_j > 0) {
- newderiv.push_back(pair<short,short>(cur_i, cur_j));
-// cerr << "NODE: (" << cur_i << "," << cur_j << ")\n";
- SampleSet<prob_t> ss;
- vector<pair<short,short> > nexts;
- for (int k = 1; k <= MAXK; ++k) {
- const int hyp_i = cur_i - k;
- if (hyp_i < 0) break;
- for (int l = 1; l <= MAXL; ++l) {
- const int hyp_j = cur_j - l;
- if (hyp_j < 0) break;
- const prob_t& inside = a[hyp_i][hyp_j];
- if (inside == prob_t::Zero()) continue;
- const prob_t& transp = trans[hyp_i][hyp_j][k - 1][l - 1];
- if (transp == prob_t::Zero()) continue;
- const prob_t p = inside * transp;
- ss.add(p);
- nexts.push_back(pair<short,short>(hyp_i, hyp_j));
-// cerr << " (" << hyp_i << "," << hyp_j << ") <--- " << log(p) << endl;
- }
- }
-// cerr << " sample set has " << nexts.size() << " elements.\n";
- const int selected = rng->SelectSample(ss);
- cur_i = nexts[selected].first;
- cur_j = nexts[selected].second;
- }
- newderiv.push_back(pair<short,short>(0,0));
- const prob_t q_new = DerivationProposalProbability(newderiv, sentf, sente);
- IncrementDerivation(newderiv, sentf, sente);
-// cerr << "SANITY: " << q_new << " " <<log(DerivationProposalProbability(newderiv, sentf, sente)) << endl;
- if (deriv.empty()) { deriv = newderiv; continue; }
- ++mh_samples;
-
- if (deriv != newderiv) {
- const prob_t p_new = Likelihood();
-// cerr << "p_cur=" << log(p_cur) << "\t p_new=" << log(p_new) << endl;
-// cerr << "q_cur=" << log(q_cur) << "\t q_new=" << log(q_new) << endl;
- if (!rng->AcceptMetropolisHastings(p_new, p_cur, q_new, q_cur)) {
- ++mh_rejects;
- DecrementDerivation(newderiv, sentf, sente);
- IncrementDerivation(deriv, sentf, sente);
- } else {
-// cerr << " ACCEPT\n";
- deriv = newderiv;
- }
- }
- }
- }
-}
-
-int main(int argc, char** argv) {
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
- kMAX_TRG_PHRASE = conf["max_trg_phrase"].as<unsigned>();
- kMAX_SRC_PHRASE = conf["max_src_phrase"].as<unsigned>();
-
- if (!conf.count("model1")) {
-    cerr << argv[0] << ": Please use --model1 to specify model 1 parameters\n";
- return 1;
- }
- if (conf.count("random_seed"))
- prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
- else
- prng.reset(new MT19937);
-// MT19937& rng = *prng;
-
- vector<vector<int> > corpuse, corpusf;
- set<int> vocabe, vocabf;
- corpus::ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
- cerr << "f-Corpus size: " << corpusf.size() << " sentences\n";
- cerr << "f-Vocabulary size: " << vocabf.size() << " types\n";
- cerr << "f-Corpus size: " << corpuse.size() << " sentences\n";
- cerr << "f-Vocabulary size: " << vocabe.size() << " types\n";
- assert(corpusf.size() == corpuse.size());
-
- Model1 m1(conf["model1"].as<string>());
-
- PhraseConditionalBase pcb0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size());
- ConditionalParallelSegementationModel<PhraseConditionalBase> x(pcb0);
-
- ModelAndData posterior(x, corpuse, corpusf, vocabe, vocabf);
- posterior.Sample();
-
- TRule r1("[X] ||| x ||| l e ||| X=0");
- TRule r2("[X] ||| A ||| a d ||| X=0");
- TRule r3("[X] ||| n ||| e r ||| X=0");
- TRule r4("[X] ||| x A n ||| b l a g ||| X=0");
-
- PhraseConditionalUninformativeBase u0(vocabe.size());
-
- cerr << (pcb0(r1)*pcb0(r2)*pcb0(r3)) << endl;
- cerr << (u0(r4)) << endl;
-
- return 0;
-}
-
diff --git a/gi/pf/corpus.cc b/gi/pf/corpus.cc
deleted file mode 100644
index cb6e4ed7..00000000
--- a/gi/pf/corpus.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-#include "corpus.h"
-
-#include <set>
-#include <vector>
-#include <string>
-
-#include "tdict.h"
-#include "filelib.h"
-
-using namespace std;
-
-namespace corpus {
-
-void ReadParallelCorpus(const string& filename,
- vector<vector<WordID> >* f,
- vector<vector<WordID> >* e,
- set<WordID>* vocab_f,
- set<WordID>* vocab_e) {
- f->clear();
- e->clear();
- vocab_f->clear();
- vocab_e->clear();
- ReadFile rf(filename);
- istream* in = rf.stream();
- assert(*in);
- string line;
- unsigned lc = 0;
- const WordID kDIV = TD::Convert("|||");
- vector<WordID> tmp;
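-  // Each line must have the form "f1 f2 ... ||| e1 e2 ..."; a second |||
-  // separator on a line is treated as a fatal format error.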
- while(getline(*in, line)) {
- ++lc;
- e->push_back(vector<int>());
- f->push_back(vector<int>());
- vector<int>& le = e->back();
- vector<int>& lf = f->back();
- tmp.clear();
- TD::ConvertSentence(line, &tmp);
- bool isf = true;
- for (unsigned i = 0; i < tmp.size(); ++i) {
- const int cur = tmp[i];
- if (isf) {
- if (kDIV == cur) {
- isf = false;
- } else {
- lf.push_back(cur);
- vocab_f->insert(cur);
- }
- } else {
- if (cur == kDIV) {
- cerr << "ERROR in " << lc << ": " << line << endl << endl;
- abort();
- }
- le.push_back(cur);
- vocab_e->insert(cur);
- }
- }
- assert(isf == false);
- }
-}
-
-}
-
diff --git a/gi/pf/corpus.h b/gi/pf/corpus.h
deleted file mode 100644
index e7febdb7..00000000
--- a/gi/pf/corpus.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef _CORPUS_H_
-#define _CORPUS_H_
-
-#include <string>
-#include <vector>
-#include <set>
-#include "wordid.h"
-
-namespace corpus {
-
-void ReadParallelCorpus(const std::string& filename,
- std::vector<std::vector<WordID> >* f,
- std::vector<std::vector<WordID> >* e,
- std::set<WordID>* vocab_f,
- std::set<WordID>* vocab_e);
-
-}
-
-#endif
diff --git a/gi/pf/dpnaive.cc b/gi/pf/dpnaive.cc
deleted file mode 100644
index 75ccad72..00000000
--- a/gi/pf/dpnaive.cc
+++ /dev/null
@@ -1,301 +0,0 @@
-#include <iostream>
-#include <tr1/memory>
-#include <queue>
-
-#include <boost/multi_array.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "base_distributions.h"
-#include "monotonic_pseg.h"
-#include "trule.h"
-#include "tdict.h"
-#include "filelib.h"
-#include "dict.h"
-#include "sampler.h"
-#include "ccrp_nt.h"
-#include "corpus.h"
-
-using namespace std;
-using namespace std::tr1;
-namespace po = boost::program_options;
-
-static unsigned kMAX_SRC_PHRASE;
-static unsigned kMAX_TRG_PHRASE;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples")
- ("input,i",po::value<string>(),"Read parallel data from")
- ("max_src_phrase",po::value<unsigned>()->default_value(4),"Maximum length of source language phrases")
- ("max_trg_phrase",po::value<unsigned>()->default_value(4),"Maximum length of target language phrases")
- ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)")
- ("inverse_model1,M",po::value<string>(),"Inverse Model 1 parameters (used in base distribution)")
- ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution")
- ("random_seed,S",po::value<uint32_t>(), "Random seed");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || (conf->count("input") == 0)) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-boost::shared_ptr<MT19937> prng;
-
-template <typename Base>
-struct ModelAndData {
- explicit ModelAndData(MonotonicParallelSegementationModel<PhraseJointBase_BiDir>& m, const Base& b, const vector<vector<int> >& ce, const vector<vector<int> >& cf, const set<int>& ve, const set<int>& vf) :
- model(m),
- rng(&*prng),
- p0(b),
- baseprob(prob_t::One()),
- corpuse(ce),
- corpusf(cf),
- vocabe(ve),
- vocabf(vf),
- mh_samples(),
- mh_rejects(),
- kX(-TD::Convert("X")),
- derivations(corpuse.size()) {}
-
- void ResampleHyperparameters() {
- }
-
- void InstantiateRule(const pair<short,short>& from,
- const pair<short,short>& to,
- const vector<int>& sentf,
- const vector<int>& sente,
- TRule* rule) const {
- rule->f_.clear();
- rule->e_.clear();
- rule->lhs_ = kX;
- for (short i = from.first; i < to.first; ++i)
- rule->f_.push_back(sentf[i]);
- for (short i = from.second; i < to.second; ++i)
- rule->e_.push_back(sente[i]);
- }
-
- void DecrementDerivation(const vector<pair<short,short> >& d, const vector<int>& sentf, const vector<int>& sente) {
- if (d.size() < 2) return;
- TRule x;
- for (int i = 1; i < d.size(); ++i) {
- InstantiateRule(d[i], d[i-1], sentf, sente, &x);
- model.DecrementRule(x);
- model.DecrementContinue();
- }
- model.DecrementStop();
- }
-
- void PrintDerivation(const vector<pair<short,short> >& d, const vector<int>& sentf, const vector<int>& sente) {
- if (d.size() < 2) return;
- TRule x;
- for (int i = 1; i < d.size(); ++i) {
- InstantiateRule(d[i], d[i-1], sentf, sente, &x);
- cerr << i << '/' << (d.size() - 1) << ": " << x << endl;
- }
- }
-
- void IncrementDerivation(const vector<pair<short,short> >& d, const vector<int>& sentf, const vector<int>& sente) {
- if (d.size() < 2) return;
- TRule x;
- for (int i = 1; i < d.size(); ++i) {
- InstantiateRule(d[i], d[i-1], sentf, sente, &x);
- model.IncrementRule(x);
- model.IncrementContinue();
- }
- model.IncrementStop();
- }
-
- prob_t Likelihood() const {
- return model.Likelihood();
- }
-
- prob_t DerivationProposalProbability(const vector<pair<short,short> >& d, const vector<int>& sentf, const vector<int>& sente) const {
- prob_t p = model.StopProbability();
- if (d.size() < 2) return p;
- TRule x;
- const prob_t p_cont = model.ContinueProbability();
- for (int i = 1; i < d.size(); ++i) {
- InstantiateRule(d[i], d[i-1], sentf, sente, &x);
- p *= p_cont;
- p *= model.RuleProbability(x);
- }
- return p;
- }
-
- void Sample();
-
- MonotonicParallelSegementationModel<PhraseJointBase_BiDir>& model;
- MT19937* rng;
- const Base& p0;
-  prob_t baseprob; // cached value of generating the table labels from p0
- // this can't be used if we go to a hierarchical prior!
- const vector<vector<int> >& corpuse, corpusf;
- const set<int>& vocabe, vocabf;
- unsigned mh_samples, mh_rejects;
- const int kX;
- vector<vector<pair<short, short> > > derivations;
-};
-
-template <typename Base>
-void ModelAndData<Base>::Sample() {
- unsigned MAXK = kMAX_SRC_PHRASE;
- unsigned MAXL = kMAX_TRG_PHRASE;
- TRule x;
- x.lhs_ = -TD::Convert("X");
- for (int samples = 0; samples < 1000; ++samples) {
- if (samples % 1 == 0 && samples > 0) {
- //ResampleHyperparameters();
- cerr << " [" << samples << " LLH=" << log(Likelihood()) << " MH=" << ((double)mh_rejects / mh_samples) << "]\n";
- for (int i = 0; i < 10; ++i) {
- cerr << "SENTENCE: " << TD::GetString(corpusf[i]) << " ||| " << TD::GetString(corpuse[i]) << endl;
- PrintDerivation(derivations[i], corpusf[i], corpuse[i]);
- }
- }
- cerr << '.' << flush;
- for (int s = 0; s < corpuse.size(); ++s) {
- const vector<int>& sentf = corpusf[s];
- const vector<int>& sente = corpuse[s];
-// cerr << " CUSTOMERS: " << rules.num_customers() << endl;
-// cerr << "SENTENCE: " << TD::GetString(sentf) << " ||| " << TD::GetString(sente) << endl;
-
- vector<pair<short, short> >& deriv = derivations[s];
- const prob_t p_cur = Likelihood();
- DecrementDerivation(deriv, sentf, sente);
-
- boost::multi_array<prob_t, 2> a(boost::extents[sentf.size() + 1][sente.size() + 1]);
- boost::multi_array<prob_t, 4> trans(boost::extents[sentf.size() + 1][sente.size() + 1][MAXK][MAXL]);
- a[0][0] = prob_t::One();
- const prob_t q_stop = model.StopProbability();
- const prob_t q_cont = model.ContinueProbability();
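-      // Each phrase pair is weighted by a continue/stop process: the pair
-      // that consumes the final words of both sentences takes the stop
-      // probability, all others the continue probability.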
- for (int i = 0; i < sentf.size(); ++i) {
- for (int j = 0; j < sente.size(); ++j) {
- const prob_t src_a = a[i][j];
- x.f_.clear();
- for (int k = 1; k <= MAXK; ++k) {
- if (i + k > sentf.size()) break;
- x.f_.push_back(sentf[i + k - 1]);
- x.e_.clear();
- for (int l = 1; l <= MAXL; ++l) {
- if (j + l > sente.size()) break;
- x.e_.push_back(sente[j + l - 1]);
- const bool stop_now = ((j + l) == sente.size()) && ((i + k) == sentf.size());
- const prob_t& cp = stop_now ? q_stop : q_cont;
- trans[i][j][k - 1][l - 1] = model.RuleProbability(x) * cp;
- a[i + k][j + l] += src_a * trans[i][j][k - 1][l - 1];
- }
- }
- }
- }
-// cerr << "Inside: " << log(a[sentf.size()][sente.size()]) << endl;
- const prob_t q_cur = DerivationProposalProbability(deriv, sentf, sente);
-
- vector<pair<short,short> > newderiv;
- int cur_i = sentf.size();
- int cur_j = sente.size();
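- // Backward sample: walk from (|f|,|e|) down to (0,0), choosing each
- // predecessor cell with probability proportional to inside[hyp] * trans.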
- while(cur_i > 0 && cur_j > 0) {
- newderiv.push_back(pair<short,short>(cur_i, cur_j));
-// cerr << "NODE: (" << cur_i << "," << cur_j << ")\n";
- SampleSet<prob_t> ss;
- vector<pair<short,short> > nexts;
- for (int k = 1; k <= MAXK; ++k) {
- const int hyp_i = cur_i - k;
- if (hyp_i < 0) break;
- for (int l = 1; l <= MAXL; ++l) {
- const int hyp_j = cur_j - l;
- if (hyp_j < 0) break;
- const prob_t& inside = a[hyp_i][hyp_j];
- if (inside == prob_t::Zero()) continue;
- const prob_t& transp = trans[hyp_i][hyp_j][k - 1][l - 1];
- if (transp == prob_t::Zero()) continue;
- const prob_t p = inside * transp;
- ss.add(p);
- nexts.push_back(pair<short,short>(hyp_i, hyp_j));
-// cerr << " (" << hyp_i << "," << hyp_j << ") <--- " << log(p) << endl;
- }
- }
-// cerr << " sample set has " << nexts.size() << " elements.\n";
- const int selected = rng->SelectSample(ss);
- cur_i = nexts[selected].first;
- cur_j = nexts[selected].second;
- }
- newderiv.push_back(pair<short,short>(0,0));
- const prob_t q_new = DerivationProposalProbability(newderiv, sentf, sente);
- IncrementDerivation(newderiv, sentf, sente);
-// cerr << "SANITY: " << q_new << " " <<log(DerivationProposalProbability(newderiv, sentf, sente)) << endl;
- if (deriv.empty()) { deriv = newderiv; continue; }
- ++mh_samples;
-
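- // MH correction for sampling from the proposal rather than the true
- // posterior: accept with probability min(1, (p_new * q_cur) / (p_cur * q_new));
- // on rejection, restore the previous derivation's counts.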
- if (deriv != newderiv) {
- const prob_t p_new = Likelihood();
-// cerr << "p_cur=" << log(p_cur) << "\t p_new=" << log(p_new) << endl;
-// cerr << "q_cur=" << log(q_cur) << "\t q_new=" << log(q_new) << endl;
- if (!rng->AcceptMetropolisHastings(p_new, p_cur, q_new, q_cur)) {
- ++mh_rejects;
- DecrementDerivation(newderiv, sentf, sente);
- IncrementDerivation(deriv, sentf, sente);
- } else {
-// cerr << " ACCEPT\n";
- deriv = newderiv;
- }
- }
- }
- }
-}
-
-int main(int argc, char** argv) {
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
- kMAX_TRG_PHRASE = conf["max_trg_phrase"].as<unsigned>();
- kMAX_SRC_PHRASE = conf["max_src_phrase"].as<unsigned>();
-
- if (!conf.count("model1")) {
- cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n";
- return 1;
- }
- if (!conf.count("inverse_model1")) {
- cerr << argv[0] << "Please use --inverse_model1 to specify inverse model 1 parameters\n";
- return 1;
- }
- if (conf.count("random_seed"))
- prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
- else
- prng.reset(new MT19937);
-// MT19937& rng = *prng;
-
- vector<vector<int> > corpuse, corpusf;
- set<int> vocabe, vocabf;
- corpus::ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
- cerr << "f-Corpus size: " << corpusf.size() << " sentences\n";
- cerr << "f-Vocabulary size: " << vocabf.size() << " types\n";
- cerr << "f-Corpus size: " << corpuse.size() << " sentences\n";
- cerr << "f-Vocabulary size: " << vocabe.size() << " types\n";
- assert(corpusf.size() == corpuse.size());
-
- Model1 m1(conf["model1"].as<string>());
- Model1 invm1(conf["inverse_model1"].as<string>());
-// PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size(), vocabf.size());
- PhraseJointBase_BiDir alp0(m1, invm1, conf["model1_interpolation_weight"].as<double>(), vocabe.size(), vocabf.size());
- MonotonicParallelSegementationModel<PhraseJointBase_BiDir> m(alp0);
-
- ModelAndData<PhraseJointBase_BiDir> posterior(m, alp0, corpuse, corpusf, vocabe, vocabf);
- posterior.Sample();
-
- return 0;
-}
-
diff --git a/gi/pf/guess-translits.pl b/gi/pf/guess-translits.pl
deleted file mode 100755
index d00c2168..00000000
--- a/gi/pf/guess-translits.pl
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-use utf8;
-
-my $MIN_PMI = -3;
-
-my %fs;
-my %es;
-my %ef;
-
-die "Usage: $0 < input.utf8.txt\n" if scalar @ARGV > 0;
-
-binmode(STDIN,":utf8");
-binmode(STDOUT,":utf8");
-binmode(STDERR,":utf8");
-
-my $tot = 0;
-print STDERR "Reading alignments from STDIN ...\n";
-while(<STDIN>) {
- chomp;
- my ($fsent, $esent, $alsent) = split / \|\|\| /;
- die "Format should be 'foreign sentence ||| english sentence ||| 0-0 1-1 ...'\n" unless defined $fsent && defined $esent && defined $alsent;
-
- my @fws = split /\s+/, $fsent;
- my @ews = split /\s+/, $esent;
- my @as = split /\s+/, $alsent;
- my %a2b;
- my %b2a;
- for my $ap (@as) {
- my ($a,$b) = split /-/, $ap;
- die "BAD INPUT: $_\n" unless defined $a && defined $b;
- $a2b{$a}->{$b} = 1;
- $b2a{$b}->{$a} = 1;
- }
- for my $a (keys %a2b) {
- my $bref = $a2b{$a};
- next unless scalar keys %$bref < 2;
- my $b = (keys %$bref)[0];
- next unless scalar keys %{$b2a{$b}} < 2;
- my $f = $fws[$a];
- next unless defined $f;
- next unless length($f) > 3;
- my $e = $ews[$b];
- next unless defined $e;
- next unless length($e) > 3;
-
- $ef{$f}->{$e}++;
- $es{$e}++;
- $fs{$f}++;
- $tot++;
- }
-}
-my $ltot = log($tot);
-my $num = 0;
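-# Score each 1-1 aligned pair by an unnormalized pointwise mutual information:
-#   pmi = log c(f,e) - log c(f) - log c(e)
-# i.e. true PMI minus log N (cf. the unused $ltot), hence the negative threshold.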
-print STDERR "Extracting pairs for PMI > $MIN_PMI ...\n";
-for my $f (keys %fs) {
- my $logf = log($fs{$f});
- my $esref = $ef{$f};
- for my $e (keys %$esref) {
- my $loge = log($es{$e});
- my $ef = $esref->{$e};
- my $logef = log($ef);
- my $pmi = $logef - ($loge + $logf);
- next if $pmi < $MIN_PMI;
- my @flets = split //, $f;
- my @elets = split //, $e;
- print "@flets ||| @elets\n";
- $num++;
- }
-}
-print STDERR "Extracted $num pairs.\n";
-print STDERR "Recommend running:\n ../../training/model1 -v -d -t -99999 output.txt\n";
diff --git a/gi/pf/hpyp_tm.cc b/gi/pf/hpyp_tm.cc
deleted file mode 100644
index 784f9958..00000000
--- a/gi/pf/hpyp_tm.cc
+++ /dev/null
@@ -1,133 +0,0 @@
-#include "hpyp_tm.h"
-
-#include <tr1/unordered_map>
-#include <iostream>
-#include <queue>
-
-#include "tdict.h"
-#include "ccrp.h"
-#include "pyp_word_model.h"
-#include "tied_resampler.h"
-
-using namespace std;
-using namespace std::tr1;
-
-struct FreqBinner {
- FreqBinner(const std::string& fname) { fd_.Load(fname); }
- unsigned NumberOfBins() const { return fd_.Max() + 1; }
- unsigned Bin(const WordID& w) const { return fd_.LookUp(w); }
- FreqDict<unsigned> fd_;
-};
-
-template <typename Base, class Binner = FreqBinner>
-struct ConditionalPYPWordModel {
- ConditionalPYPWordModel(Base* b, const Binner* bnr = NULL) :
- base(*b),
- binner(bnr),
- btr(binner ? binner->NumberOfBins() + 1u : 2u) {}
-
- void Summary() const {
- cerr << "Number of conditioning contexts: " << r.size() << endl;
- for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) {
- cerr << TD::Convert(it->first) << " \tPYP(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << endl;
- for (CCRP<vector<WordID> >::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2)
- cerr << " " << i2->second.total_dish_count_ << '\t' << TD::GetString(i2->first) << endl;
- }
- }
-
- void ResampleHyperparameters(MT19937* rng) {
- btr.ResampleHyperparameters(rng);
- }
-
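- // p(trg letters | src): one CRP per source word, with the shared PYP
- // spelling model as its base measure; an unseen source word falls back
- // to the base directly.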
- prob_t Prob(const WordID src, const vector<WordID>& trglets) const {
- RuleModelHash::const_iterator it = r.find(src);
- if (it == r.end()) {
- return base(trglets);
- } else {
- return it->second.prob(trglets, base(trglets));
- }
- }
-
- void Increment(const WordID src, const vector<WordID>& trglets, MT19937* rng) {
- RuleModelHash::iterator it = r.find(src);
- if (it == r.end()) {
- it = r.insert(make_pair(src, CCRP<vector<WordID> >(0.5,1.0))).first;
- static const WordID kNULL = TD::Convert("NULL");
- unsigned bin = (src == kNULL ? 0 : 1);
- if (binner && bin) { bin = binner->Bin(src) + 1; }
- btr.Add(bin, &it->second);
- }
- if (it->second.increment(trglets, base(trglets), rng))
- base.Increment(trglets, rng);
- }
-
- void Decrement(const WordID src, const vector<WordID>& trglets, MT19937* rng) {
- RuleModelHash::iterator it = r.find(src);
- assert(it != r.end());
- if (it->second.decrement(trglets, rng)) {
- base.Decrement(trglets, rng);
- }
- }
-
- prob_t Likelihood() const {
- prob_t p = prob_t::One();
- for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) {
- prob_t q; q.logeq(it->second.log_crp_prob());
- p *= q;
- }
- return p;
- }
-
- unsigned UniqueConditioningContexts() const {
- return r.size();
- }
-
- // TODO tie PYP hyperparameters based on source word frequency bins
- Base& base;
- const Binner* binner;
- BinTiedResampler<CCRP<vector<WordID> > > btr;
- typedef unordered_map<WordID, CCRP<vector<WordID> > > RuleModelHash;
- RuleModelHash r;
-};
-
-HPYPLexicalTranslation::HPYPLexicalTranslation(const vector<vector<WordID> >& lets,
- const unsigned vocab_size,
- const unsigned num_letters) :
- letters(lets),
- base(vocab_size, num_letters, 5),
- up0(new PYPWordModel<PoissonUniformWordModel>(&base)),
- tmodel(new ConditionalPYPWordModel<PYPWordModel<PoissonUniformWordModel> >(up0, new FreqBinner("10k.freq"))),
- kX(-TD::Convert("X")) {}
-
-void HPYPLexicalTranslation::Summary() const {
- tmodel->Summary();
- up0->Summary();
-}
-
-prob_t HPYPLexicalTranslation::Likelihood() const {
- prob_t p = up0->Likelihood();
- p *= tmodel->Likelihood();
- return p;
-}
-
-void HPYPLexicalTranslation::ResampleHyperparameters(MT19937* rng) {
- tmodel->ResampleHyperparameters(rng);
- up0->ResampleHyperparameters(rng);
-}
-
-unsigned HPYPLexicalTranslation::UniqueConditioningContexts() const {
- return tmodel->UniqueConditioningContexts();
-}
-
-prob_t HPYPLexicalTranslation::Prob(WordID src, WordID trg) const {
- return tmodel->Prob(src, letters[trg]);
-}
-
-void HPYPLexicalTranslation::Increment(WordID src, WordID trg, MT19937* rng) {
- tmodel->Increment(src, letters[trg], rng);
-}
-
-void HPYPLexicalTranslation::Decrement(WordID src, WordID trg, MT19937* rng) {
- tmodel->Decrement(src, letters[trg], rng);
-}
-
diff --git a/gi/pf/hpyp_tm.h b/gi/pf/hpyp_tm.h
deleted file mode 100644
index af3215ba..00000000
--- a/gi/pf/hpyp_tm.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef HPYP_LEX_TRANS
-#define HPYP_LEX_TRANS
-
-#include <vector>
-#include "wordid.h"
-#include "prob.h"
-#include "sampler.h"
-#include "freqdict.h"
-#include "poisson_uniform_word_model.h"
-
-struct FreqBinner;
-template <class B> struct PYPWordModel;
-template <typename T, class B> struct ConditionalPYPWordModel;
-
-struct HPYPLexicalTranslation {
- explicit HPYPLexicalTranslation(const std::vector<std::vector<WordID> >& lets,
- const unsigned vocab_size,
- const unsigned num_letters);
-
- prob_t Likelihood() const;
-
- void ResampleHyperparameters(MT19937* rng);
- prob_t Prob(WordID src, WordID trg) const; // return p(trg | src)
- void Summary() const;
- void Increment(WordID src, WordID trg, MT19937* rng);
- void Decrement(WordID src, WordID trg, MT19937* rng);
- unsigned UniqueConditioningContexts() const;
-
- private:
- const std::vector<std::vector<WordID> >& letters; // spelling dictionary
- PoissonUniformWordModel base; // "generator" of English types
- PYPWordModel<PoissonUniformWordModel>* up0; // model English lexicon
- ConditionalPYPWordModel<PYPWordModel<PoissonUniformWordModel>, FreqBinner>* tmodel; // translation distributions
- // (model English word | French word)
- const WordID kX;
-};
-
-#endif
diff --git a/gi/pf/itg.cc b/gi/pf/itg.cc
deleted file mode 100644
index 29ec3860..00000000
--- a/gi/pf/itg.cc
+++ /dev/null
@@ -1,275 +0,0 @@
-#include <iostream>
-#include <tr1/memory>
-#include <queue>
-
-#include <boost/functional.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "viterbi.h"
-#include "hg.h"
-#include "trule.h"
-#include "tdict.h"
-#include "filelib.h"
-#include "dict.h"
-#include "sampler.h"
-#include "ccrp_nt.h"
-#include "ccrp_onetable.h"
-
-using namespace std;
-using namespace tr1;
-namespace po = boost::program_options;
-
-ostream& operator<<(ostream& os, const vector<WordID>& p) {
- os << '[';
- for (int i = 0; i < p.size(); ++i)
- os << (i==0 ? "" : " ") << TD::Convert(p[i]);
- return os << ']';
-}
-
-struct UnigramModel {
- explicit UnigramModel(const string& fname, unsigned vocab_size, double p0null = 0.05) :
- use_uniform_(fname.size() == 0),
- p0null_(p0null),
- uniform_((1.0 - p0null) / vocab_size),
- probs_(TD::NumWords() + 1) {
- if (fname.size() > 0) LoadUnigrams(fname);
- probs_[0] = p0null_;
- }
-
-//
-// \data\
-// ngram 1=9295
-//
-// \1-grams:
-// -3.191193 "
-
- void LoadUnigrams(const string& fname) {
- cerr << "Loading unigram probabilities from " << fname << " ..." << endl;
- ReadFile rf(fname);
- string line;
- istream& in = *rf.stream();
- assert(in);
- getline(in, line);
- assert(line.empty());
- getline(in, line);
- assert(line == "\\data\\");
- getline(in, line);
- size_t pos = line.find("ngram 1=");
- assert(pos == 0);
- assert(line.size() > 8);
- const size_t num_unigrams = atoi(&line[8]);
- getline(in, line);
- assert(line.empty());
- getline(in, line);
- assert(line == "\\1-grams:");
- for (size_t i = 0; i < num_unigrams; ++i) {
- getline(in, line);
- assert(line.size() > 0);
- pos = line.find('\t');
- assert(pos > 0);
- assert(pos + 1 < line.size());
- const WordID w = TD::Convert(line.substr(pos + 1));
- line[pos] = 0;
- float p = atof(&line[0]);
- const prob_t pnon_null(1.0 - p0null_.as_float());
- if (w < probs_.size()) probs_[w].logeq(p * log(10) + log(pnon_null)); else abort();
- }
- }
-
- const prob_t& operator()(const WordID& w) const {
- if (!w) return p0null_;
- if (use_uniform_) return uniform_;
- return probs_[w];
- }
-
- const bool use_uniform_;
- const prob_t p0null_;
- const prob_t uniform_;
- vector<prob_t> probs_;
-};
-
-struct Model1 {
- explicit Model1(const string& fname) :
- kNULL(TD::Convert("<eps>")),
- kZERO() {
- LoadModel1(fname);
- }
-
- void LoadModel1(const string& fname) {
- cerr << "Loading Model 1 parameters from " << fname << " ..." << endl;
- ReadFile rf(fname);
- istream& in = *rf.stream();
- string line;
- unsigned lc = 0;
- while(getline(in, line)) {
- ++lc;
- int cur = 0;
- int start = 0;
- while(cur < line.size() && line[cur] != ' ') { ++cur; }
- assert(cur != line.size());
- line[cur] = 0;
- const WordID src = TD::Convert(&line[0]);
- ++cur;
- start = cur;
- while(cur < line.size() && line[cur] != ' ') { ++cur; }
- assert(cur != line.size());
- line[cur] = 0;
- WordID trg = TD::Convert(&line[start]);
- const double logprob = strtod(&line[cur + 1], NULL);
- if (src >= ttable.size()) ttable.resize(src + 1);
- ttable[src][trg].logeq(logprob);
- }
- cerr << " read " << lc << " parameters.\n";
- }
-
- // returns prob 0 if src or trg is not found!
- const prob_t& operator()(WordID src, WordID trg) const {
- if (src == 0) src = kNULL;
- if (src < ttable.size()) {
- const map<WordID, prob_t>& cpd = ttable[src];
- const map<WordID, prob_t>::const_iterator it = cpd.find(trg);
- if (it != cpd.end())
- return it->second;
- }
- return kZERO;
- }
-
- const WordID kNULL;
- const prob_t kZERO;
- vector<map<WordID, prob_t> > ttable;
-};
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples")
- ("particles,p",po::value<unsigned>()->default_value(25),"Number of particles")
- ("input,i",po::value<string>(),"Read parallel data from")
- ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)")
- ("inverse_model1,M",po::value<string>(),"Inverse Model 1 parameters (used in backward estimate)")
- ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution")
- ("src_unigram,u",po::value<string>()->default_value(""),"Source unigram distribution; empty for uniform")
- ("trg_unigram,U",po::value<string>()->default_value(""),"Target unigram distribution; empty for uniform")
- ("random_seed,S",po::value<uint32_t>(), "Random seed");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || (conf->count("input") == 0)) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-void ReadParallelCorpus(const string& filename,
- vector<vector<WordID> >* f,
- vector<vector<WordID> >* e,
- set<WordID>* vocab_f,
- set<WordID>* vocab_e) {
- f->clear();
- e->clear();
- vocab_f->clear();
- vocab_e->clear();
- istream* in;
- if (filename == "-")
- in = &cin;
- else
- in = new ifstream(filename.c_str());
- assert(*in);
- string line;
- const WordID kDIV = TD::Convert("|||");
- vector<WordID> tmp;
- while(*in) {
- getline(*in, line);
- if (line.empty() && !*in) break;
- e->push_back(vector<int>());
- f->push_back(vector<int>());
- vector<int>& le = e->back();
- vector<int>& lf = f->back();
- tmp.clear();
- TD::ConvertSentence(line, &tmp);
- bool isf = true;
- for (unsigned i = 0; i < tmp.size(); ++i) {
- const int cur = tmp[i];
- if (isf) {
- if (kDIV == cur) { isf = false; } else {
- lf.push_back(cur);
- vocab_f->insert(cur);
- }
- } else {
- assert(cur != kDIV);
- le.push_back(cur);
- vocab_e->insert(cur);
- }
- }
- assert(isf == false);
- }
- if (in != &cin) delete in;
-}
-
-int main(int argc, char** argv) {
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
- const unsigned particles = conf["particles"].as<unsigned>();
- const unsigned samples = conf["samples"].as<unsigned>();
- TD::Convert("<s>");
- TD::Convert("</s>");
- TD::Convert("<unk>");
- if (!conf.count("model1")) {
- cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n";
- return 1;
- }
- boost::shared_ptr<MT19937> prng;
- if (conf.count("random_seed"))
- prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
- else
- prng.reset(new MT19937);
- MT19937& rng = *prng;
-
- vector<vector<WordID> > corpuse, corpusf;
- set<WordID> vocabe, vocabf;
- cerr << "Reading corpus...\n";
- ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
- cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n";
- cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n";
- assert(corpusf.size() == corpuse.size());
- UnigramModel src_unigram(conf["src_unigram"].as<string>(), vocabf.size());
- UnigramModel trg_unigram(conf["trg_unigram"].as<string>(), vocabe.size());
- const prob_t kHALF(0.5);
-
- const string kEMPTY = "NULL";
- const int kLHS = -TD::Convert("X");
- Model1 m1(conf["model1"].as<string>());
- Model1 invm1(conf["inverse_model1"].as<string>());
- for (int si = 0; si < conf["samples"].as<unsigned>(); ++si) {
- cerr << '.' << flush;
- for (int ci = 0; ci < corpusf.size(); ++ci) {
- const vector<WordID>& trg = corpuse[ci];
- const vector<WordID>& src = corpusf[ci];
- for (int i = 0; i <= trg.size(); ++i) {
- const WordID e_i = i > 0 ? trg[i-1] : 0;
- for (int j = 0; j <= src.size(); ++j) {
- const WordID f_j = j > 0 ? src[j-1] : 0;
- if (e_i == 0 && f_j == 0) continue;
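- // Symmetrized joint: je = 0.5 * p(f_j) * p(e_i|f_j) + 0.5 * p(e_i) * p(f_j|e_i)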
- prob_t je = kHALF * src_unigram(f_j) * m1(f_j,e_i) + kHALF * trg_unigram(e_i) * invm1(e_i,f_j);
- cerr << "p( " << (e_i ? TD::Convert(e_i) : kEMPTY) << " , " << (f_j ? TD::Convert(f_j) : kEMPTY) << " ) = " << log(je) << endl;
- if (e_i && f_j)
- cout << "[X] ||| " << TD::Convert(f_j) << " ||| " << TD::Convert(e_i) << " ||| LogProb=" << log(je) << endl;
- }
- }
- }
- }
-}
-
diff --git a/gi/pf/learn_cfg.cc b/gi/pf/learn_cfg.cc
deleted file mode 100644
index 44eaa162..00000000
--- a/gi/pf/learn_cfg.cc
+++ /dev/null
@@ -1,428 +0,0 @@
-#include <iostream>
-#include <tr1/memory>
-#include <queue>
-
-#include <boost/functional.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "inside_outside.h"
-#include "hg.h"
-#include "bottom_up_parser.h"
-#include "fdict.h"
-#include "grammar.h"
-#include "m.h"
-#include "trule.h"
-#include "tdict.h"
-#include "filelib.h"
-#include "dict.h"
-#include "sampler.h"
-#include "ccrp.h"
-#include "ccrp_onetable.h"
-
-using namespace std;
-using namespace tr1;
-namespace po = boost::program_options;
-
-boost::shared_ptr<MT19937> prng;
-vector<int> nt_vocab;
-vector<int> nt_id_to_index;
-static unsigned kMAX_RULE_SIZE = 0;
-static unsigned kMAX_ARITY = 0;
-static bool kALLOW_MIXED = true; // allow rules with mixed terminals and NTs
-static bool kHIERARCHICAL_PRIOR = false;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples")
- ("input,i",po::value<string>(),"Read parallel data from")
- ("max_rule_size,m", po::value<unsigned>()->default_value(0), "Maximum rule size (0 for unlimited)")
- ("max_arity,a", po::value<unsigned>()->default_value(0), "Maximum number of nonterminals in a rule (0 for unlimited)")
- ("no_mixed_rules,M", "Do not mix terminals and nonterminals in a rule RHS")
- ("nonterminals,n", po::value<unsigned>()->default_value(1), "Size of nonterminal vocabulary")
- ("hierarchical_prior,h", "Use hierarchical prior")
- ("random_seed,S",po::value<uint32_t>(), "Random seed");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || (conf->count("input") == 0)) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-unsigned ReadCorpus(const string& filename,
- vector<vector<WordID> >* e,
- set<WordID>* vocab_e) {
- e->clear();
- vocab_e->clear();
- istream* in;
- if (filename == "-")
- in = &cin;
- else
- in = new ifstream(filename.c_str());
- assert(*in);
- string line;
- unsigned toks = 0;
- while(*in) {
- getline(*in, line);
- if (line.empty() && !*in) break;
- e->push_back(vector<int>());
- vector<int>& le = e->back();
- TD::ConvertSentence(line, &le);
- for (unsigned i = 0; i < le.size(); ++i)
- vocab_e->insert(le[i]);
- toks += le.size();
- }
- if (in != &cin) delete in;
- return toks;
-}
-
-struct Grid {
- // a b c d e
- // 0 - 0 - -
- vector<int> grid;
-};
-
-struct BaseRuleModel {
- explicit BaseRuleModel(unsigned term_size,
- unsigned nonterm_size = 1) :
- unif_term(1.0 / term_size),
- unif_nonterm(1.0 / nonterm_size) {}
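- // p0(rule): a Poisson length penalty times, for each RHS symbol, a
- // terminal/nonterminal choice (when mixed rules are allowed) and a
- // uniform draw from the corresponding vocabulary.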
- prob_t operator()(const TRule& r) const {
- prob_t p; p.logeq(Md::log_poisson(1.0, r.f_.size()));
- const prob_t term_prob((2.0 + 0.01*r.f_.size()) / (r.f_.size() + 2));
- const prob_t nonterm_prob(1.0 - term_prob.as_float());
- for (unsigned i = 0; i < r.f_.size(); ++i) {
- if (r.f_[i] <= 0) { // nonterminal
- if (kALLOW_MIXED) p *= nonterm_prob;
- p *= unif_nonterm;
- } else { // terminal
- if (kALLOW_MIXED) p *= term_prob;
- p *= unif_term;
- }
- }
- return p;
- }
- const prob_t unif_term, unif_nonterm;
-};
-
-struct HieroLMModel {
- explicit HieroLMModel(unsigned vocab_size, unsigned num_nts = 1) :
- base(vocab_size, num_nts),
- q0(1,1,1,1),
- nts(num_nts, CCRP<TRule>(1,1,1,1)) {}
-
- prob_t Prob(const TRule& r) const {
- return nts[nt_id_to_index[-r.lhs_]].prob(r, p0(r));
- }
-
- inline prob_t p0(const TRule& r) const {
- if (kHIERARCHICAL_PRIOR)
- return q0.prob(r, base(r));
- else
- return base(r);
- }
-
- int Increment(const TRule& r, MT19937* rng) {
- const int delta = nts[nt_id_to_index[-r.lhs_]].increment(r, p0(r), rng);
- if (kHIERARCHICAL_PRIOR && delta)
- q0.increment(r, base(r), rng);
- return delta;
- // return x.increment(r);
- }
-
- int Decrement(const TRule& r, MT19937* rng) {
- const int delta = nts[nt_id_to_index[-r.lhs_]].decrement(r, rng);
- if (kHIERARCHICAL_PRIOR && delta)
- q0.decrement(r, rng);
- return delta;
- //return x.decrement(r);
- }
-
- prob_t Likelihood() const {
- prob_t p = prob_t::One();
- for (unsigned i = 0; i < nts.size(); ++i) {
- prob_t q; q.logeq(nts[i].log_crp_prob());
- p *= q;
- for (CCRP<TRule>::const_iterator it = nts[i].begin(); it != nts[i].end(); ++it) {
- prob_t tp = p0(it->first);
- tp.poweq(it->second.table_counts_.size());
- p *= tp;
- }
- }
- if (kHIERARCHICAL_PRIOR) {
- prob_t q; q.logeq(q0.log_crp_prob());
- p *= q;
- for (CCRP<TRule>::const_iterator it = q0.begin(); it != q0.end(); ++it) {
- prob_t tp = base(it->first);
- tp.poweq(it->second.table_counts_.size());
- p *= tp;
- }
- }
- //for (CCRP_OneTable<TRule>::const_iterator it = x.begin(); it != x.end(); ++it)
- // p *= base(it->first);
- return p;
- }
-
- void ResampleHyperparameters(MT19937* rng) {
- for (unsigned i = 0; i < nts.size(); ++i)
- nts[i].resample_hyperparameters(rng);
- if (kHIERARCHICAL_PRIOR) {
- q0.resample_hyperparameters(rng);
- cerr << "[base d=" << q0.discount() << ", s=" << q0.strength() << "]";
- }
- cerr << " d=" << nts[0].discount() << ", s=" << nts[0].strength() << endl;
- }
-
- const BaseRuleModel base;
- CCRP<TRule> q0;
- vector<CCRP<TRule> > nts;
- //CCRP_OneTable<TRule> x;
-};
-
-vector<GrammarIter* > tofreelist;
-
-HieroLMModel* plm;
-
-struct NPGrammarIter : public GrammarIter, public RuleBin {
- NPGrammarIter() : arity() { tofreelist.push_back(this); }
- NPGrammarIter(const TRulePtr& inr, const int a, int symbol) : arity(a) {
- if (inr) {
- r.reset(new TRule(*inr));
- } else {
- r.reset(new TRule);
- }
- TRule& rr = *r;
- rr.lhs_ = nt_vocab[0];
- rr.f_.push_back(symbol);
- rr.e_.push_back(symbol < 0 ? (1-int(arity)) : symbol);
- tofreelist.push_back(this);
- }
- inline static unsigned NextArity(int cur_a, int symbol) {
- return cur_a + (symbol <= 0 ? 1 : 0);
- }
- virtual int GetNumRules() const {
- if (r) return nt_vocab.size(); else return 0;
- }
- virtual TRulePtr GetIthRule(int i) const {
- if (i == 0) return r;
- TRulePtr nr(new TRule(*r));
- nr->lhs_ = nt_vocab[i];
- return nr;
- }
- virtual int Arity() const {
- return arity;
- }
- virtual const RuleBin* GetRules() const {
- if (!r) return NULL; else return this;
- }
- virtual const GrammarIter* Extend(int symbol) const {
- const int next_arity = NextArity(arity, symbol);
- if (kMAX_ARITY && next_arity > kMAX_ARITY)
- return NULL;
- if (!kALLOW_MIXED && r) {
- bool t1 = r->f_.front() <= 0;
- bool t2 = symbol <= 0;
- if (t1 != t2) return NULL;
- }
- if (!kMAX_RULE_SIZE || !r || (r->f_.size() < kMAX_RULE_SIZE))
- return new NPGrammarIter(r, next_arity, symbol);
- else
- return NULL;
- }
- const unsigned char arity;
- TRulePtr r;
-};
-
-struct NPGrammar : public Grammar {
- virtual const GrammarIter* GetRoot() const {
- return new NPGrammarIter;
- }
-};
-
-prob_t TotalProb(const Hypergraph& hg) {
- return Inside<prob_t, EdgeProb>(hg);
-}
-
-void SampleDerivation(const Hypergraph& hg, MT19937* rng, vector<unsigned>* sampled_deriv) {
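- // Top-down sampling from the inside chart: starting near the top of the
- // forest, pick an incoming edge with probability proportional to its edge
- // probability times the inside scores of its tail nodes, then recurse on
- // the tails.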
- vector<prob_t> node_probs;
- Inside<prob_t, EdgeProb>(hg, &node_probs);
- queue<unsigned> q;
- q.push(hg.nodes_.size() - 2);
- while(!q.empty()) {
- unsigned cur_node_id = q.front();
-// cerr << "NODE=" << cur_node_id << endl;
- q.pop();
- const Hypergraph::Node& node = hg.nodes_[cur_node_id];
- const unsigned num_in_edges = node.in_edges_.size();
- unsigned sampled_edge = 0;
- if (num_in_edges == 1) {
- sampled_edge = node.in_edges_[0];
- } else {
- //prob_t z;
- assert(num_in_edges > 1);
- SampleSet<prob_t> ss;
- for (unsigned j = 0; j < num_in_edges; ++j) {
- const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]];
- prob_t p = edge.edge_prob_;
- for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k)
- p *= node_probs[edge.tail_nodes_[k]];
- ss.add(p);
-// cerr << log(ss[j]) << " ||| " << edge.rule_->AsString() << endl;
- //z += p;
- }
-// for (unsigned j = 0; j < num_in_edges; ++j) {
-// const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]];
-// cerr << exp(log(ss[j] / z)) << " ||| " << edge.rule_->AsString() << endl;
-// }
-// cerr << " --- \n";
- sampled_edge = node.in_edges_[rng->SelectSample(ss)];
- }
- sampled_deriv->push_back(sampled_edge);
- const Hypergraph::Edge& edge = hg.edges_[sampled_edge];
- for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) {
- q.push(edge.tail_nodes_[j]);
- }
- }
- for (unsigned i = 0; i < sampled_deriv->size(); ++i) {
- cerr << *hg.edges_[(*sampled_deriv)[i]].rule_ << endl;
- }
-}
-
-void IncrementDerivation(const Hypergraph& hg, const vector<unsigned>& d, HieroLMModel* plm, MT19937* rng) {
- for (unsigned i = 0; i < d.size(); ++i)
- plm->Increment(*hg.edges_[d[i]].rule_, rng);
-}
-
-void DecrementDerivation(const Hypergraph& hg, const vector<unsigned>& d, HieroLMModel* plm, MT19937* rng) {
- for (unsigned i = 0; i < d.size(); ++i)
- plm->Decrement(*hg.edges_[d[i]].rule_, rng);
-}
-
-int main(int argc, char** argv) {
- po::variables_map conf;
-
- InitCommandLine(argc, argv, &conf);
- nt_vocab.resize(conf["nonterminals"].as<unsigned>());
- assert(nt_vocab.size() > 0);
- assert(nt_vocab.size() < 26);
- {
- string nt = "X";
- for (unsigned i = 0; i < nt_vocab.size(); ++i) {
- if (nt_vocab.size() > 1) nt[0] = ('A' + i);
- int pid = TD::Convert(nt);
- nt_vocab[i] = -pid;
- if (pid >= nt_id_to_index.size()) {
- nt_id_to_index.resize(pid + 1, -1);
- }
- nt_id_to_index[pid] = i;
- }
- }
- vector<GrammarPtr> grammars;
- grammars.push_back(GrammarPtr(new NPGrammar));
-
- const unsigned samples = conf["samples"].as<unsigned>();
- kMAX_RULE_SIZE = conf["max_rule_size"].as<unsigned>();
- if (kMAX_RULE_SIZE == 1) {
- cerr << "Invalid maximum rule size: must be 0 or >1\n";
- return 1;
- }
- kMAX_ARITY = conf["max_arity"].as<unsigned>();
- if (kMAX_ARITY == 1) {
- cerr << "Invalid maximum arity: must be 0 or >1\n";
- return 1;
- }
- kALLOW_MIXED = !conf.count("no_mixed_rules");
-
- kHIERARCHICAL_PRIOR = conf.count("hierarchical_prior");
-
- if (conf.count("random_seed"))
- prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
- else
- prng.reset(new MT19937);
- MT19937& rng = *prng;
- vector<vector<WordID> > corpuse;
- set<WordID> vocabe;
- cerr << "Reading corpus...\n";
- const unsigned toks = ReadCorpus(conf["input"].as<string>(), &corpuse, &vocabe);
- cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n";
- HieroLMModel lm(vocabe.size(), nt_vocab.size());
-
- plm = &lm;
- ExhaustiveBottomUpParser parser(TD::Convert(-nt_vocab[0]), grammars);
-
- Hypergraph hg;
- const int kGoal = -TD::Convert("Goal");
- const int kLP = FD::Convert("LogProb");
- SparseVector<double> v; v.set_value(kLP, 1.0);
- vector<vector<unsigned> > derivs(corpuse.size());
- vector<Lattice> cl(corpuse.size());
- for (int ci = 0; ci < corpuse.size(); ++ci) {
- vector<int>& src = corpuse[ci];
- Lattice& lat = cl[ci];
- lat.resize(src.size());
- for (unsigned i = 0; i < src.size(); ++i)
- lat[i].push_back(LatticeArc(src[i], 0.0, 1));
- }
- for (int SS=0; SS < samples; ++SS) {
- const bool is_last = ((samples - 1) == SS);
- prob_t dlh = prob_t::One();
- for (int ci = 0; ci < corpuse.size(); ++ci) {
- const vector<int>& src = corpuse[ci];
- const Lattice& lat = cl[ci];
- cerr << TD::GetString(src) << endl;
- hg.clear();
- parser.Parse(lat, &hg); // exhaustive parse
- vector<unsigned>& d = derivs[ci];
- if (!is_last) DecrementDerivation(hg, d, &lm, &rng);
- for (unsigned i = 0; i < hg.edges_.size(); ++i) {
- TRule& r = *hg.edges_[i].rule_;
- if (r.lhs_ == kGoal)
- hg.edges_[i].edge_prob_ = prob_t::One();
- else
- hg.edges_[i].edge_prob_ = lm.Prob(r);
- }
- if (!is_last) {
- d.clear();
- SampleDerivation(hg, &rng, &d);
- IncrementDerivation(hg, derivs[ci], &lm, &rng);
- } else {
- prob_t p = TotalProb(hg);
- dlh *= p;
- cerr << " p(sentence) = " << log(p) << "\t" << log(dlh) << endl;
- }
- if (tofreelist.size() > 200000) {
- cerr << "Freeing ... ";
- for (unsigned i = 0; i < tofreelist.size(); ++i)
- delete tofreelist[i];
- tofreelist.clear();
- cerr << "Freed.\n";
- }
- }
- double llh = log(lm.Likelihood());
- cerr << "LLH=" << llh << "\tENTROPY=" << (-llh / log(2) / toks) << "\tPPL=" << pow(2, -llh / log(2) / toks) << endl;
- if (SS % 10 == 9) lm.ResampleHyperparameters(&rng);
- if (is_last) {
- double z = log(dlh);
- cerr << "TOTAL_PROB=" << z << "\tENTROPY=" << (-z / log(2) / toks) << "\tPPL=" << pow(2, -z / log(2) / toks) << endl;
- }
- }
- for (unsigned i = 0; i < nt_vocab.size(); ++i)
- cerr << lm.nts[i] << endl;
- return 0;
-}
-
diff --git a/gi/pf/make-freq-bins.pl b/gi/pf/make-freq-bins.pl
deleted file mode 100755
index fdcd3555..00000000
--- a/gi/pf/make-freq-bins.pl
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-my $BASE = 6;
-my $CUTOFF = 3;
-
-my %d;
-my $num = 0;
-while(<>){
- chomp;
- my @words = split /\s+/;
- for my $w (@words) {$d{$w}++; $num++;}
-}
-
-my @vocab = sort {$d{$b} <=> $d{$a}} keys %d;
-
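-# Bin by order of magnitude (base $BASE) of relative frequency:
-#   bin = max(0, -int(log_BASE(c(w)/N) + $CUTOFF))
-# so the most frequent words land in bin 0 and rare words in high bins.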
-for (my $i=0; $i<scalar @vocab; $i++) {
- my $most = $d{$vocab[$i]};
- my $least = 1;
-
- my $nl = -int(log($most / $num) / log($BASE) + $CUTOFF);
- if ($nl < 0) { $nl = 0; }
- print "$vocab[$i] $nl\n"
-}
-
-
diff --git a/gi/pf/mh_test.cc b/gi/pf/mh_test.cc
deleted file mode 100644
index 296e7285..00000000
--- a/gi/pf/mh_test.cc
+++ /dev/null
@@ -1,148 +0,0 @@
-#include "ccrp.h"
-
-#include <vector>
-#include <iostream>
-
-#include "tdict.h"
-#include "transliterations.h"
-
-using namespace std;
-
-MT19937 rng;
-
-static bool verbose = false;
-
-struct Model {
-
- Model() : bp(), base(0.2, 0.6) , ccrps(5, CCRP<int>(0.8, 0.5)) {}
-
- double p0(int x) const {
- assert(x > 0);
- assert(x < 5);
- return 1.0/4.0;
- }
-
- double llh() const {
- double lh = bp + base.log_crp_prob();
- for (int ctx = 1; ctx < 5; ++ctx)
- lh += ccrps[ctx].log_crp_prob();
- return lh;
- }
-
- double prob(int ctx, int x) const {
- assert(ctx > 0 && ctx < 5);
- return ccrps[ctx].prob(x, base.prob(x, p0(x)));
- }
-
- void increment(int ctx, int x) {
- assert(ctx > 0 && ctx < 5);
- if (ccrps[ctx].increment(x, base.prob(x, p0(x)), &rng)) {
- if (base.increment(x, p0(x), &rng)) {
- bp += log(1.0 / 4.0);
- }
- }
- }
-
- // this is just a biased estimate
- double est_base_prob(int x) {
- return (x + 1) * x / 40.0;
- }
-
- void increment_is(int ctx, int x) {
- assert(ctx > 0 && ctx < 5);
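- // Importance-sampled increment: each of PARTICLES particles proposes a
- // seating using the biased base estimate est_p as its proposal; the weight
- // w = gamma / q (truth over proposal) corrects the bias, and one particle
- // is kept in proportion to w.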
- SampleSet<double> ss;
- const int PARTICLES = 25;
- vector<CCRP<int> > s1s(PARTICLES, CCRP<int>(0.5,0.5));
- vector<CCRP<int> > sbs(PARTICLES, CCRP<int>(0.5,0.5));
- vector<double> sp0s(PARTICLES);
-
- CCRP<int> s1 = ccrps[ctx];
- CCRP<int> sb = base;
- double sp0 = bp;
- for (int pp = 0; pp < PARTICLES; ++pp) {
- if (pp > 0) {
- ccrps[ctx] = s1;
- base = sb;
- bp = sp0;
- }
-
- double q = 1;
- double gamma = 1;
- double est_p = est_base_prob(x);
- //base.prob(x, p0(x)) + rng.next() * 0.1;
- if (ccrps[ctx].increment(x, est_p, &rng, &q)) {
- gamma = q * base.prob(x, p0(x));
- q *= est_p;
- if (verbose) cerr << "(DP-base draw) ";
- double qq = -1;
- if (base.increment(x, p0(x), &rng, &qq)) {
- if (verbose) cerr << "(G0 draw) ";
- bp += log(p0(x));
- qq *= p0(x);
- }
- } else { gamma = q; }
- double w = gamma / q;
- if (verbose)
- cerr << "gamma=" << gamma << " q=" << q << "\tw=" << w << endl;
- ss.add(w);
- s1s[pp] = ccrps[ctx];
- sbs[pp] = base;
- sp0s[pp] = bp;
- }
- int ps = rng.SelectSample(ss);
- ccrps[ctx] = s1s[ps];
- base = sbs[ps];
- bp = sp0s[ps];
- if (verbose) {
- cerr << "SELECTED: " << ps << endl;
- static int cc = 0; cc++; if (cc ==10) exit(1);
- }
- }
-
- void decrement(int ctx, int x) {
- assert(ctx > 0 && ctx < 5);
- if (ccrps[ctx].decrement(x, &rng)) {
- if (base.decrement(x, &rng)) {
- bp -= log(p0(x));
- }
- }
- }
-
- double bp;
- CCRP<int> base;
- vector<CCRP<int> > ccrps;
-
-};
-
-int main(int argc, char** argv) {
- if (argc > 1) { verbose = true; }
- vector<int> counts(15, 0);
- vector<int> tcounts(15, 0);
- int points[] = {1,2, 2,2, 3,2, 4,1, 3, 4, 3, 3, 2, 3, 4, 1, 4, 1, 3, 2, 1, 3, 1, 4, 0, 0};
- double tlh = 0;
- double tt = 0;
- for (int n = 0; n < 1000; ++n) {
- if (n % 10 == 0) cerr << '.';
- if ((n+1) % 400 == 0) cerr << " [" << (n+1) << "]\n";
- Model m;
- for (int *x = points; *x; x += 2)
- m.increment(x[0], x[1]);
-
- for (int j = 0; j < 24; ++j) {
- for (int *x = points; *x; x += 2) {
- if (rng.next() < 0.8) {
- m.decrement(x[0], x[1]);
- m.increment_is(x[0], x[1]);
- }
- }
- }
- counts[m.base.num_customers()]++;
- tcounts[m.base.num_tables()]++;
- tlh += m.llh();
- tt += 1.0;
- }
- cerr << "mean LLH = " << (tlh / tt) << endl;
- for (int i = 0; i < 15; ++i)
- cerr << i << ": " << (counts[i] / tt) << "\t" << (tcounts[i] / tt) << endl;
-}
-
diff --git a/gi/pf/monotonic_pseg.h b/gi/pf/monotonic_pseg.h
deleted file mode 100644
index 10d171fe..00000000
--- a/gi/pf/monotonic_pseg.h
+++ /dev/null
@@ -1,89 +0,0 @@
-#ifndef _MONOTONIC_PSEG_H_
-#define _MONOTONIC_PSEG_H_
-
-#include <vector>
-
-#include "prob.h"
-#include "ccrp_nt.h"
-#include "trule.h"
-#include "base_distributions.h"
-
-template <typename BaseMeasure>
-struct MonotonicParallelSegementationModel {
- explicit MonotonicParallelSegementationModel(BaseMeasure& rcp0) :
- rp0(rcp0), base(prob_t::One()), rules(1,1), stop(1.0) {}
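- // A monotone derivation is a sequence of phrase pairs: rules are drawn
- // from a CRP with base measure rp0, and a second CRP over booleans models
- // the stop/continue decision after each rule.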
-
- void DecrementRule(const TRule& rule) {
- if (rules.decrement(rule))
- base /= rp0(rule);
- }
-
- void IncrementRule(const TRule& rule) {
- if (rules.increment(rule))
- base *= rp0(rule);
- }
-
- void IncrementRulesAndStops(const std::vector<TRulePtr>& rules) {
- for (int i = 0; i < rules.size(); ++i)
- IncrementRule(*rules[i]);
- if (rules.size()) IncrementContinue(rules.size() - 1);
- IncrementStop();
- }
-
- void DecrementRulesAndStops(const std::vector<TRulePtr>& rules) {
- for (int i = 0; i < rules.size(); ++i)
- DecrementRule(*rules[i]);
- if (rules.size()) {
- DecrementContinue(rules.size() - 1);
- DecrementStop();
- }
- }
-
- prob_t RuleProbability(const TRule& rule) const {
- prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule))));
- return p;
- }
-
- prob_t Likelihood() const {
- prob_t p = base;
- prob_t q; q.logeq(rules.log_crp_prob());
- p *= q;
- q.logeq(stop.log_crp_prob());
- p *= q;
- return p;
- }
-
- void IncrementStop() {
- stop.increment(true);
- }
-
- void IncrementContinue(int n = 1) {
- for (int i = 0; i < n; ++i)
- stop.increment(false);
- }
-
- void DecrementStop() {
- stop.decrement(true);
- }
-
- void DecrementContinue(int n = 1) {
- for (int i = 0; i < n; ++i)
- stop.decrement(false);
- }
-
- prob_t StopProbability() const {
- return prob_t(stop.prob(true, 0.5));
- }
-
- prob_t ContinueProbability() const {
- return prob_t(stop.prob(false, 0.5));
- }
-
- const BaseMeasure& rp0;
- prob_t base;
- CCRP_NoTable<TRule> rules;
- CCRP_NoTable<bool> stop;
-};
-
-#endif
-
diff --git a/gi/pf/ngram_base.cc b/gi/pf/ngram_base.cc
deleted file mode 100644
index 1299f06f..00000000
--- a/gi/pf/ngram_base.cc
+++ /dev/null
@@ -1,69 +0,0 @@
-#include "ngram_base.h"
-
-#include "lm/model.hh"
-#include "tdict.h"
-
-using namespace std;
-
-namespace {
-struct GICSVMapper : public lm::EnumerateVocab {
- GICSVMapper(vector<lm::WordIndex>* out) : out_(out), kLM_UNKNOWN_TOKEN(0) { out_->clear(); }
- void Add(lm::WordIndex index, const StringPiece &str) {
- const WordID cdec_id = TD::Convert(str.as_string());
- if (cdec_id >= out_->size())
- out_->resize(cdec_id + 1, kLM_UNKNOWN_TOKEN);
- (*out_)[cdec_id] = index;
- }
- vector<lm::WordIndex>* out_;
- const lm::WordIndex kLM_UNKNOWN_TOKEN;
-};
-}
-
-struct FixedNgramBaseImpl {
- FixedNgramBaseImpl(const string& param) {
- GICSVMapper vm(&cdec2klm_map_);
- lm::ngram::Config conf;
- conf.enumerate_vocab = &vm;
- cerr << "Reading character LM from " << param << endl;
- model = new lm::ngram::ProbingModel(param.c_str(), conf);
- order = model->Order();
- kEOS = MapWord(TD::Convert("</s>"));
- assert(kEOS > 0);
- }
-
- lm::WordIndex MapWord(const WordID w) const {
- if (w < cdec2klm_map_.size()) return cdec2klm_map_[w];
- return 0;
- }
-
- ~FixedNgramBaseImpl() { delete model; }
-
- prob_t StringProbability(const vector<WordID>& s) const {
- lm::ngram::State state = model->BeginSentenceState();
- double prob = 0;
- for (unsigned i = 0; i < s.size(); ++i) {
- const lm::ngram::State scopy(state);
- prob += model->Score(scopy, MapWord(s[i]), state);
- }
- const lm::ngram::State scopy(state);
- prob += model->Score(scopy, kEOS, state);
- prob_t p; p.logeq(prob * log(10));
- return p;
- }
-
- lm::ngram::ProbingModel* model;
- unsigned order;
- vector<lm::WordIndex> cdec2klm_map_;
- lm::WordIndex kEOS;
-};
-
-FixedNgramBase::~FixedNgramBase() { delete impl; }
-
-FixedNgramBase::FixedNgramBase(const string& lmfname) {
- impl = new FixedNgramBaseImpl(lmfname);
-}
-
-prob_t FixedNgramBase::StringProbability(const vector<WordID>& s) const {
- return impl->StringProbability(s);
-}
-
diff --git a/gi/pf/ngram_base.h b/gi/pf/ngram_base.h
deleted file mode 100644
index 4ea999f3..00000000
--- a/gi/pf/ngram_base.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef _NGRAM_BASE_H_
-#define _NGRAM_BASE_H_
-
-#include <string>
-#include <vector>
-#include "trule.h"
-#include "wordid.h"
-#include "prob.h"
-
-struct FixedNgramBaseImpl;
-struct FixedNgramBase {
- FixedNgramBase(const std::string& lmfname);
- ~FixedNgramBase();
- prob_t StringProbability(const std::vector<WordID>& s) const;
-
- prob_t operator()(const TRule& rule) const {
- return StringProbability(rule.e_);
- }
-
- private:
- FixedNgramBaseImpl* impl;
-
-};
-
-#endif
diff --git a/gi/pf/nuisance_test.cc b/gi/pf/nuisance_test.cc
deleted file mode 100644
index fc0af9cb..00000000
--- a/gi/pf/nuisance_test.cc
+++ /dev/null
@@ -1,161 +0,0 @@
-#include "ccrp.h"
-
-#include <vector>
-#include <iostream>
-
-#include "tdict.h"
-#include "transliterations.h"
-
-using namespace std;
-
-MT19937 rng;
-
-ostream& operator<<(ostream&os, const vector<int>& v) {
- os << '[' << v[0];
- if (v.size() == 2) os << ' ' << v[1];
- return os << ']';
-}
-
-struct Base {
- Base() : llh(), v(2), v1(1), v2(1), crp(0.25, 0.5) {}
- inline double p0(const vector<int>& x) const {
- double p = 0.75;
- if (x.size() == 2) p = 0.25;
- p *= 1.0 / 3.0;
- if (x.size() == 2) p *= 1.0 / 3.0;
- return p;
- }
- double est_deriv_prob(int a, int b, int seg) const {
- assert(a > 0 && a < 4); // a \in {1,2,3}
- assert(b > 0 && b < 4); // b \in {1,2,3}
- assert(seg == 0 || seg == 1); // seg \in {0,1}
- if (seg == 0) {
- v[0] = a;
- v[1] = b;
- return crp.prob(v, p0(v));
- } else {
- v1[0] = a;
- v2[0] = b;
- return crp.prob(v1, p0(v1)) * crp.prob(v2, p0(v2));
- }
- }
- double est_marginal_prob(int a, int b) const {
- return est_deriv_prob(a,b,0) + est_deriv_prob(a,b,1);
- }
- int increment(int a, int b, double* pw = NULL) {
- double p1 = est_deriv_prob(a, b, 0);
- double p2 = est_deriv_prob(a, b, 1);
- //p1 = 0.5; p2 = 0.5;
- int seg = rng.SelectSample(p1,p2);
- double tmp = 0;
- if (!pw) pw = &tmp;
- double& w = *pw;
- if (seg == 0) {
- v[0] = a;
- v[1] = b;
- w = crp.prob(v, p0(v)) / p1;
- if (crp.increment(v, p0(v), &rng)) {
- llh += log(p0(v));
- }
- } else {
- v1[0] = a;
- w = crp.prob(v1, p0(v1)) / p2;
- if (crp.increment(v1, p0(v1), &rng)) {
- llh += log(p0(v1));
- }
- v2[0] = b;
- w *= crp.prob(v2, p0(v2));
- if (crp.increment(v2, p0(v2), &rng)) {
- llh += log(p0(v2));
- }
- }
- return seg;
- }
- void increment(int a, int b, int seg) {
- if (seg == 0) {
- v[0] = a;
- v[1] = b;
- if (crp.increment(v, p0(v), &rng)) {
- llh += log(p0(v));
- }
- } else {
- v1[0] = a;
- if (crp.increment(v1, p0(v1), &rng)) {
- llh += log(p0(v1));
- }
- v2[0] = b;
- if (crp.increment(v2, p0(v2), &rng)) {
- llh += log(p0(v2));
- }
- }
- }
- void decrement(int a, int b, int seg) {
- if (seg == 0) {
- v[0] = a;
- v[1] = b;
- if (crp.decrement(v, &rng)) {
- llh -= log(p0(v));
- }
- } else {
- v1[0] = a;
- if (crp.decrement(v1, &rng)) {
- llh -= log(p0(v1));
- }
- v2[0] = b;
- if (crp.decrement(v2, &rng)) {
- llh -= log(p0(v2));
- }
- }
- }
- double log_likelihood() const {
- return llh + crp.log_crp_prob();
- }
- double llh;
- mutable vector<int> v, v1, v2;
- CCRP<vector<int> > crp;
-};
-
-int main(int argc, char** argv) {
- double tl = 0;
- const int ITERS = 1000;
- const int PARTICLES = 20;
- const int DATAPOINTS = 50;
- WordID x = TD::Convert("souvenons");
- WordID y = TD::Convert("remember");
- vector<WordID> src; TD::ConvertSentence("s o u v e n o n s", &src);
- vector<WordID> trg; TD::ConvertSentence("r e m e m b e r", &trg);
-// Transliterations xx;
-// xx.Initialize(x, src, y, trg);
-// return 1;
-
- for (int j = 0; j < ITERS; ++j) {
- Base b;
- vector<int> segs(DATAPOINTS);
- SampleSet<double> ss;
- vector<int> sss;
- for (int i = 0; i < DATAPOINTS; i++) {
- ss.clear();
- sss.clear();
- int x = ((i / 10) % 3) + 1;
- int y = (i % 3) + 1;
- //double ep = b.est_marginal_prob(x,y);
- //cerr << "est p(" << x << "," << y << ") = " << ep << endl;
- for (int n = 0; n < PARTICLES; ++n) {
- double w;
- int seg = b.increment(x,y,&w);
- //cerr << seg << " w=" << w << endl;
- ss.add(w);
- sss.push_back(seg);
- b.decrement(x,y,seg);
- }
- int seg = sss[rng.SelectSample(ss)];
- b.increment(x, y, seg);
- //cerr << "Selected: " << seg << endl;
- //return 1;
- segs[i] = seg;
- }
- tl += b.log_likelihood();
- }
- cerr << "LLH=" << tl / ITERS << endl;
-}
-
diff --git a/gi/pf/os_phrase.h b/gi/pf/os_phrase.h
deleted file mode 100644
index dfe40cb1..00000000
--- a/gi/pf/os_phrase.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef _OS_PHRASE_H_
-#define _OS_PHRASE_H_
-
-#include <iostream>
-#include <vector>
-#include "tdict.h"
-
-inline std::ostream& operator<<(std::ostream& os, const std::vector<WordID>& p) {
- os << '[';
- for (int i = 0; i < p.size(); ++i)
- os << (i==0 ? "" : " ") << TD::Convert(p[i]);
- return os << ']';
-}
-
-#endif
diff --git a/gi/pf/pf.h b/gi/pf/pf.h
deleted file mode 100644
index ede7cda8..00000000
--- a/gi/pf/pf.h
+++ /dev/null
@@ -1,84 +0,0 @@
-#ifndef _PF_H_
-#define _PF_H_
-
-#include <cassert>
-#include <vector>
-#include "sampler.h"
-#include "prob.h"
-
-template <typename ParticleType>
-struct ParticleRenormalizer {
- void operator()(std::vector<ParticleType>* pv) const {
- if (pv->empty()) return;
- prob_t z = prob_t::Zero();
- for (unsigned i = 0; i < pv->size(); ++i)
- z += (*pv)[i].weight;
- assert(z > prob_t::Zero());
- for (unsigned i = 0; i < pv->size(); ++i)
- (*pv)[i].weight /= z;
- }
-};
-
-template <typename ParticleType>
-struct MultinomialResampleFilter {
- explicit MultinomialResampleFilter(MT19937* rng) : rng_(rng) {}
-
- void operator()(std::vector<ParticleType>* pv) {
- if (pv->empty()) return;
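- // Multinomial resampling: draw N particles i.i.d. in proportion to their
- // weights, then reset every surviving particle's weight to 1/N.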
- std::vector<ParticleType>& ps = *pv;
- SampleSet<prob_t> ss;
- for (int i = 0; i < ps.size(); ++i)
- ss.add(ps[i].weight);
- std::vector<ParticleType> nps; nps.reserve(ps.size());
- const prob_t uniform_weight(1.0 / ps.size());
- for (int i = 0; i < ps.size(); ++i) {
- nps.push_back(ps[rng_->SelectSample(ss)]);
- nps[i].weight = uniform_weight;
- }
- nps.swap(ps);
- }
-
- private:
- MT19937* rng_;
-};
-
-template <typename ParticleType>
-struct SystematicResampleFilter {
- explicit SystematicResampleFilter(MT19937* rng) : rng_(rng), renorm_() {}
-
- void operator()(std::vector<ParticleType>* pv) {
- if (pv->empty()) return;
- renorm_(pv);
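- // Systematic resampling: lay N evenly spaced points (sharing one uniform
- // offset) over the cumulative weights; particle i is copied once for each
- // point that lands in its weight interval.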
- std::vector<ParticleType>& ps = *pv;
- std::vector<ParticleType> nps; nps.reserve(ps.size());
- double lower = 0, upper = 0;
- const double skip = 1.0 / ps.size();
- double u_j = rng_->next() * skip;
- //std::cerr << "u_0: " << u_j << std::endl;
- int j = 0;
- for (unsigned i = 0; i < ps.size(); ++i) {
- upper += ps[i].weight.as_float();
- //std::cerr << "lower: " << lower << " upper: " << upper << std::endl;
- // how many children does ps[i] have?
- while (u_j < lower) { u_j += skip; ++j; }
- while (u_j >= lower && u_j <= upper) {
- assert(j < ps.size());
- nps.push_back(ps[i]);
- u_j += skip;
- //std::cerr << " add u_j=" << u_j << std::endl;
- ++j;
- }
- lower = upper;
- }
- //std::cerr << ps.size() << " " << nps.size() << "\n";
- assert(ps.size() == nps.size());
- //exit(1);
- ps.swap(nps);
- }
-
- private:
- MT19937* rng_;
- ParticleRenormalizer<ParticleType> renorm_;
-};
-
-#endif
diff --git a/gi/pf/pf_test.cc b/gi/pf/pf_test.cc
deleted file mode 100644
index 296e7285..00000000
--- a/gi/pf/pf_test.cc
+++ /dev/null
@@ -1,148 +0,0 @@
-#include "ccrp.h"
-
-#include <vector>
-#include <iostream>
-
-#include "tdict.h"
-#include "transliterations.h"
-
-using namespace std;
-
-MT19937 rng;
-
-static bool verbose = false;
-
-struct Model {
-
- Model() : bp(), base(0.2, 0.6) , ccrps(5, CCRP<int>(0.8, 0.5)) {}
-
- double p0(int x) const {
- assert(x > 0);
- assert(x < 5);
- return 1.0/4.0;
- }
-
- double llh() const {
- double lh = bp + base.log_crp_prob();
- for (int ctx = 1; ctx < 5; ++ctx)
- lh += ccrps[ctx].log_crp_prob();
- return lh;
- }
-
- double prob(int ctx, int x) const {
- assert(ctx > 0 && ctx < 5);
- return ccrps[ctx].prob(x, base.prob(x, p0(x)));
- }
-
- void increment(int ctx, int x) {
- assert(ctx > 0 && ctx < 5);
- if (ccrps[ctx].increment(x, base.prob(x, p0(x)), &rng)) {
- if (base.increment(x, p0(x), &rng)) {
- bp += log(1.0 / 4.0);
- }
- }
- }
-
- // this is just a biased estimate
- double est_base_prob(int x) {
- return (x + 1) * x / 40.0;
- }
-
- void increment_is(int ctx, int x) {
- assert(ctx > 0 && ctx < 5);
- SampleSet<double> ss;
- const int PARTICLES = 25;
- vector<CCRP<int> > s1s(PARTICLES, CCRP<int>(0.5,0.5));
- vector<CCRP<int> > sbs(PARTICLES, CCRP<int>(0.5,0.5));
- vector<double> sp0s(PARTICLES);
-
- CCRP<int> s1 = ccrps[ctx];
- CCRP<int> sb = base;
- double sp0 = bp;
- for (int pp = 0; pp < PARTICLES; ++pp) {
- if (pp > 0) {
- ccrps[ctx] = s1;
- base = sb;
- bp = sp0;
- }
-
- double q = 1;
- double gamma = 1;
- double est_p = est_base_prob(x);
- //base.prob(x, p0(x)) + rng.next() * 0.1;
- if (ccrps[ctx].increment(x, est_p, &rng, &q)) {
- gamma = q * base.prob(x, p0(x));
- q *= est_p;
- if (verbose) cerr << "(DP-base draw) ";
- double qq = -1;
- if (base.increment(x, p0(x), &rng, &qq)) {
- if (verbose) cerr << "(G0 draw) ";
- bp += log(p0(x));
- qq *= p0(x);
- }
- } else { gamma = q; }
- double w = gamma / q;
- if (verbose)
- cerr << "gamma=" << gamma << " q=" << q << "\tw=" << w << endl;
- ss.add(w);
- s1s[pp] = ccrps[ctx];
- sbs[pp] = base;
- sp0s[pp] = bp;
- }
- int ps = rng.SelectSample(ss);
- ccrps[ctx] = s1s[ps];
- base = sbs[ps];
- bp = sp0s[ps];
- if (verbose) {
- cerr << "SELECTED: " << ps << endl;
- static int cc = 0; cc++; if (cc ==10) exit(1);
- }
- }
-
- void decrement(int ctx, int x) {
- assert(ctx > 0 && ctx < 5);
- if (ccrps[ctx].decrement(x, &rng)) {
- if (base.decrement(x, &rng)) {
- bp -= log(p0(x));
- }
- }
- }
-
- double bp;
- CCRP<int> base;
- vector<CCRP<int> > ccrps;
-
-};
-
-int main(int argc, char** argv) {
- if (argc > 1) { verbose = true; }
- vector<int> counts(15, 0);
- vector<int> tcounts(15, 0);
- int points[] = {1,2, 2,2, 3,2, 4,1, 3, 4, 3, 3, 2, 3, 4, 1, 4, 1, 3, 2, 1, 3, 1, 4, 0, 0};
- double tlh = 0;
- double tt = 0;
- for (int n = 0; n < 1000; ++n) {
- if (n % 10 == 0) cerr << '.';
- if ((n+1) % 400 == 0) cerr << " [" << (n+1) << "]\n";
- Model m;
- for (int *x = points; *x; x += 2)
- m.increment(x[0], x[1]);
-
- for (int j = 0; j < 24; ++j) {
- for (int *x = points; *x; x += 2) {
- if (rng.next() < 0.8) {
- m.decrement(x[0], x[1]);
- m.increment_is(x[0], x[1]);
- }
- }
- }
- counts[m.base.num_customers()]++;
- tcounts[m.base.num_tables()]++;
- tlh += m.llh();
- tt += 1.0;
- }
- cerr << "mean LLH = " << (tlh / tt) << endl;
- for (int i = 0; i < 15; ++i)
- cerr << i << ": " << (counts[i] / tt) << "\t" << (tcounts[i] / tt) << endl;
-}
-
diff --git a/gi/pf/pfbrat.cc b/gi/pf/pfbrat.cc
deleted file mode 100644
index 832f22cf..00000000
--- a/gi/pf/pfbrat.cc
+++ /dev/null
@@ -1,543 +0,0 @@
-#include <iostream>
-#include <tr1/memory>
-#include <queue>
-
-#include <boost/functional.hpp>
-#include <boost/multi_array.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "viterbi.h"
-#include "hg.h"
-#include "trule.h"
-#include "tdict.h"
-#include "filelib.h"
-#include "dict.h"
-#include "sampler.h"
-#include "ccrp_nt.h"
-#include "cfg_wfst_composer.h"
-
-using namespace std;
-using namespace tr1;
-namespace po = boost::program_options;
-
-static unsigned kMAX_SRC_PHRASE;
-static unsigned kMAX_TRG_PHRASE;
-struct FSTState;
-
-double log_poisson(unsigned x, const double& lambda) {
- assert(lambda > 0.0);
- return log(lambda) * x - lgamma(x + 1) - lambda;
-}
-
-struct ConditionalBase {
- explicit ConditionalBase(const double m1mixture, const unsigned vocab_e_size, const string& model1fname) :
- kM1MIXTURE(m1mixture),
- kUNIFORM_MIXTURE(1.0 - m1mixture),
- kUNIFORM_TARGET(1.0 / vocab_e_size),
- kNULL(TD::Convert("<eps>")) {
- assert(m1mixture >= 0.0 && m1mixture <= 1.0);
- assert(vocab_e_size > 0);
- LoadModel1(model1fname);
- }
-
- void LoadModel1(const string& fname) {
- cerr << "Loading Model 1 parameters from " << fname << " ..." << endl;
- ReadFile rf(fname);
- istream& in = *rf.stream();
- string line;
- unsigned lc = 0;
- while(getline(in, line)) {
- ++lc;
- int cur = 0;
- int start = 0;
- while(cur < line.size() && line[cur] != ' ') { ++cur; }
- assert(cur != line.size());
- line[cur] = 0;
- const WordID src = TD::Convert(&line[0]);
- ++cur;
- start = cur;
- while(cur < line.size() && line[cur] != ' ') { ++cur; }
- assert(cur != line.size());
- line[cur] = 0;
- WordID trg = TD::Convert(&line[start]);
- const double logprob = strtod(&line[cur + 1], NULL);
- if (src >= ttable.size()) ttable.resize(src + 1);
- ttable[src][trg].logeq(logprob);
- }
- cerr << " read " << lc << " parameters.\n";
- }
-
- // return logp0 of rule.e_ | rule.f_
- prob_t operator()(const TRule& rule) const {
- const int flen = rule.f_.size();
- const int elen = rule.e_.size();
- prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
- prob_t p;
- p.logeq(log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01)
- for (int i = 0; i < elen; ++i) { // for each position i in e-RHS
- const WordID trg = rule.e_[i];
- prob_t tp = prob_t::Zero();
- for (int j = -1; j < flen; ++j) {
- const WordID src = j < 0 ? kNULL : rule.f_[j];
- const map<WordID, prob_t>::const_iterator it = ttable[src].find(trg);
- if (it != ttable[src].end()) {
- tp += kM1MIXTURE * it->second;
- }
- tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
- }
- tp *= uniform_src_alignment; // draw a_i ~uniform
- p *= tp; // draw e_i ~Model1(f_a_i) / uniform
- }
- return p;
- }
-
- const prob_t kM1MIXTURE; // Model 1 mixture component
- const prob_t kUNIFORM_MIXTURE; // uniform mixture component
- const prob_t kUNIFORM_TARGET;
- const WordID kNULL;
- vector<map<WordID, prob_t> > ttable;
-};
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples")
- ("input,i",po::value<string>(),"Read parallel data from")
- ("max_src_phrase",po::value<unsigned>()->default_value(3),"Maximum length of source language phrases")
- ("max_trg_phrase",po::value<unsigned>()->default_value(3),"Maximum length of target language phrases")
- ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)")
- ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution")
- ("random_seed,S",po::value<uint32_t>(), "Random seed");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || (conf->count("input") == 0)) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-void ReadParallelCorpus(const string& filename,
- vector<vector<WordID> >* f,
- vector<vector<int> >* e,
- set<int>* vocab_f,
- set<int>* vocab_e) {
- f->clear();
- e->clear();
- vocab_f->clear();
- vocab_e->clear();
- istream* in;
- if (filename == "-")
- in = &cin;
- else
- in = new ifstream(filename.c_str());
- assert(*in);
- string line;
- const WordID kDIV = TD::Convert("|||");
- vector<WordID> tmp;
- while(*in) {
- getline(*in, line);
- if (line.empty() && !*in) break;
- e->push_back(vector<int>());
- f->push_back(vector<int>());
- vector<int>& le = e->back();
- vector<int>& lf = f->back();
- tmp.clear();
- TD::ConvertSentence(line, &tmp);
- bool isf = true;
- for (unsigned i = 0; i < tmp.size(); ++i) {
- const int cur = tmp[i];
- if (isf) {
- if (kDIV == cur) { isf = false; } else {
- lf.push_back(cur);
- vocab_f->insert(cur);
- }
- } else {
- assert(cur != kDIV);
- le.push_back(cur);
- vocab_e->insert(cur);
- }
- }
- assert(isf == false);
- }
- if (in != &cin) delete in;
-}
-
-struct UniphraseLM {
- UniphraseLM(const vector<vector<int> >& corpus,
- const set<int>& vocab,
- const po::variables_map& conf) :
- phrases_(1,1),
- gen_(1,1),
- corpus_(corpus),
- uniform_word_(1.0 / vocab.size()),
- gen_p0_(0.5),
- p_end_(0.5),
- use_poisson_(conf.count("poisson_length") > 0) {}
-
- void ResampleHyperparameters(MT19937* rng) {
- phrases_.resample_hyperparameters(rng);
- gen_.resample_hyperparameters(rng);
- cerr << " " << phrases_.alpha();
- }
-
- CCRP_NoTable<vector<int> > phrases_;
- CCRP_NoTable<bool> gen_;
- vector<vector<bool> > z_; // z_[i][j]: is there a phrase boundary after the jth word of sentence i?
- const vector<vector<int> >& corpus_;
- const double uniform_word_;
- const double gen_p0_;
- const double p_end_; // in base length distribution, p of the end of a phrase
- const bool use_poisson_;
-};
-
-struct Reachability {
- boost::multi_array<bool, 4> edges; // edges[src_covered][trg_covered][src_delta][trg_delta] is this edge worth exploring?
- boost::multi_array<short, 2> max_src_delta; // msd[src_covered][trg_covered] -- the largest src delta that's valid
-
- Reachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) :
- edges(boost::extents[srclen][trglen][src_max_phrase_len+1][trg_max_phrase_len+1]),
- max_src_delta(boost::extents[srclen][trglen]) {
- ComputeReachability(srclen, trglen, src_max_phrase_len, trg_max_phrase_len);
- }
-
- private:
- struct SState {
- SState() : prev_src_covered(), prev_trg_covered() {}
- SState(int i, int j) : prev_src_covered(i), prev_trg_covered(j) {}
- int prev_src_covered;
- int prev_trg_covered;
- };
-
- struct NState {
- NState() : next_src_covered(), next_trg_covered() {}
- NState(int i, int j) : next_src_covered(i), next_trg_covered(j) {}
- int next_src_covered;
- int next_trg_covered;
- };
-
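- // two-pass DP: the forward pass below enumerates every (src_covered, trg_covered)
- // cell reachable from (0,0) using phrase pairs of bounded length, storing back
- // pointers; the backward sweep then keeps only the edges that lie on some complete
- // path to (srclen, trglen), filling in edges[][][][] and max_src_delta[][]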
- void ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) {
- typedef boost::multi_array<vector<SState>, 2> array_type;
- array_type a(boost::extents[srclen + 1][trglen + 1]);
- a[0][0].push_back(SState());
- for (int i = 0; i < srclen; ++i) {
- for (int j = 0; j < trglen; ++j) {
- if (a[i][j].size() == 0) continue;
- const SState prev(i,j);
- for (int k = 1; k <= src_max_phrase_len; ++k) {
- if ((i + k) > srclen) continue;
- for (int l = 1; l <= trg_max_phrase_len; ++l) {
- if ((j + l) > trglen) continue;
- a[i + k][j + l].push_back(prev);
- }
- }
- }
- }
- a[0][0].clear();
- cerr << "Final cell contains " << a[srclen][trglen].size() << " back pointers\n";
- assert(a[srclen][trglen].size() > 0);
-
- typedef boost::multi_array<bool, 2> rarray_type;
- rarray_type r(boost::extents[srclen + 1][trglen + 1]);
-// typedef boost::multi_array<vector<NState>, 2> narray_type;
-// narray_type b(boost::extents[srclen + 1][trglen + 1]);
- r[srclen][trglen] = true;
- for (int i = srclen; i >= 0; --i) {
- for (int j = trglen; j >= 0; --j) {
- vector<SState>& prevs = a[i][j];
- if (!r[i][j]) { prevs.clear(); }
-// const NState nstate(i,j);
- for (int k = 0; k < prevs.size(); ++k) {
- r[prevs[k].prev_src_covered][prevs[k].prev_trg_covered] = true;
- int src_delta = i - prevs[k].prev_src_covered;
- edges[prevs[k].prev_src_covered][prevs[k].prev_trg_covered][src_delta][j - prevs[k].prev_trg_covered] = true;
- short &msd = max_src_delta[prevs[k].prev_src_covered][prevs[k].prev_trg_covered];
- if (src_delta > msd) msd = src_delta;
-// b[prevs[k].prev_src_covered][prevs[k].prev_trg_covered].push_back(nstate);
- }
- }
- }
- assert(!edges[0][0][1][0]);
- assert(!edges[0][0][0][1]);
- assert(!edges[0][0][0][0]);
- cerr << " MAX SRC DELTA[0][0] = " << max_src_delta[0][0] << endl;
- assert(max_src_delta[0][0] > 0);
- //cerr << "First cell contains " << b[0][0].size() << " forward pointers\n";
- //for (int i = 0; i < b[0][0].size(); ++i) {
- // cerr << " -> (" << b[0][0][i].next_src_covered << "," << b[0][0][i].next_trg_covered << ")\n";
- //}
- }
-};
-
-struct FSTState; // forward declaration so the stream operator can be declared first
-ostream& operator<<(ostream& os, const FSTState& q);
-struct FSTState {
- explicit FSTState(int src_size) :
- trg_covered_(),
- src_covered_(),
- src_coverage_(src_size) {}
-
- FSTState(short trg_covered, short src_covered, const vector<bool>& src_coverage, const vector<short>& src_prefix) :
- trg_covered_(trg_covered),
- src_covered_(src_covered),
- src_coverage_(src_coverage),
- src_prefix_(src_prefix) {
- if (src_coverage_.size() == src_covered) {
- assert(src_prefix.size() == 0);
- }
- }
-
- // if we extend by the word at src_position, what are
- // the next states that are reachable and lie on a valid
- // path to the final state?
- vector<FSTState> Extensions(int src_position, int src_len, int trg_len, const Reachability& r) const {
- assert(src_position < src_coverage_.size());
- if (src_coverage_[src_position]) {
- cerr << "Trying to extend " << *this << " with position " << src_position << endl;
- abort();
- }
- vector<bool> ncvg = src_coverage_;
- ncvg[src_position] = true;
-
- vector<FSTState> res;
- const int trg_remaining = trg_len - trg_covered_;
- if (trg_remaining <= 0) {
- cerr << "Target appears to have been covered: " << *this << " (trg_len=" << trg_len << ",trg_covered=" << trg_covered_ << ")" << endl;
- abort();
- }
- const int src_remaining = src_len - src_covered_;
- if (src_remaining <= 0) {
- cerr << "Source appears to have been covered: " << *this << endl;
- abort();
- }
-
- for (int tc = 1; tc <= kMAX_TRG_PHRASE; ++tc) {
- if (r.edges[src_covered_][trg_covered_][src_prefix_.size() + 1][tc]) {
- int nc = src_prefix_.size() + 1 + src_covered_;
- res.push_back(FSTState(trg_covered_ + tc, nc, ncvg, vector<short>()));
- }
- }
-
- if ((src_prefix_.size() + 1) < r.max_src_delta[src_covered_][trg_covered_]) {
- vector<short> nsp = src_prefix_;
- nsp.push_back(src_position);
- res.push_back(FSTState(trg_covered_, src_covered_, ncvg, nsp));
- }
-
- if (res.size() == 0) {
- cerr << *this << " can't be extended!\n";
- abort();
- }
- return res;
- }
-
- short trg_covered_, src_covered_;
- vector<bool> src_coverage_;
- vector<short> src_prefix_;
-};
-bool operator<(const FSTState& q, const FSTState& r) {
- if (q.trg_covered_ != r.trg_covered_) return q.trg_covered_ < r.trg_covered_;
- if (q.src_covered_ != r.src_covered_) return q.src_covered_ < r.src_covered_;
- if (q.src_coverage_ != r.src_coverage_) return q.src_coverage_ < r.src_coverage_;
- return q.src_prefix_ < r.src_prefix_;
-}
-
-ostream& operator<<(ostream& os, const FSTState& q) {
- os << "[" << q.trg_covered_ << " : ";
- for (int i = 0; i < q.src_coverage_.size(); ++i)
- os << q.src_coverage_[i];
- os << " : <";
- for (int i = 0; i < q.src_prefix_.size(); ++i) {
- if (i != 0) os << ' ';
- os << q.src_prefix_[i];
- }
- return os << ">]";
-}
-
-struct MyModel {
- MyModel(ConditionalBase& rcp0) : rp0(rcp0) {}
- typedef unordered_map<vector<WordID>, CCRP_NoTable<TRule>, boost::hash<vector<WordID> > > SrcToRuleCRPMap;
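- // one CRP per distinct source phrase: rules that share f_ share a restaurant
- // whose base measure is the Model 1 conditional p0(e | f) defined above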
-
- void DecrementRule(const TRule& rule) {
- SrcToRuleCRPMap::iterator it = rules.find(rule.f_);
- assert(it != rules.end());
- it->second.decrement(rule);
- if (it->second.num_customers() == 0) rules.erase(it);
- }
-
- void IncrementRule(const TRule& rule) {
- SrcToRuleCRPMap::iterator it = rules.find(rule.f_);
- if (it == rules.end()) {
- CCRP_NoTable<TRule> crp(1,1);
- it = rules.insert(make_pair(rule.f_, crp)).first;
- }
- it->second.increment(rule);
- }
-
- // conditioned on rule.f_
- prob_t RuleConditionalProbability(const TRule& rule) const {
- const prob_t base = rp0(rule);
- SrcToRuleCRPMap::const_iterator it = rules.find(rule.f_);
- if (it == rules.end()) {
- return base;
- } else {
- const double lp = it->second.logprob(rule, log(base));
- prob_t q; q.logeq(lp);
- return q;
- }
- }
-
- const ConditionalBase& rp0;
- SrcToRuleCRPMap rules;
-};
-
-struct MyFST : public WFST {
- MyFST(const vector<WordID>& ssrc, const vector<WordID>& strg, MyModel* m) :
- src(ssrc), trg(strg),
- r(src.size(),trg.size(),kMAX_SRC_PHRASE, kMAX_TRG_PHRASE),
- model(m) {
- FSTState in(src.size());
- cerr << " INIT: " << in << endl;
- init = GetNode(in);
- for (int i = 0; i < in.src_coverage_.size(); ++i) in.src_coverage_[i] = true;
- in.src_covered_ = src.size();
- in.trg_covered_ = trg.size();
- cerr << "FINAL: " << in << endl;
- final = GetNode(in);
- }
- virtual const WFSTNode* Final() const;
- virtual const WFSTNode* Initial() const;
-
- const WFSTNode* GetNode(const FSTState& q);
- map<FSTState, boost::shared_ptr<WFSTNode> > m;
- const vector<WordID>& src;
- const vector<WordID>& trg;
- Reachability r;
- const WFSTNode* init;
- const WFSTNode* final;
- MyModel* model;
-};
-
-struct MyNode : public WFSTNode {
- MyNode(const FSTState& q, MyFST* fst) : state(q), container(fst) {}
- virtual vector<pair<const WFSTNode*, TRulePtr> > ExtendInput(unsigned srcindex) const;
- const FSTState state;
- mutable MyFST* container;
-};
-
-vector<pair<const WFSTNode*, TRulePtr> > MyNode::ExtendInput(unsigned srcindex) const {
- cerr << "EXTEND " << state << " with " << srcindex << endl;
- vector<FSTState> ext = state.Extensions(srcindex, container->src.size(), container->trg.size(), container->r);
- vector<pair<const WFSTNode*,TRulePtr> > res(ext.size());
- for (unsigned i = 0; i < ext.size(); ++i) {
- res[i].first = container->GetNode(ext[i]);
- if (ext[i].src_prefix_.size() == 0) {
- const unsigned trg_from = state.trg_covered_;
- const unsigned trg_to = ext[i].trg_covered_;
- const unsigned prev_prfx_size = state.src_prefix_.size();
- res[i].second.reset(new TRule);
- res[i].second->lhs_ = -TD::Convert("X");
- vector<WordID>& src = res[i].second->f_;
- vector<WordID>& trg = res[i].second->e_;
- src.resize(prev_prfx_size + 1);
- for (unsigned j = 0; j < prev_prfx_size; ++j)
- src[j] = container->src[state.src_prefix_[j]];
- src[prev_prfx_size] = container->src[srcindex];
- for (unsigned j = trg_from; j < trg_to; ++j)
- trg.push_back(container->trg[j]);
- res[i].second->scores_.set_value(FD::Convert("Proposal"), log(container->model->RuleConditionalProbability(*res[i].second)));
- }
- }
- return res;
-}
-
-const WFSTNode* MyFST::GetNode(const FSTState& q) {
- boost::shared_ptr<WFSTNode>& res = m[q];
- if (!res) {
- res.reset(new MyNode(q, this));
- }
- return &*res;
-}
-
-const WFSTNode* MyFST::Final() const {
- return final;
-}
-
-const WFSTNode* MyFST::Initial() const {
- return init;
-}
-
-int main(int argc, char** argv) {
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
- kMAX_TRG_PHRASE = conf["max_trg_phrase"].as<unsigned>();
- kMAX_SRC_PHRASE = conf["max_src_phrase"].as<unsigned>();
-
- if (!conf.count("model1")) {
- cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n";
- return 1;
- }
- boost::shared_ptr<MT19937> prng;
- if (conf.count("random_seed"))
- prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
- else
- prng.reset(new MT19937);
- MT19937& rng = *prng;
-
- vector<vector<int> > corpuse, corpusf;
- set<int> vocabe, vocabf;
- ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
- cerr << "f-Corpus size: " << corpusf.size() << " sentences\n";
- cerr << "f-Vocabulary size: " << vocabf.size() << " types\n";
- cerr << "f-Corpus size: " << corpuse.size() << " sentences\n";
- cerr << "f-Vocabulary size: " << vocabe.size() << " types\n";
- assert(corpusf.size() == corpuse.size());
-
- ConditionalBase lp0(conf["model1_interpolation_weight"].as<double>(),
- vocabe.size(),
- conf["model1"].as<string>());
- MyModel m(lp0);
-
- TRule x("[X] ||| kAnwntR myN ||| at the convent ||| 0");
- m.IncrementRule(x);
- TRule y("[X] ||| nY dyN ||| gave ||| 0");
- m.IncrementRule(y);
-
-
- MyFST fst(corpusf[0], corpuse[0], &m);
- ifstream in("./kimura.g");
- assert(in);
- CFG_WFSTComposer comp(fst);
- Hypergraph hg;
- bool succeed = comp.Compose(&in, &hg);
- hg.PrintGraphviz();
- if (succeed) { cerr << "SUCCESS.\n"; } else { cerr << "FAILURE REPORTED.\n"; }
-
-#if 0
- ifstream in2("./amnabooks.g");
- assert(in2);
- MyFST fst2(corpusf[1], corpuse[1], &m);
- CFG_WFSTComposer comp2(fst2);
- Hypergraph hg2;
- bool succeed2 = comp2.Compose(&in2, &hg2);
- if (succeed2) { cerr << "SUCCESS.\n"; } else { cerr << "FAILURE REPORTED.\n"; }
-#endif
-
- SparseVector<double> w; w.set_value(FD::Convert("Proposal"), 1.0);
- hg.Reweight(w);
- cerr << ViterbiFTree(hg) << endl;
- return 0;
-}
-
diff --git a/gi/pf/pfdist.cc b/gi/pf/pfdist.cc
deleted file mode 100644
index a3e46064..00000000
--- a/gi/pf/pfdist.cc
+++ /dev/null
@@ -1,598 +0,0 @@
-#include <iostream>
-#include <tr1/memory>
-#include <queue>
-
-#include <boost/functional.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "pf.h"
-#include "base_distributions.h"
-#include "reachability.h"
-#include "viterbi.h"
-#include "hg.h"
-#include "trule.h"
-#include "tdict.h"
-#include "filelib.h"
-#include "dict.h"
-#include "sampler.h"
-#include "ccrp_nt.h"
-#include "ccrp_onetable.h"
-
-using namespace std;
-using namespace tr1;
-namespace po = boost::program_options;
-
-boost::shared_ptr<MT19937> prng;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples")
- ("particles,p",po::value<unsigned>()->default_value(30),"Number of particles")
- ("filter_frequency,f",po::value<unsigned>()->default_value(5),"Number of time steps between filterings")
- ("input,i",po::value<string>(),"Read parallel data from")
- ("max_src_phrase",po::value<unsigned>()->default_value(5),"Maximum length of source language phrases")
- ("max_trg_phrase",po::value<unsigned>()->default_value(5),"Maximum length of target language phrases")
- ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)")
- ("inverse_model1,M",po::value<string>(),"Inverse Model 1 parameters (used in backward estimate)")
- ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution")
- ("random_seed,S",po::value<uint32_t>(), "Random seed");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || (conf->count("input") == 0)) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-void ReadParallelCorpus(const string& filename,
- vector<vector<WordID> >* f,
- vector<vector<WordID> >* e,
- set<WordID>* vocab_f,
- set<WordID>* vocab_e) {
- f->clear();
- e->clear();
- vocab_f->clear();
- vocab_e->clear();
- istream* in;
- if (filename == "-")
- in = &cin;
- else
- in = new ifstream(filename.c_str());
- assert(*in);
- string line;
- const WordID kDIV = TD::Convert("|||");
- vector<WordID> tmp;
- while(*in) {
- getline(*in, line);
- if (line.empty() && !*in) break;
- e->push_back(vector<int>());
- f->push_back(vector<int>());
- vector<int>& le = e->back();
- vector<int>& lf = f->back();
- tmp.clear();
- TD::ConvertSentence(line, &tmp);
- bool isf = true;
- for (unsigned i = 0; i < tmp.size(); ++i) {
- const int cur = tmp[i];
- if (isf) {
- if (kDIV == cur) { isf = false; } else {
- lf.push_back(cur);
- vocab_f->insert(cur);
- }
- } else {
- assert(cur != kDIV);
- le.push_back(cur);
- vocab_e->insert(cur);
- }
- }
- assert(isf == false);
- }
- if (in != &cin) delete in;
-}
-
-#if 0
-struct MyConditionalModel {
- MyConditionalModel(PhraseConditionalBase& rcp0) : rp0(&rcp0), base(prob_t::One()), src_phrases(1,1), src_jumps(200, CCRP_NoTable<int>(1,1)) {}
-
- prob_t srcp0(const vector<WordID>& src) const {
- prob_t p(1.0 / 3000.0);
- p.poweq(src.size());
- prob_t lenp; lenp.logeq(log_poisson(src.size(), 1.0));
- p *= lenp;
- return p;
- }
-
- void DecrementRule(const TRule& rule) {
- const RuleCRPMap::iterator it = rules.find(rule.f_);
- assert(it != rules.end());
- if (it->second.decrement(rule)) {
- base /= (*rp0)(rule);
- if (it->second.num_customers() == 0)
- rules.erase(it);
- }
- if (src_phrases.decrement(rule.f_))
- base /= srcp0(rule.f_);
- }
-
- void IncrementRule(const TRule& rule) {
- RuleCRPMap::iterator it = rules.find(rule.f_);
- if (it == rules.end())
- it = rules.insert(make_pair(rule.f_, CCRP_NoTable<TRule>(1,1))).first;
- if (it->second.increment(rule)) {
- base *= (*rp0)(rule);
- }
- if (src_phrases.increment(rule.f_))
- base *= srcp0(rule.f_);
- }
-
- void IncrementRules(const vector<TRulePtr>& rules) {
- for (int i = 0; i < rules.size(); ++i)
- IncrementRule(*rules[i]);
- }
-
- void DecrementRules(const vector<TRulePtr>& rules) {
- for (int i = 0; i < rules.size(); ++i)
- DecrementRule(*rules[i]);
- }
-
- void IncrementJump(int dist, unsigned src_len) {
- assert(src_len > 0);
- if (src_jumps[src_len].increment(dist))
- base *= jp0(dist, src_len);
- }
-
- void DecrementJump(int dist, unsigned src_len) {
- assert(src_len > 0);
- if (src_jumps[src_len].decrement(dist))
- base /= jp0(dist, src_len);
- }
-
- void IncrementJumps(const vector<int>& js, unsigned src_len) {
- for (unsigned i = 0; i < js.size(); ++i)
- IncrementJump(js[i], src_len);
- }
-
- void DecrementJumps(const vector<int>& js, unsigned src_len) {
- for (unsigned i = 0; i < js.size(); ++i)
- DecrementJump(js[i], src_len);
- }
-
- // p(jump = dist | src_len , z)
- prob_t JumpProbability(int dist, unsigned src_len) {
- const prob_t p0 = jp0(dist, src_len);
- const double lp = src_jumps[src_len].logprob(dist, log(p0));
- prob_t q; q.logeq(lp);
- return q;
- }
-
- // p(rule.f_ | z) * p(rule.e_ | rule.f_ , z)
- prob_t RuleProbability(const TRule& rule) const {
- const prob_t p0 = (*rp0)(rule);
- prob_t srcp; srcp.logeq(src_phrases.logprob(rule.f_, log(srcp0(rule.f_))));
- const RuleCRPMap::const_iterator it = rules.find(rule.f_);
- if (it == rules.end()) return srcp * p0;
- const double lp = it->second.logprob(rule, log(p0));
- prob_t q; q.logeq(lp);
- return q * srcp;
- }
-
- prob_t Likelihood() const {
- prob_t p = base;
- for (RuleCRPMap::const_iterator it = rules.begin();
- it != rules.end(); ++it) {
- prob_t cl; cl.logeq(it->second.log_crp_prob());
- p *= cl;
- }
- for (unsigned l = 1; l < src_jumps.size(); ++l) {
- if (src_jumps[l].num_customers() > 0) {
- prob_t q;
- q.logeq(src_jumps[l].log_crp_prob());
- p *= q;
- }
- }
- return p;
- }
-
- JumpBase jp0;
- const PhraseConditionalBase* rp0;
- prob_t base;
- typedef unordered_map<vector<WordID>, CCRP_NoTable<TRule>, boost::hash<vector<WordID> > > RuleCRPMap;
- RuleCRPMap rules;
- CCRP_NoTable<vector<WordID> > src_phrases;
- vector<CCRP_NoTable<int> > src_jumps;
-};
-
-#endif
-
-struct MyJointModel {
- MyJointModel(PhraseJointBase& rcp0) :
- rp0(rcp0), base(prob_t::One()), rules(1,1), src_jumps(200, CCRP_NoTable<int>(1,1)) {}
-
- void DecrementRule(const TRule& rule) {
- if (rules.decrement(rule))
- base /= rp0(rule);
- }
-
- void IncrementRule(const TRule& rule) {
- if (rules.increment(rule))
- base *= rp0(rule);
- }
-
- void IncrementRules(const vector<TRulePtr>& rules) {
- for (int i = 0; i < rules.size(); ++i)
- IncrementRule(*rules[i]);
- }
-
- void DecrementRules(const vector<TRulePtr>& rules) {
- for (int i = 0; i < rules.size(); ++i)
- DecrementRule(*rules[i]);
- }
-
- void IncrementJump(int dist, unsigned src_len) {
- assert(src_len > 0);
- if (src_jumps[src_len].increment(dist))
- base *= jp0(dist, src_len);
- }
-
- void DecrementJump(int dist, unsigned src_len) {
- assert(src_len > 0);
- if (src_jumps[src_len].decrement(dist))
- base /= jp0(dist, src_len);
- }
-
- void IncrementJumps(const vector<int>& js, unsigned src_len) {
- for (unsigned i = 0; i < js.size(); ++i)
- IncrementJump(js[i], src_len);
- }
-
- void DecrementJumps(const vector<int>& js, unsigned src_len) {
- for (unsigned i = 0; i < js.size(); ++i)
- DecrementJump(js[i], src_len);
- }
-
- // p(jump = dist | src_len , z)
- prob_t JumpProbability(int dist, unsigned src_len) {
- const prob_t p0 = jp0(dist, src_len);
- const double lp = src_jumps[src_len].logprob(dist, log(p0));
- prob_t q; q.logeq(lp);
- return q;
- }
-
- // p(rule.f_ , rule.e_ | z) -- joint, not conditional
- prob_t RuleProbability(const TRule& rule) const {
- prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule))));
- return p;
- }
-
- prob_t Likelihood() const {
- prob_t p = base;
- prob_t q; q.logeq(rules.log_crp_prob());
- p *= q;
- for (unsigned l = 1; l < src_jumps.size(); ++l) {
- if (src_jumps[l].num_customers() > 0) {
- prob_t q;
- q.logeq(src_jumps[l].log_crp_prob());
- p *= q;
- }
- }
- return p;
- }
-
- JumpBase jp0;
- const PhraseJointBase& rp0;
- prob_t base;
- CCRP_NoTable<TRule> rules;
- vector<CCRP_NoTable<int> > src_jumps;
-};
-
-struct BackwardEstimate {
- BackwardEstimate(const Model1& m1, const vector<WordID>& src, const vector<WordID>& trg) :
- model1_(m1), src_(src), trg_(trg) {
- }
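- // look-ahead estimate of p(uncovered target words | coverage state), built from
- // Model 1 scores with uniform alignments; memoized on (source coverage vector,
- // number of covered target words)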
- const prob_t& operator()(const vector<bool>& src_cov, unsigned trg_cov) const {
- assert(src_.size() == src_cov.size());
- assert(trg_cov <= trg_.size());
- prob_t& e = cache_[src_cov][trg_cov];
- if (e.is_0()) {
- if (trg_cov == trg_.size()) { e = prob_t::One(); return e; }
- vector<WordID> r; r.reserve(src_.size() + 1);
- r.push_back(0); // NULL word
- for (int i = 0; i < src_cov.size(); ++i)
- if (!src_cov[i]) r.push_back(src_[i]);
- const prob_t uniform_alignment(1.0 / r.size());
- e.logeq(Md::log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining)
- for (unsigned j = trg_cov; j < trg_.size(); ++j) {
- prob_t p;
- for (unsigned i = 0; i < r.size(); ++i)
- p += model1_(r[i], trg_[j]);
- if (p.is_0()) {
- cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n";
- abort();
- }
- p *= uniform_alignment;
- e *= p;
- }
- }
- return e;
- }
- const Model1& model1_;
- const vector<WordID>& src_;
- const vector<WordID>& trg_;
- mutable unordered_map<vector<bool>, map<unsigned, prob_t>, boost::hash<vector<bool> > > cache_;
-};
-
-struct BackwardEstimateSym {
- BackwardEstimateSym(const Model1& m1,
- const Model1& invm1, const vector<WordID>& src, const vector<WordID>& trg) :
- model1_(m1), invmodel1_(invm1), src_(src), trg_(trg) {
- }
- const prob_t& operator()(const vector<bool>& src_cov, unsigned trg_cov) const {
- assert(src_.size() == src_cov.size());
- assert(trg_cov <= trg_.size());
- prob_t& e = cache_[src_cov][trg_cov];
- if (e.is_0()) {
- if (trg_cov == trg_.size()) { e = prob_t::One(); return e; }
- vector<WordID> r; r.reserve(src_.size() + 1);
- for (int i = 0; i < src_cov.size(); ++i)
- if (!src_cov[i]) r.push_back(src_[i]);
- r.push_back(0); // NULL word
- const prob_t uniform_alignment(1.0 / r.size());
- e.logeq(Md::log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining)
- for (unsigned j = trg_cov; j < trg_.size(); ++j) {
- prob_t p;
- for (unsigned i = 0; i < r.size(); ++i)
- p += model1_(r[i], trg_[j]);
- if (p.is_0()) {
- cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n";
- abort();
- }
- p *= uniform_alignment;
- e *= p;
- }
- r.pop_back();
- const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0));
- prob_t inv;
- inv.logeq(Md::log_poisson(r.size(), trg_.size() - trg_cov));
- for (unsigned i = 0; i < r.size(); ++i) {
- prob_t p;
- for (unsigned j = trg_cov - 1; j < trg_.size(); ++j)
- p += invmodel1_(j < trg_cov ? 0 : trg_[j], r[i]);
- if (p.is_0()) {
- cerr << "ERROR: p_inv(" << TD::Convert(r[i]) << " | " << TD::GetString(trg_) << ") = 0!\n";
- abort();
- }
- p *= inv_uniform;
- inv *= p;
- }
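- // combine the forward (e) and inverse (inv) Model 1 estimates by taking
- // their geometric mean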
- prob_t x = pow(e * inv, 0.5);
- e = x;
- //cerr << "Forward: " << log(e) << "\tBackward: " << log(inv) << "\t prop: " << log(x) << endl;
- }
- return e;
- }
- const Model1& model1_;
- const Model1& invmodel1_;
- const vector<WordID>& src_;
- const vector<WordID>& trg_;
- mutable unordered_map<vector<bool>, map<unsigned, prob_t>, boost::hash<vector<bool> > > cache_;
-};
-
-struct Particle {
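- // a particle is one partial derivation: weight is its importance weight,
- // gamma_last the unnormalized model score of its most recent extension,
- // src_cv the source coverage vector, prev_pos the last covered source index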
- Particle() : weight(prob_t::One()), src_cov(), trg_cov(), prev_pos(-1) {}
- prob_t weight;
- prob_t gamma_last;
- vector<int> src_jumps;
- vector<TRulePtr> rules;
- vector<bool> src_cv;
- int src_cov;
- int trg_cov;
- int prev_pos;
-};
-
-ostream& operator<<(ostream& o, const vector<bool>& v) {
- for (int i = 0; i < v.size(); ++i)
- o << (v[i] ? '1' : '0');
- return o;
-}
-ostream& operator<<(ostream& o, const Particle& p) {
- o << "[cv=" << p.src_cv << " src_cov=" << p.src_cov << " trg_cov=" << p.trg_cov << " last_pos=" << p.prev_pos << " num_rules=" << p.rules.size() << " w=" << log(p.weight) << ']';
- return o;
-}
-
-int main(int argc, char** argv) {
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
- const unsigned kMAX_TRG_PHRASE = conf["max_trg_phrase"].as<unsigned>();
- const unsigned kMAX_SRC_PHRASE = conf["max_src_phrase"].as<unsigned>();
- const unsigned particles = conf["particles"].as<unsigned>();
- const unsigned samples = conf["samples"].as<unsigned>();
- const unsigned rejuv_freq = conf["filter_frequency"].as<unsigned>();
-
- if (!conf.count("model1")) {
- cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n";
- return 1;
- }
- if (conf.count("random_seed"))
- prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
- else
- prng.reset(new MT19937);
- MT19937& rng = *prng;
-
- vector<vector<WordID> > corpuse, corpusf;
- set<WordID> vocabe, vocabf;
- cerr << "Reading corpus...\n";
- ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
- cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n";
- cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n";
- assert(corpusf.size() == corpuse.size());
-
- const int kLHS = -TD::Convert("X");
- Model1 m1(conf["model1"].as<string>());
- Model1 invm1(conf["inverse_model1"].as<string>());
-
-#if 0
- PhraseConditionalBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size());
- MyConditionalModel m(lp0);
-#else
- PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size(), vocabf.size());
- MyJointModel m(lp0);
-#endif
-
- MultinomialResampleFilter<Particle> filter(&rng);
- cerr << "Initializing reachability limits...\n";
- vector<Particle> ps(corpusf.size());
- vector<Reachability> reaches; reaches.reserve(corpusf.size());
- for (int ci = 0; ci < corpusf.size(); ++ci)
- reaches.push_back(Reachability(corpusf[ci].size(),
- corpuse[ci].size(),
- kMAX_SRC_PHRASE,
- kMAX_TRG_PHRASE));
- cerr << "Sampling...\n";
- vector<Particle> tmp_p(10000); // work space
- SampleSet<prob_t> pfss;
- for (int SS=0; SS < samples; ++SS) {
- for (int ci = 0; ci < corpusf.size(); ++ci) {
- vector<int>& src = corpusf[ci];
- vector<int>& trg = corpuse[ci];
- m.DecrementRules(ps[ci].rules);
- m.DecrementJumps(ps[ci].src_jumps, src.size());
-
- //BackwardEstimate be(m1, src, trg);
- BackwardEstimateSym be(m1, invm1, src, trg);
- const Reachability& r = reaches[ci];
- vector<Particle> lps(particles);
-
- for (int pi = 0; pi < particles; ++pi) {
- Particle& p = lps[pi];
- p.src_cv.resize(src.size(), false);
- }
-
- bool all_complete = false;
- while(!all_complete) {
- SampleSet<prob_t> ss;
-
- // between extension steps, resample the particle population to concentrate
- // mass on promising hypotheses (skipped on the first pass, before anything is covered)
- if (lps[0].trg_cov > 0)
- filter(&lps);
-
- // loop over all particles and extend them
- bool done_nothing = true;
- for (int pi = 0; pi < particles; ++pi) {
- Particle& p = lps[pi];
- int tic = 0;
- while(p.trg_cov < trg.size() && tic < rejuv_freq) {
- ++tic;
- done_nothing = false;
- ss.clear();
- TRule x; x.lhs_ = kLHS;
- prob_t z;
- int first_uncovered = src.size();
- int last_uncovered = -1;
- for (int i = 0; i < src.size(); ++i) {
- const bool is_uncovered = !p.src_cv[i];
- if (i < first_uncovered && is_uncovered) first_uncovered = i;
- if (is_uncovered && i > last_uncovered) last_uncovered = i;
- }
- assert(last_uncovered > -1);
- assert(first_uncovered < src.size());
-
- for (int trg_len = 1; trg_len <= kMAX_TRG_PHRASE; ++trg_len) {
- x.e_.push_back(trg[trg_len - 1 + p.trg_cov]);
- for (int src_len = 1; src_len <= kMAX_SRC_PHRASE; ++src_len) {
- if (!r.edges[p.src_cov][p.trg_cov][src_len][trg_len]) continue;
-
- const int last_possible_start = last_uncovered - src_len + 1;
- assert(last_possible_start >= 0);
- //cerr << src_len << "," << trg_len << " is allowed. E=" << TD::GetString(x.e_) << endl;
- //cerr << " first_uncovered=" << first_uncovered << " last_possible_start=" << last_possible_start << endl;
- for (int i = first_uncovered; i <= last_possible_start; ++i) {
- if (p.src_cv[i]) continue;
- assert(ss.size() < tmp_p.size()); // if this fails, increase the size of tmp_p
- Particle& np = tmp_p[ss.size()];
- np = p;
- x.f_.clear();
- int gap_add = 0;
- bool bad = false;
- prob_t jp = prob_t::One();
- int prev_pos = p.prev_pos;
- for (int j = 0; j < src_len; ++j) {
- if ((j + i + gap_add) == src.size()) { bad = true; break; }
- while ((i+j+gap_add) < src.size() && p.src_cv[i + j + gap_add]) { ++gap_add; }
- if ((j + i + gap_add) == src.size()) { bad = true; break; }
- np.src_cv[i + j + gap_add] = true;
- x.f_.push_back(src[i + j + gap_add]);
- jp *= m.JumpProbability(i + j + gap_add - prev_pos, src.size());
- int jump = i + j + gap_add - prev_pos;
- assert(jump != 0);
- np.src_jumps.push_back(jump);
- prev_pos = i + j + gap_add;
- }
- if (bad) continue;
- np.prev_pos = prev_pos;
- np.src_cov += x.f_.size();
- np.trg_cov += x.e_.size();
- if (x.f_.size() != src_len) continue;
- prob_t rp = m.RuleProbability(x);
- np.gamma_last = rp * jp;
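- // proposal weight: model score times backward estimate, flattened by an
- // annealing exponent (0.2) so low-scoring extensions keep some probability mass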
- const prob_t u = pow(np.gamma_last * be(np.src_cv, np.trg_cov), 0.2);
- //cerr << "**rule=" << x << endl;
- //cerr << " u=" << log(u) << " rule=" << rp << " jump=" << jp << endl;
- ss.add(u);
- np.rules.push_back(TRulePtr(new TRule(x)));
- z += u;
-
- }
- }
- }
- cerr << "number of edges to consider: " << ss.size() << endl;
- const int sampled = rng.SelectSample(ss);
- prob_t q_n = ss[sampled] / z;
- p = tmp_p[sampled];
- //m.IncrementRule(*p.rules.back());
- p.weight *= p.gamma_last / q_n;
- // if this extension just completed the target, account for the final
- // jump to the end of the source sentence (the original check sat inside
- // the candidate loop, where it could never fire)
- if (p.trg_cov == trg.size()) {
- int last_jump = src.size() - p.prev_pos;
- assert(last_jump > 0);
- p.src_jumps.push_back(last_jump);
- p.weight *= m.JumpProbability(last_jump, src.size());
- }
- cerr << "[w=" << log(p.weight) << "]\tsampled rule: " << p.rules.back()->AsString() << endl;
- cerr << p << endl;
- }
- } // loop over particles (pi = 0 .. particles)
- if (done_nothing) all_complete = true;
- }
- pfss.clear();
- for (int i = 0; i < lps.size(); ++i)
- pfss.add(lps[i].weight);
- const int sampled = rng.SelectSample(pfss);
- ps[ci] = lps[sampled];
- m.IncrementRules(lps[sampled].rules);
- m.IncrementJumps(lps[sampled].src_jumps, src.size());
- for (int i = 0; i < lps[sampled].rules.size(); ++i) { cerr << "S:\t" << lps[sampled].rules[i]->AsString() << "\n"; }
- cerr << "tmp-LLH: " << log(m.Likelihood()) << endl;
- }
- cerr << "LLH: " << log(m.Likelihood()) << endl;
- for (int sni = 0; sni < 5; ++sni) {
- for (int i = 0; i < ps[sni].rules.size(); ++i) { cerr << "\t" << ps[sni].rules[i]->AsString() << endl; }
- }
- }
- return 0;
-}
-
diff --git a/gi/pf/pfdist.new.cc b/gi/pf/pfdist.new.cc
deleted file mode 100644
index 3169eb75..00000000
--- a/gi/pf/pfdist.new.cc
+++ /dev/null
@@ -1,620 +0,0 @@
-#include <iostream>
-#include <tr1/memory>
-#include <queue>
-
-#include <boost/functional.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "base_measures.h"
-#include "reachability.h"
-#include "viterbi.h"
-#include "hg.h"
-#include "trule.h"
-#include "tdict.h"
-#include "filelib.h"
-#include "dict.h"
-#include "sampler.h"
-#include "ccrp_nt.h"
-#include "ccrp_onetable.h"
-
-using namespace std;
-using namespace tr1;
-namespace po = boost::program_options;
-
-shared_ptr<MT19937> prng;
-
-size_t hash_value(const TRule& r) {
- size_t h = boost::hash_value(r.e_);
- boost::hash_combine(h, -r.lhs_);
- boost::hash_combine(h, boost::hash_value(r.f_));
- return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
- return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples")
- ("particles,p",po::value<unsigned>()->default_value(25),"Number of particles")
- ("input,i",po::value<string>(),"Read parallel data from")
- ("max_src_phrase",po::value<unsigned>()->default_value(5),"Maximum length of source language phrases")
- ("max_trg_phrase",po::value<unsigned>()->default_value(5),"Maximum length of target language phrases")
- ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)")
- ("inverse_model1,M",po::value<string>(),"Inverse Model 1 parameters (used in backward estimate)")
- ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution")
- ("random_seed,S",po::value<uint32_t>(), "Random seed");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || (conf->count("input") == 0)) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-void ReadParallelCorpus(const string& filename,
- vector<vector<WordID> >* f,
- vector<vector<WordID> >* e,
- set<WordID>* vocab_f,
- set<WordID>* vocab_e) {
- f->clear();
- e->clear();
- vocab_f->clear();
- vocab_e->clear();
- istream* in;
- if (filename == "-")
- in = &cin;
- else
- in = new ifstream(filename.c_str());
- assert(*in);
- string line;
- const WordID kDIV = TD::Convert("|||");
- vector<WordID> tmp;
- while(*in) {
- getline(*in, line);
- if (line.empty() && !*in) break;
- e->push_back(vector<int>());
- f->push_back(vector<int>());
- vector<int>& le = e->back();
- vector<int>& lf = f->back();
- tmp.clear();
- TD::ConvertSentence(line, &tmp);
- bool isf = true;
- for (unsigned i = 0; i < tmp.size(); ++i) {
- const int cur = tmp[i];
- if (isf) {
- if (kDIV == cur) { isf = false; } else {
- lf.push_back(cur);
- vocab_f->insert(cur);
- }
- } else {
- assert(cur != kDIV);
- le.push_back(cur);
- vocab_e->insert(cur);
- }
- }
- assert(isf == false);
- }
- if (in != &cin) delete in;
-}
-
-#if 0
-struct MyConditionalModel {
- MyConditionalModel(PhraseConditionalBase& rcp0) : rp0(&rcp0), base(prob_t::One()), src_phrases(1,1), src_jumps(200, CCRP_NoTable<int>(1,1)) {}
-
- prob_t srcp0(const vector<WordID>& src) const {
- prob_t p(1.0 / 3000.0);
- p.poweq(src.size());
- prob_t lenp; lenp.logeq(log_poisson(src.size(), 1.0));
- p *= lenp;
- return p;
- }
-
- void DecrementRule(const TRule& rule) {
- const RuleCRPMap::iterator it = rules.find(rule.f_);
- assert(it != rules.end());
- if (it->second.decrement(rule)) {
- base /= (*rp0)(rule);
- if (it->second.num_customers() == 0)
- rules.erase(it);
- }
- if (src_phrases.decrement(rule.f_))
- base /= srcp0(rule.f_);
- }
-
- void IncrementRule(const TRule& rule) {
- RuleCRPMap::iterator it = rules.find(rule.f_);
- if (it == rules.end())
- it = rules.insert(make_pair(rule.f_, CCRP_NoTable<TRule>(1,1))).first;
- if (it->second.increment(rule)) {
- base *= (*rp0)(rule);
- }
- if (src_phrases.increment(rule.f_))
- base *= srcp0(rule.f_);
- }
-
- void IncrementRules(const vector<TRulePtr>& rules) {
- for (int i = 0; i < rules.size(); ++i)
- IncrementRule(*rules[i]);
- }
-
- void DecrementRules(const vector<TRulePtr>& rules) {
- for (int i = 0; i < rules.size(); ++i)
- DecrementRule(*rules[i]);
- }
-
- void IncrementJump(int dist, unsigned src_len) {
- assert(src_len > 0);
- if (src_jumps[src_len].increment(dist))
- base *= jp0(dist, src_len);
- }
-
- void DecrementJump(int dist, unsigned src_len) {
- assert(src_len > 0);
- if (src_jumps[src_len].decrement(dist))
- base /= jp0(dist, src_len);
- }
-
- void IncrementJumps(const vector<int>& js, unsigned src_len) {
- for (unsigned i = 0; i < js.size(); ++i)
- IncrementJump(js[i], src_len);
- }
-
- void DecrementJumps(const vector<int>& js, unsigned src_len) {
- for (unsigned i = 0; i < js.size(); ++i)
- DecrementJump(js[i], src_len);
- }
-
- // p(jump = dist | src_len , z)
- prob_t JumpProbability(int dist, unsigned src_len) {
- const prob_t p0 = jp0(dist, src_len);
- const double lp = src_jumps[src_len].logprob(dist, log(p0));
- prob_t q; q.logeq(lp);
- return q;
- }
-
- // p(rule.f_ | z) * p(rule.e_ | rule.f_ , z)
- prob_t RuleProbability(const TRule& rule) const {
- const prob_t p0 = (*rp0)(rule);
- prob_t srcp; srcp.logeq(src_phrases.logprob(rule.f_, log(srcp0(rule.f_))));
- const RuleCRPMap::const_iterator it = rules.find(rule.f_);
- if (it == rules.end()) return srcp * p0;
- const double lp = it->second.logprob(rule, log(p0));
- prob_t q; q.logeq(lp);
- return q * srcp;
- }
-
- prob_t Likelihood() const {
- prob_t p = base;
- for (RuleCRPMap::const_iterator it = rules.begin();
- it != rules.end(); ++it) {
- prob_t cl; cl.logeq(it->second.log_crp_prob());
- p *= cl;
- }
- for (unsigned l = 1; l < src_jumps.size(); ++l) {
- if (src_jumps[l].num_customers() > 0) {
- prob_t q;
- q.logeq(src_jumps[l].log_crp_prob());
- p *= q;
- }
- }
- return p;
- }
-
- JumpBase jp0;
- const PhraseConditionalBase* rp0;
- prob_t base;
- typedef unordered_map<vector<WordID>, CCRP_NoTable<TRule>, boost::hash<vector<WordID> > > RuleCRPMap;
- RuleCRPMap rules;
- CCRP_NoTable<vector<WordID> > src_phrases;
- vector<CCRP_NoTable<int> > src_jumps;
-};
-
-#endif
-
-struct MyJointModel {
- MyJointModel(PhraseJointBase& rcp0) :
- rp0(rcp0), base(prob_t::One()), rules(1,1), src_jumps(200, CCRP_NoTable<int>(1,1)) {}
-
- void DecrementRule(const TRule& rule) {
- if (rules.decrement(rule))
- base /= rp0(rule);
- }
-
- void IncrementRule(const TRule& rule) {
- if (rules.increment(rule))
- base *= rp0(rule);
- }
-
- void IncrementRules(const vector<TRulePtr>& rules) {
- for (int i = 0; i < rules.size(); ++i)
- IncrementRule(*rules[i]);
- }
-
- void DecrementRules(const vector<TRulePtr>& rules) {
- for (int i = 0; i < rules.size(); ++i)
- DecrementRule(*rules[i]);
- }
-
- void IncrementJump(int dist, unsigned src_len) {
- assert(src_len > 0);
- if (src_jumps[src_len].increment(dist))
- base *= jp0(dist, src_len);
- }
-
- void DecrementJump(int dist, unsigned src_len) {
- assert(src_len > 0);
- if (src_jumps[src_len].decrement(dist))
- base /= jp0(dist, src_len);
- }
-
- void IncrementJumps(const vector<int>& js, unsigned src_len) {
- for (unsigned i = 0; i < js.size(); ++i)
- IncrementJump(js[i], src_len);
- }
-
- void DecrementJumps(const vector<int>& js, unsigned src_len) {
- for (unsigned i = 0; i < js.size(); ++i)
- DecrementJump(js[i], src_len);
- }
-
- // p(jump = dist | src_len , z)
- prob_t JumpProbability(int dist, unsigned src_len) {
- const prob_t p0 = jp0(dist, src_len);
- const double lp = src_jumps[src_len].logprob(dist, log(p0));
- prob_t q; q.logeq(lp);
- return q;
- }
-
- // p(rule.f_ , rule.e_ | z) -- joint, not conditional
- prob_t RuleProbability(const TRule& rule) const {
- prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule))));
- return p;
- }
-
- prob_t Likelihood() const {
- prob_t p = base;
- prob_t q; q.logeq(rules.log_crp_prob());
- p *= q;
- for (unsigned l = 1; l < src_jumps.size(); ++l) {
- if (src_jumps[l].num_customers() > 0) {
- prob_t q;
- q.logeq(src_jumps[l].log_crp_prob());
- p *= q;
- }
- }
- return p;
- }
-
- JumpBase jp0;
- const PhraseJointBase& rp0;
- prob_t base;
- CCRP_NoTable<TRule> rules;
- vector<CCRP_NoTable<int> > src_jumps;
-};
-
-struct BackwardEstimate {
- BackwardEstimate(const Model1& m1, const vector<WordID>& src, const vector<WordID>& trg) :
- model1_(m1), src_(src), trg_(trg) {
- }
- const prob_t& operator()(const vector<bool>& src_cov, unsigned trg_cov) const {
- assert(src_.size() == src_cov.size());
- assert(trg_cov <= trg_.size());
- prob_t& e = cache_[src_cov][trg_cov];
- if (e.is_0()) {
- if (trg_cov == trg_.size()) { e = prob_t::One(); return e; }
- vector<WordID> r; r.reserve(src_.size() + 1);
- r.push_back(0); // NULL word
- for (int i = 0; i < src_cov.size(); ++i)
- if (!src_cov[i]) r.push_back(src_[i]);
- const prob_t uniform_alignment(1.0 / r.size());
- e.logeq(log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining)
- for (unsigned j = trg_cov; j < trg_.size(); ++j) {
- prob_t p;
- for (unsigned i = 0; i < r.size(); ++i)
- p += model1_(r[i], trg_[j]);
- if (p.is_0()) {
- cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n";
- abort();
- }
- p *= uniform_alignment;
- e *= p;
- }
- }
- return e;
- }
- const Model1& model1_;
- const vector<WordID>& src_;
- const vector<WordID>& trg_;
- mutable unordered_map<vector<bool>, map<unsigned, prob_t>, boost::hash<vector<bool> > > cache_;
-};
-
-struct BackwardEstimateSym {
- BackwardEstimateSym(const Model1& m1,
- const Model1& invm1, const vector<WordID>& src, const vector<WordID>& trg) :
- model1_(m1), invmodel1_(invm1), src_(src), trg_(trg) {
- }
- const prob_t& operator()(const vector<bool>& src_cov, unsigned trg_cov) const {
- assert(src_.size() == src_cov.size());
- assert(trg_cov <= trg_.size());
- prob_t& e = cache_[src_cov][trg_cov];
- if (e.is_0()) {
- if (trg_cov == trg_.size()) { e = prob_t::One(); return e; }
- vector<WordID> r; r.reserve(src_.size() + 1);
- for (int i = 0; i < src_cov.size(); ++i)
- if (!src_cov[i]) r.push_back(src_[i]);
- r.push_back(0); // NULL word
- const prob_t uniform_alignment(1.0 / r.size());
- e.logeq(log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining)
- for (unsigned j = trg_cov; j < trg_.size(); ++j) {
- prob_t p;
- for (unsigned i = 0; i < r.size(); ++i)
- p += model1_(r[i], trg_[j]);
- if (p.is_0()) {
- cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n";
- abort();
- }
- p *= uniform_alignment;
- e *= p;
- }
- r.pop_back();
- const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0));
- prob_t inv;
- inv.logeq(log_poisson(r.size(), trg_.size() - trg_cov));
- for (unsigned i = 0; i < r.size(); ++i) {
- prob_t p;
- for (unsigned j = trg_cov - 1; j < trg_.size(); ++j)
- p += invmodel1_(j < trg_cov ? 0 : trg_[j], r[i]);
- if (p.is_0()) {
- cerr << "ERROR: p_inv(" << TD::Convert(r[i]) << " | " << TD::GetString(trg_) << ") = 0!\n";
- abort();
- }
- p *= inv_uniform;
- inv *= p;
- }
- prob_t x = pow(e * inv, 0.5);
- e = x;
- //cerr << "Forward: " << log(e) << "\tBackward: " << log(inv) << "\t prop: " << log(x) << endl;
- }
- return e;
- }
- const Model1& model1_;
- const Model1& invmodel1_;
- const vector<WordID>& src_;
- const vector<WordID>& trg_;
- mutable unordered_map<vector<bool>, map<unsigned, prob_t>, boost::hash<vector<bool> > > cache_;
-};
-
-struct Particle {
- Particle() : weight(prob_t::One()), src_cov(), trg_cov(), prev_pos(-1) {}
- prob_t weight;
- prob_t gamma_last;
- vector<int> src_jumps;
- vector<TRulePtr> rules;
- vector<bool> src_cv;
- int src_cov;
- int trg_cov;
- int prev_pos;
-};
-
-ostream& operator<<(ostream& o, const vector<bool>& v) {
- for (int i = 0; i < v.size(); ++i)
- o << (v[i] ? '1' : '0');
- return o;
-}
-ostream& operator<<(ostream& o, const Particle& p) {
- o << "[cv=" << p.src_cv << " src_cov=" << p.src_cov << " trg_cov=" << p.trg_cov << " last_pos=" << p.prev_pos << " num_rules=" << p.rules.size() << " w=" << log(p.weight) << ']';
- return o;
-}
-
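-// multinomial resampling: draw a new population with replacement, with
-// probability proportional to weight, then reset every weight to uniform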
-void FilterCrapParticlesAndReweight(vector<Particle>* pps) {
- vector<Particle>& ps = *pps;
- SampleSet<prob_t> ss;
- for (int i = 0; i < ps.size(); ++i)
- ss.add(ps[i].weight);
- vector<Particle> nps; nps.reserve(ps.size());
- const prob_t uniform_weight(1.0 / ps.size());
- for (int i = 0; i < ps.size(); ++i) {
- nps.push_back(ps[prng->SelectSample(ss)]);
- nps[i].weight = uniform_weight;
- }
- nps.swap(ps);
-}
-
-int main(int argc, char** argv) {
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
- const unsigned kMAX_TRG_PHRASE = conf["max_trg_phrase"].as<unsigned>();
- const unsigned kMAX_SRC_PHRASE = conf["max_src_phrase"].as<unsigned>();
- const unsigned particles = conf["particles"].as<unsigned>();
- const unsigned samples = conf["samples"].as<unsigned>();
-
- if (!conf.count("model1")) {
- cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n";
- return 1;
- }
- if (conf.count("random_seed"))
- prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
- else
- prng.reset(new MT19937);
- MT19937& rng = *prng;
-
- vector<vector<WordID> > corpuse, corpusf;
- set<WordID> vocabe, vocabf;
- cerr << "Reading corpus...\n";
- ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
- cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n";
- cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n";
- assert(corpusf.size() == corpuse.size());
-
- const int kLHS = -TD::Convert("X");
- Model1 m1(conf["model1"].as<string>());
- Model1 invm1(conf["inverse_model1"].as<string>());
-
-#if 0
- PhraseConditionalBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size());
- MyConditionalModel m(lp0);
-#else
- PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size(), vocabf.size());
- MyJointModel m(lp0);
-#endif
-
- cerr << "Initializing reachability limits...\n";
- vector<Particle> ps(corpusf.size());
- vector<Reachability> reaches; reaches.reserve(corpusf.size());
- for (int ci = 0; ci < corpusf.size(); ++ci)
- reaches.push_back(Reachability(corpusf[ci].size(),
- corpuse[ci].size(),
- kMAX_SRC_PHRASE,
- kMAX_TRG_PHRASE));
- cerr << "Sampling...\n";
- vector<Particle> tmp_p(10000); // work space
- SampleSet<prob_t> pfss;
- for (int SS=0; SS < samples; ++SS) {
- for (int ci = 0; ci < corpusf.size(); ++ci) {
- vector<int>& src = corpusf[ci];
- vector<int>& trg = corpuse[ci];
- m.DecrementRules(ps[ci].rules);
- m.DecrementJumps(ps[ci].src_jumps, src.size());
-
- //BackwardEstimate be(m1, src, trg);
- BackwardEstimateSym be(m1, invm1, src, trg);
- const Reachability& r = reaches[ci];
- vector<Particle> lps(particles);
-
- for (int pi = 0; pi < particles; ++pi) {
- Particle& p = lps[pi];
- p.src_cv.resize(src.size(), false);
- }
-
- bool all_complete = false;
- while(!all_complete) {
- SampleSet<prob_t> ss;
-
- // between extension steps, resample the particle population to concentrate
- // mass on promising hypotheses (skipped on the first pass, before anything is covered)
- if (lps[0].trg_cov > 0)
- FilterCrapParticlesAndReweight(&lps);
-
- // loop over all particles and extend them
- bool done_nothing = true;
- for (int pi = 0; pi < particles; ++pi) {
- Particle& p = lps[pi];
- int tic = 0;
- const int rejuv_freq = 1;
- while(p.trg_cov < trg.size() && tic < rejuv_freq) {
- ++tic;
- done_nothing = false;
- ss.clear();
- TRule x; x.lhs_ = kLHS;
- prob_t z;
- int first_uncovered = src.size();
- int last_uncovered = -1;
- for (int i = 0; i < src.size(); ++i) {
- const bool is_uncovered = !p.src_cv[i];
- if (i < first_uncovered && is_uncovered) first_uncovered = i;
- if (is_uncovered && i > last_uncovered) last_uncovered = i;
- }
- assert(last_uncovered > -1);
- assert(first_uncovered < src.size());
-
- for (int trg_len = 1; trg_len <= kMAX_TRG_PHRASE; ++trg_len) {
- x.e_.push_back(trg[trg_len - 1 + p.trg_cov]);
- for (int src_len = 1; src_len <= kMAX_SRC_PHRASE; ++src_len) {
- if (!r.edges[p.src_cov][p.trg_cov][src_len][trg_len]) continue;
-
- const int last_possible_start = last_uncovered - src_len + 1;
- assert(last_possible_start >= 0);
- //cerr << src_len << "," << trg_len << " is allowed. E=" << TD::GetString(x.e_) << endl;
- //cerr << " first_uncovered=" << first_uncovered << " last_possible_start=" << last_possible_start << endl;
- for (int i = first_uncovered; i <= last_possible_start; ++i) {
- if (p.src_cv[i]) continue;
- assert(ss.size() < tmp_p.size()); // if this fails, increase the size of tmp_p
- Particle& np = tmp_p[ss.size()];
- np = p;
- x.f_.clear();
- int gap_add = 0;
- bool bad = false;
- prob_t jp = prob_t::One();
- int prev_pos = p.prev_pos;
- for (int j = 0; j < src_len; ++j) {
- if ((j + i + gap_add) == src.size()) { bad = true; break; }
- while ((i+j+gap_add) < src.size() && p.src_cv[i + j + gap_add]) { ++gap_add; }
- if ((j + i + gap_add) == src.size()) { bad = true; break; }
- np.src_cv[i + j + gap_add] = true;
- x.f_.push_back(src[i + j + gap_add]);
- jp *= m.JumpProbability(i + j + gap_add - prev_pos, src.size());
- int jump = i + j + gap_add - prev_pos;
- assert(jump != 0);
- np.src_jumps.push_back(jump);
- prev_pos = i + j + gap_add;
- }
- if (bad) continue;
- np.prev_pos = prev_pos;
- np.src_cov += x.f_.size();
- np.trg_cov += x.e_.size();
- if (x.f_.size() != src_len) continue;
- prob_t rp = m.RuleProbability(x);
- np.gamma_last = rp * jp;
- const prob_t u = pow(np.gamma_last * be(np.src_cv, np.trg_cov), 0.2);
- //cerr << "**rule=" << x << endl;
- //cerr << " u=" << log(u) << " rule=" << rp << " jump=" << jp << endl;
- ss.add(u);
- np.rules.push_back(TRulePtr(new TRule(x)));
- z += u;
-
- }
- }
- }
- cerr << "number of edges to consider: " << ss.size() << endl;
- const int sampled = rng.SelectSample(ss);
- prob_t q_n = ss[sampled] / z;
- p = tmp_p[sampled];
- //m.IncrementRule(*p.rules.back());
- p.weight *= p.gamma_last / q_n;
- // if this extension just completed the target, account for the final
- // jump to the end of the source sentence (the original check sat inside
- // the candidate loop, where it could never fire)
- if (p.trg_cov == trg.size()) {
- int last_jump = src.size() - p.prev_pos;
- assert(last_jump > 0);
- p.src_jumps.push_back(last_jump);
- p.weight *= m.JumpProbability(last_jump, src.size());
- }
- cerr << "[w=" << log(p.weight) << "]\tsampled rule: " << p.rules.back()->AsString() << endl;
- cerr << p << endl;
- }
- } // loop over particles (pi = 0 .. particles)
- if (done_nothing) all_complete = true;
- }
- pfss.clear();
- for (int i = 0; i < lps.size(); ++i)
- pfss.add(lps[i].weight);
- const int sampled = rng.SelectSample(pfss);
- ps[ci] = lps[sampled];
- m.IncrementRules(lps[sampled].rules);
- m.IncrementJumps(lps[sampled].src_jumps, src.size());
- for (int i = 0; i < lps[sampled].rules.size(); ++i) { cerr << "S:\t" << lps[sampled].rules[i]->AsString() << "\n"; }
- cerr << "tmp-LLH: " << log(m.Likelihood()) << endl;
- }
- cerr << "LLH: " << log(m.Likelihood()) << endl;
- for (int sni = 0; sni < 5; ++sni) {
- for (int i = 0; i < ps[sni].rules.size(); ++i) { cerr << "\t" << ps[sni].rules[i]->AsString() << endl; }
- }
- }
- return 0;
-}
-
diff --git a/gi/pf/pfnaive.cc b/gi/pf/pfnaive.cc
deleted file mode 100644
index 958ec4e2..00000000
--- a/gi/pf/pfnaive.cc
+++ /dev/null
@@ -1,284 +0,0 @@
-#include <iostream>
-#include <tr1/memory>
-#include <queue>
-
-#include <boost/functional.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "pf.h"
-#include "base_distributions.h"
-#include "monotonic_pseg.h"
-#include "reachability.h"
-#include "viterbi.h"
-#include "hg.h"
-#include "trule.h"
-#include "tdict.h"
-#include "filelib.h"
-#include "dict.h"
-#include "sampler.h"
-#include "ccrp_nt.h"
-#include "ccrp_onetable.h"
-#include "corpus.h"
-
-using namespace std;
-using namespace tr1;
-namespace po = boost::program_options;
-
-boost::shared_ptr<MT19937> prng;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples")
- ("particles,p",po::value<unsigned>()->default_value(30),"Number of particles")
- ("filter_frequency,f",po::value<unsigned>()->default_value(5),"Number of time steps between filterings")
- ("input,i",po::value<string>(),"Read parallel data from")
- ("max_src_phrase",po::value<unsigned>()->default_value(5),"Maximum length of source language phrases")
- ("max_trg_phrase",po::value<unsigned>()->default_value(5),"Maximum length of target language phrases")
- ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)")
- ("inverse_model1,M",po::value<string>(),"Inverse Model 1 parameters (used in backward estimate)")
- ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution")
- ("random_seed,S",po::value<uint32_t>(), "Random seed");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || (conf->count("input") == 0)) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-struct BackwardEstimateSym {
- BackwardEstimateSym(const Model1& m1,
- const Model1& invm1, const vector<WordID>& src, const vector<WordID>& trg) :
- model1_(m1), invmodel1_(invm1), src_(src), trg_(trg) {
- }
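- // monotone variant of the backward estimate: source coverage is just a prefix
- // length here, so the cache is keyed on (src_cov, trg_cov) rather than on a
- // coverage bit vector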
- const prob_t& operator()(unsigned src_cov, unsigned trg_cov) const {
- assert(src_cov <= src_.size());
- assert(trg_cov <= trg_.size());
- prob_t& e = cache_[src_cov][trg_cov];
- if (e.is_0()) {
- if (trg_cov == trg_.size()) { e = prob_t::One(); return e; }
- vector<WordID> r; r.reserve(src_.size() + 1);
- for (int i = src_cov; i < src_.size(); ++i)
- r.push_back(src_[i]);
- r.push_back(0); // NULL word
- const prob_t uniform_alignment(1.0 / r.size());
- e.logeq(Md::log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining)
- for (unsigned j = trg_cov; j < trg_.size(); ++j) {
- prob_t p;
- for (unsigned i = 0; i < r.size(); ++i)
- p += model1_(r[i], trg_[j]);
- if (p.is_0()) {
- cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n";
- abort();
- }
- p *= uniform_alignment;
- e *= p;
- }
- r.pop_back();
- const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0));
- prob_t inv;
- inv.logeq(Md::log_poisson(r.size(), trg_.size() - trg_cov));
- for (unsigned i = 0; i < r.size(); ++i) {
- prob_t p;
- for (unsigned j = trg_cov - 1; j < trg_.size(); ++j)
- p += invmodel1_(j < trg_cov ? 0 : trg_[j], r[i]);
- if (p.is_0()) {
- cerr << "ERROR: p_inv(" << TD::Convert(r[i]) << " | " << TD::GetString(trg_) << ") = 0!\n";
- abort();
- }
- p *= inv_uniform;
- inv *= p;
- }
- prob_t x = pow(e * inv, 0.5);
- e = x;
- //cerr << "Forward: " << log(e) << "\tBackward: " << log(inv) << "\t prop: " << log(x) << endl;
- }
- return e;
- }
- const Model1& model1_;
- const Model1& invmodel1_;
- const vector<WordID>& src_;
- const vector<WordID>& trg_;
- mutable unordered_map<unsigned, map<unsigned, prob_t> > cache_;
-};
-
-struct Particle {
- Particle() : weight(prob_t::One()), src_cov(), trg_cov() {}
- prob_t weight;
- prob_t gamma_last;
- vector<TRulePtr> rules;
- int src_cov;
- int trg_cov;
-};
-
-ostream& operator<<(ostream& o, const vector<bool>& v) {
- for (int i = 0; i < v.size(); ++i)
- o << (v[i] ? '1' : '0');
- return o;
-}
-ostream& operator<<(ostream& o, const Particle& p) {
- o << "[src_cov=" << p.src_cov << " trg_cov=" << p.trg_cov << " num_rules=" << p.rules.size() << " w=" << log(p.weight) << ']';
- return o;
-}
-
-int main(int argc, char** argv) {
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
- const unsigned kMAX_TRG_PHRASE = conf["max_trg_phrase"].as<unsigned>();
- const unsigned kMAX_SRC_PHRASE = conf["max_src_phrase"].as<unsigned>();
- const unsigned particles = conf["particles"].as<unsigned>();
- const unsigned samples = conf["samples"].as<unsigned>();
- const unsigned rejuv_freq = conf["filter_frequency"].as<unsigned>();
-
- if (!conf.count("model1")) {
- cerr << argv[0] << ": Please use --model1 to specify model 1 parameters\n";
- return 1;
- }
- if (conf.count("random_seed"))
- prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
- else
- prng.reset(new MT19937);
- MT19937& rng = *prng;
-
- vector<vector<WordID> > corpuse, corpusf;
- set<WordID> vocabe, vocabf;
- cerr << "Reading corpus...\n";
- corpus::ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
- cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n";
- cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n";
- assert(corpusf.size() == corpuse.size());
-
- const int kLHS = -TD::Convert("X");
- Model1 m1(conf["model1"].as<string>());
- Model1 invm1(conf["inverse_model1"].as<string>());
-
- PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size(), vocabf.size());
- PhraseJointBase_BiDir alp0(m1, invm1, conf["model1_interpolation_weight"].as<double>(), vocabe.size(), vocabf.size());
- MonotonicParallelSegementationModel<PhraseJointBase_BiDir> m(alp0);
- TRule xx("[X] ||| ms. kimura ||| MS. KIMURA ||| X=0");
- cerr << xx << endl << lp0(xx) << " " << alp0(xx) << endl;
- TRule xx12("[X] ||| . ||| PHARMACY . ||| X=0");
- TRule xx21("[X] ||| pharmacy . ||| . ||| X=0");
-// TRule xx22("[X] ||| . ||| . ||| X=0");
- TRule xx22("[X] ||| . ||| THE . ||| X=0");
- cerr << xx12 << "\t" << lp0(xx12) << " " << alp0(xx12) << endl;
- cerr << xx21 << "\t" << lp0(xx21) << " " << alp0(xx21) << endl;
- cerr << xx22 << "\t" << lp0(xx22) << " " << alp0(xx22) << endl;
-
- cerr << "Initializing reachability limits...\n";
- vector<Particle> ps(corpusf.size());
- vector<Reachability> reaches; reaches.reserve(corpusf.size());
- for (int ci = 0; ci < corpusf.size(); ++ci)
- reaches.push_back(Reachability(corpusf[ci].size(),
- corpuse[ci].size(),
- kMAX_SRC_PHRASE,
- kMAX_TRG_PHRASE));
- cerr << "Sampling...\n";
- vector<Particle> tmp_p(10000); // work space
- SampleSet<prob_t> pfss;
- SystematicResampleFilter<Particle> filter(&rng);
- // MultinomialResampleFilter<Particle> filter(&rng);
- for (int SS=0; SS < samples; ++SS) {
- for (int ci = 0; ci < corpusf.size(); ++ci) {
- vector<int>& src = corpusf[ci];
- vector<int>& trg = corpuse[ci];
- m.DecrementRulesAndStops(ps[ci].rules);
- const prob_t q_stop = m.StopProbability();
- const prob_t q_cont = m.ContinueProbability();
- cerr << "P(stop)=" << q_stop << "\tP(continue)=" <<q_cont << endl;
-
- BackwardEstimateSym be(m1, invm1, src, trg);
- const Reachability& r = reaches[ci];
- vector<Particle> lps(particles);
-
- bool all_complete = false;
- while(!all_complete) {
- SampleSet<prob_t> ss;
-
- // all particles have now been extended a bit; resample them before extending further
- if (lps[0].trg_cov > 0)
- filter(&lps);
-
- // loop over all particles and extend them
- bool done_nothing = true;
- for (int pi = 0; pi < particles; ++pi) {
- Particle& p = lps[pi];
- int tic = 0;
- while(p.trg_cov < trg.size() && tic < rejuv_freq) {
- ++tic;
- done_nothing = false;
- ss.clear();
- TRule x; x.lhs_ = kLHS;
- prob_t z;
-
- for (int trg_len = 1; trg_len <= kMAX_TRG_PHRASE; ++trg_len) {
- x.e_.push_back(trg[trg_len - 1 + p.trg_cov]);
- for (int src_len = 1; src_len <= kMAX_SRC_PHRASE; ++src_len) {
- if (!r.edges[p.src_cov][p.trg_cov][src_len][trg_len]) continue;
-
- int i = p.src_cov;
- assert(ss.size() < tmp_p.size()); // if fails increase tmp_p size
- Particle& np = tmp_p[ss.size()];
- np = p;
- x.f_.clear();
- for (int j = 0; j < src_len; ++j)
- x.f_.push_back(src[i + j]);
- np.src_cov += x.f_.size();
- np.trg_cov += x.e_.size();
- const bool stop_now = (np.src_cov == src.size() && np.trg_cov == trg.size()); // stop only once the whole pair is covered
- prob_t rp = m.RuleProbability(x) * (stop_now ? q_stop : q_cont);
- np.gamma_last = rp;
- const prob_t u = pow(np.gamma_last * pow(be(np.src_cov, np.trg_cov), 1.2), 0.1);
- //cerr << "**rule=" << x << endl;
- //cerr << " u=" << log(u) << " rule=" << rp << endl;
- ss.add(u);
- np.rules.push_back(TRulePtr(new TRule(x)));
- z += u;
- }
- }
- //cerr << "number of edges to consider: " << ss.size() << endl;
- const int sampled = rng.SelectSample(ss);
- prob_t q_n = ss[sampled] / z;
- p = tmp_p[sampled];
- //m.IncrementRule(*p.rules.back());
- p.weight *= p.gamma_last / q_n;
- //cerr << "[w=" << log(p.weight) << "]\tsampled rule: " << p.rules.back()->AsString() << endl;
- //cerr << p << endl;
- }
- } // loop over particles (pi = 0 .. particles)
- if (done_nothing) all_complete = true;
- prob_t wv = prob_t::Zero();
- for (int pp = 0; pp < lps.size(); ++pp)
- wv += lps[pp].weight;
- for (int pp = 0; pp < lps.size(); ++pp)
- lps[pp].weight /= wv;
- }
- pfss.clear();
- for (int i = 0; i < lps.size(); ++i)
- pfss.add(lps[i].weight);
- const int sampled = rng.SelectSample(pfss);
- ps[ci] = lps[sampled];
- m.IncrementRulesAndStops(lps[sampled].rules);
- for (int i = 0; i < lps[sampled].rules.size(); ++i) { cerr << "S:\t" << lps[sampled].rules[i]->AsString() << "\n"; }
- cerr << "tmp-LLH: " << log(m.Likelihood()) << endl;
- }
- cerr << "LLH: " << log(m.Likelihood()) << endl;
- }
- return 0;
-}
-
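The main loop above is a sequential importance sampler with periodic resampling: each particle is extended by a rule drawn from a proposal (the rule probability tempered by the backward estimate), and its weight is corrected by the ratio of the target score to the proposal probability. Below is a minimal sketch of that correction step with the tempering and backward estimate stripped out; the names (ToyParticle, extend) are illustrative, not from this codebase.

    #include <cmath>
    #include <random>
    #include <vector>

    struct ToyParticle { double log_weight = 0.0; int state = 0; };

    // One extension step: draw s ~ q(.), then correct the weight by
    // gamma(s)/q(s) -- the same update as p.weight *= p.gamma_last / q_n
    // in the loop above, minus the backward-estimate tempering.
    void extend(std::vector<ToyParticle>& ps,
                const std::vector<double>& q,      // normalized proposal
                const std::vector<double>& gamma,  // unnormalized target
                std::mt19937& rng) {
      std::discrete_distribution<int> draw(q.begin(), q.end());
      for (std::size_t i = 0; i < ps.size(); ++i) {
        const int s = draw(rng);
        ps[i].state = s;
        ps[i].log_weight += std::log(gamma[s]) - std::log(q[s]);
      }
    }

The periodic filter(&lps) call above then concentrates particles on high-weight states without changing the expectation of the resulting estimator.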
diff --git a/gi/pf/poisson_uniform_word_model.h b/gi/pf/poisson_uniform_word_model.h
deleted file mode 100644
index 76204a0e..00000000
--- a/gi/pf/poisson_uniform_word_model.h
+++ /dev/null
@@ -1,50 +0,0 @@
-#ifndef _POISSON_UNIFORM_WORD_MODEL_H_
-#define _POISSON_UNIFORM_WORD_MODEL_H_
-
-#include <cmath>
-#include <vector>
-#include "prob.h"
-#include "m.h"
-
-// len ~ Poisson(lambda)
-// for (1..len)
-//   e_i ~ Uniform({Alphabet})
-struct PoissonUniformWordModel {
- explicit PoissonUniformWordModel(const unsigned vocab_size,
- const unsigned alphabet_size,
- const double mean_len = 5) :
- lh(prob_t::One()),
- v0(-std::log(vocab_size)),
- u0(-std::log(alphabet_size)),
- mean_length(mean_len) {}
-
- void ResampleHyperparameters(MT19937*) {}
-
- inline prob_t operator()(const std::vector<WordID>& s) const {
- prob_t p;
- p.logeq(Md::log_poisson(s.size(), mean_length) + s.size() * u0);
- //p.logeq(v0);
- return p;
- }
-
- inline void Increment(const std::vector<WordID>& w, MT19937*) {
- lh *= (*this)(w);
- }
-
- inline void Decrement(const std::vector<WordID>& w, MT19937 *) {
- lh /= (*this)(w);
- }
-
- inline prob_t Likelihood() const { return lh; }
-
- void Summary() const {}
-
- private:
-
- prob_t lh; // running product of the probabilities of all draws from this base
- const double v0; // uniform log prob of generating a word
- const double u0; // uniform log prob of generating a letter
- const double mean_length; // mean length of a word in the base distribution
-};
-
-#endif
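For concreteness, the density this model assigns to a word of length n over an alphabet of size A is Poisson(n; lambda) * (1/A)^n. A self-contained sketch of the same computation in plain <cmath>; log_poisson is written out explicitly here, whereas the codebase version lives in m.h:

    #include <cmath>

    // log Poisson(n; lambda) = n*log(lambda) - lambda - log(n!)
    double log_poisson(unsigned n, double lambda) {
      return n * std::log(lambda) - lambda - std::lgamma(n + 1.0);
    }

    // Mirrors the operator() above: length ~ Poisson(mean_len), then each
    // of the len symbols is drawn uniformly from an alphabet of size A.
    double log_word_prob(unsigned len, unsigned alphabet_size,
                         double mean_len = 5.0) {
      return log_poisson(len, mean_len) - len * std::log((double)alphabet_size);
    }

    // e.g. a 3-letter word over a 26-letter alphabet:
    //   log_word_prob(3, 26) == log_poisson(3, 5.0) - 3 * std::log(26.0)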
diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc
deleted file mode 100644
index e2b67e17..00000000
--- a/gi/pf/pyp_lm.cc
+++ /dev/null
@@ -1,209 +0,0 @@
-#include <iostream>
-#include <tr1/memory>
-#include <queue>
-
-#include <boost/functional.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "corpus_tools.h"
-#include "m.h"
-#include "tdict.h"
-#include "sampler.h"
-#include "ccrp.h"
-#include "tied_resampler.h"
-
-// A not very memory-efficient implementation of an N-gram LM based on PYPs
-// as described in Y.-W. Teh. (2006) A Hierarchical Bayesian Language Model
-// based on Pitman-Yor Processes. In Proc. ACL.
-
-// I use templates to handle the recursive formalation of the prior, so
-// the order of the model has to be specified here, at compile time:
-#define kORDER 3
-
-using namespace std;
-using namespace tr1;
-namespace po = boost::program_options;
-
-boost::shared_ptr<MT19937> prng;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("samples,n",po::value<unsigned>()->default_value(300),"Number of samples")
- ("train,i",po::value<string>(),"Training data file")
- ("test,T",po::value<string>(),"Test data file")
- ("discount_prior_a,a",po::value<double>()->default_value(1.0), "discount ~ Beta(a,b): a=this")
- ("discount_prior_b,b",po::value<double>()->default_value(1.0), "discount ~ Beta(a,b): b=this")
- ("strength_prior_s,s",po::value<double>()->default_value(1.0), "strength ~ Gamma(s,r): s=this")
- ("strength_prior_r,r",po::value<double>()->default_value(1.0), "strength ~ Gamma(s,r): r=this")
- ("random_seed,S",po::value<uint32_t>(), "Random seed");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || (conf->count("train") == 0)) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-template <unsigned N> struct PYPLM;
-
-// uniform base distribution (0-gram model)
-template<> struct PYPLM<0> {
- PYPLM(unsigned vs, double, double, double, double) : p0(1.0 / vs), draws() {}
- void increment(WordID, const vector<WordID>&, MT19937*) { ++draws; }
- void decrement(WordID, const vector<WordID>&, MT19937*) { --draws; assert(draws >= 0); }
- double prob(WordID, const vector<WordID>&) const { return p0; }
- void resample_hyperparameters(MT19937*) {}
- double log_likelihood() const { return draws * log(p0); }
- const double p0;
- int draws;
-};
-
-// represents an N-gram LM
-template <unsigned N> struct PYPLM {
- PYPLM(unsigned vs, double da, double db, double ss, double sr) :
- backoff(vs, da, db, ss, sr),
- tr(da, db, ss, sr, 0.8, 1.0),
- lookup(N-1) {}
- void increment(WordID w, const vector<WordID>& context, MT19937* rng) {
- const double bo = backoff.prob(w, context);
- for (unsigned i = 0; i < N-1; ++i)
- lookup[i] = context[context.size() - 1 - i];
- typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::iterator it = p.find(lookup);
- if (it == p.end()) {
- it = p.insert(make_pair(lookup, CCRP<WordID>(0.5,1))).first;
- tr.Add(&it->second); // add to resampler
- }
- if (it->second.increment(w, bo, rng))
- backoff.increment(w, context, rng);
- }
- void decrement(WordID w, const vector<WordID>& context, MT19937* rng) {
- for (unsigned i = 0; i < N-1; ++i)
- lookup[i] = context[context.size() - 1 - i];
- typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::iterator it = p.find(lookup);
- assert(it != p.end());
- if (it->second.decrement(w, rng))
- backoff.decrement(w, context, rng);
- }
- double prob(WordID w, const vector<WordID>& context) const {
- const double bo = backoff.prob(w, context);
- for (unsigned i = 0; i < N-1; ++i)
- lookup[i] = context[context.size() - 1 - i];
- typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::const_iterator it = p.find(lookup);
- if (it == p.end()) return bo;
- return it->second.prob(w, bo);
- }
-
- double log_likelihood() const {
- double llh = backoff.log_likelihood();
- typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::const_iterator it;
- for (it = p.begin(); it != p.end(); ++it)
- llh += it->second.log_crp_prob();
- llh += tr.LogLikelihood();
- return llh;
- }
-
- void resample_hyperparameters(MT19937* rng) {
- tr.ResampleHyperparameters(rng);
- backoff.resample_hyperparameters(rng);
- }
-
- PYPLM<N-1> backoff;
- TiedResampler<CCRP<WordID> > tr;
- double discount_a, discount_b, strength_s, strength_r;
- double d, strength;
- mutable vector<WordID> lookup; // thread-local
- unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > > p;
-};
-
-int main(int argc, char** argv) {
- po::variables_map conf;
-
- InitCommandLine(argc, argv, &conf);
- const unsigned samples = conf["samples"].as<unsigned>();
- if (conf.count("random_seed"))
- prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
- else
- prng.reset(new MT19937);
- MT19937& rng = *prng;
- vector<vector<WordID> > corpuse;
- set<WordID> vocabe;
- const WordID kEOS = TD::Convert("</s>");
- cerr << "Reading corpus...\n";
- CorpusTools::ReadFromFile(conf["train"].as<string>(), &corpuse, &vocabe);
- cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n";
- vector<vector<WordID> > test;
- if (conf.count("test"))
- CorpusTools::ReadFromFile(conf["test"].as<string>(), &test);
- else
- test = corpuse;
- PYPLM<kORDER> lm(vocabe.size(),
- conf["discount_prior_a"].as<double>(),
- conf["discount_prior_b"].as<double>(),
- conf["strength_prior_s"].as<double>(),
- conf["strength_prior_r"].as<double>());
- vector<WordID> ctx(kORDER - 1, TD::Convert("<s>"));
- for (int SS=0; SS < samples; ++SS) {
- for (int ci = 0; ci < corpuse.size(); ++ci) {
- ctx.resize(kORDER - 1);
- const vector<WordID>& s = corpuse[ci];
- for (int i = 0; i <= s.size(); ++i) {
- WordID w = (i < s.size() ? s[i] : kEOS);
- if (SS > 0) lm.decrement(w, ctx, &rng);
- lm.increment(w, ctx, &rng);
- ctx.push_back(w);
- }
- if (SS > 0) lm.decrement(kEOS, ctx, &rng);
- lm.increment(kEOS, ctx, &rng);
- }
- if (SS % 10 == 9) {
- cerr << " [LLH=" << lm.log_likelihood() << "]" << endl;
- if (SS % 20 == 19) lm.resample_hyperparameters(&rng);
- } else { cerr << '.' << flush; }
- }
- double llh = 0;
- unsigned cnt = 0;
- unsigned oovs = 0;
- for (int ci = 0; ci < test.size(); ++ci) {
- ctx.resize(kORDER - 1);
- const vector<WordID>& s = test[ci];
- for (int i = 0; i <= s.size(); ++i) {
- WordID w = (i < s.size() ? s[i] : kEOS);
- double lp = log(lm.prob(w, ctx)) / log(2);
- if (i < s.size() && vocabe.count(w) == 0) {
- cerr << "**OOV ";
- ++oovs;
- lp = 0;
- }
- cerr << "p(" << TD::Convert(w) << " |";
- for (int j = ctx.size() + 1 - kORDER; j < ctx.size(); ++j)
- cerr << ' ' << TD::Convert(ctx[j]);
- cerr << ") = " << lp << endl;
- ctx.push_back(w);
- llh -= lp;
- cnt++;
- }
- }
- cerr << " Log_10 prob: " << (-llh * log(2) / log(10)) << endl;
- cerr << " Count: " << cnt << endl;
- cerr << " OOVs: " << oovs << endl;
- cerr << "Cross-entropy: " << (llh / cnt) << endl;
- cerr << " Perplexity: " << pow(2, llh / cnt) << endl;
- return 0;
-}
-
-
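The quantity CCRP<WordID>::prob computes for each context is the Pitman-Yor predictive rule from Teh (2006): a discounted empirical estimate interpolated with the backoff distribution, where the interpolation weight grows with the number of tables. A sketch with the counts made explicit; the parameter names here are illustrative, and the real bookkeeping lives in ccrp.h:

    // Pitman-Yor predictive probability of word w in one context.
    // c_w/t_w: customers and tables for w; c/t: context totals;
    // backoff_prob: p(w | shorter context).
    double pyp_prob(double c_w, double t_w, double c, double t,
                    double discount, double strength, double backoff_prob) {
      if (c == 0) return backoff_prob;  // empty restaurant: pure backoff
      return (c_w - discount * t_w) / (c + strength)
           + (strength + discount * t) / (c + strength) * backoff_prob;
    }

With N-1 such levels stacked, the recursion bottoms out in the uniform PYPLM<0> base above.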
diff --git a/gi/pf/pyp_tm.cc b/gi/pf/pyp_tm.cc
deleted file mode 100644
index 6bc8a5bf..00000000
--- a/gi/pf/pyp_tm.cc
+++ /dev/null
@@ -1,128 +0,0 @@
-#include "pyp_tm.h"
-
-#include <tr1/unordered_map>
-#include <iostream>
-#include <queue>
-
-#include "tdict.h"
-#include "ccrp.h"
-#include "pyp_word_model.h"
-#include "tied_resampler.h"
-
-using namespace std;
-using namespace std::tr1;
-
-struct FreqBinner {
- FreqBinner(const std::string& fname) { fd_.Load(fname); }
- unsigned NumberOfBins() const { return fd_.Max() + 1; }
- unsigned Bin(const WordID& w) const { return fd_.LookUp(w); }
- FreqDict<unsigned> fd_;
-};
-
-template <typename Base, class Binner = FreqBinner>
-struct ConditionalPYPWordModel {
- ConditionalPYPWordModel(Base* b, const Binner* bnr = NULL) :
- base(*b),
- binner(bnr),
- btr(binner ? binner->NumberOfBins() + 1u : 2u) {}
-
- void Summary() const {
- cerr << "Number of conditioning contexts: " << r.size() << endl;
- for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) {
- cerr << TD::Convert(it->first) << " \tPYP(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << endl;
- for (CCRP<vector<WordID> >::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2)
- cerr << " " << i2->second.total_dish_count_ << '\t' << TD::GetString(i2->first) << endl;
- }
- }
-
- void ResampleHyperparameters(MT19937* rng) {
- btr.ResampleHyperparameters(rng);
- }
-
- prob_t Prob(const WordID src, const vector<WordID>& trglets) const {
- RuleModelHash::const_iterator it = r.find(src);
- if (it == r.end()) {
- return base(trglets);
- } else {
- return it->second.prob(trglets, base(trglets));
- }
- }
-
- void Increment(const WordID src, const vector<WordID>& trglets, MT19937* rng) {
- RuleModelHash::iterator it = r.find(src);
- if (it == r.end()) {
- it = r.insert(make_pair(src, CCRP<vector<WordID> >(0.5,1.0))).first;
- static const WordID kNULL = TD::Convert("NULL");
- unsigned bin = (src == kNULL ? 0 : 1);
- if (binner && bin) { bin = binner->Bin(src) + 1; }
- btr.Add(bin, &it->second);
- }
- if (it->second.increment(trglets, base(trglets), rng))
- base.Increment(trglets, rng);
- }
-
- void Decrement(const WordID src, const vector<WordID>& trglets, MT19937* rng) {
- RuleModelHash::iterator it = r.find(src);
- assert(it != r.end());
- if (it->second.decrement(trglets, rng)) {
- base.Decrement(trglets, rng);
- }
- }
-
- prob_t Likelihood() const {
- prob_t p = prob_t::One();
- for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) {
- prob_t q; q.logeq(it->second.log_crp_prob());
- p *= q;
- }
- return p;
- }
-
- unsigned UniqueConditioningContexts() const {
- return r.size();
- }
-
- // TODO tie PYP hyperparameters based on source word frequency bins
- Base& base;
- const Binner* binner;
- BinTiedResampler<CCRP<vector<WordID> > > btr;
- typedef unordered_map<WordID, CCRP<vector<WordID> > > RuleModelHash;
- RuleModelHash r;
-};
-
-PYPLexicalTranslation::PYPLexicalTranslation(const vector<vector<WordID> >& lets,
- const unsigned vocab_size,
- const unsigned num_letters) :
- letters(lets),
- base(vocab_size, num_letters, 5),
- tmodel(new ConditionalPYPWordModel<PoissonUniformWordModel>(&base, new FreqBinner("10k.freq"))),
- kX(-TD::Convert("X")) {}
-
-void PYPLexicalTranslation::Summary() const {
- tmodel->Summary();
-}
-
-prob_t PYPLexicalTranslation::Likelihood() const {
- return tmodel->Likelihood() * base.Likelihood();
-}
-
-void PYPLexicalTranslation::ResampleHyperparameters(MT19937* rng) {
- tmodel->ResampleHyperparameters(rng);
-}
-
-unsigned PYPLexicalTranslation::UniqueConditioningContexts() const {
- return tmodel->UniqueConditioningContexts();
-}
-
-prob_t PYPLexicalTranslation::Prob(WordID src, WordID trg) const {
- return tmodel->Prob(src, letters[trg]);
-}
-
-void PYPLexicalTranslation::Increment(WordID src, WordID trg, MT19937* rng) {
- tmodel->Increment(src, letters[trg], rng);
-}
-
-void PYPLexicalTranslation::Decrement(WordID src, WordID trg, MT19937* rng) {
- tmodel->Decrement(src, letters[trg], rng);
-}
-
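A typical collapsed-Gibbs use of the PYPLexicalTranslation interface removes a link, rescores all candidates, samples, and re-adds, which is what the Increment/Decrement pair above supports. A sketch under the assumption of a single alignment variable a_j; the surrounding corpus plumbing is assumed, not part of this file:

    #include <vector>
    #include "pyp_tm.h"
    #include "sampler.h"

    // Hypothetical resampling step for one target word's alignment a_j:
    // remove the old link, score every candidate source word, sample a new
    // link, and re-add it. src_words, trg, and a_j are assumed caller state.
    void resample_link(PYPLexicalTranslation& tm, MT19937* rng,
                       const std::vector<WordID>& src_words, WordID trg,
                       unsigned& a_j) {
      tm.Decrement(src_words[a_j], trg, rng);
      SampleSet<prob_t> ss;
      for (unsigned i = 0; i < src_words.size(); ++i)
        ss.add(tm.Prob(src_words[i], trg));
      a_j = rng->SelectSample(ss);
      tm.Increment(src_words[a_j], trg, rng);
    }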
diff --git a/gi/pf/pyp_tm.h b/gi/pf/pyp_tm.h
deleted file mode 100644
index 2b076a25..00000000
--- a/gi/pf/pyp_tm.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef PYP_LEX_TRANS
-#define PYP_LEX_TRANS
-
-#include <vector>
-#include "wordid.h"
-#include "prob.h"
-#include "sampler.h"
-#include "freqdict.h"
-#include "poisson_uniform_word_model.h"
-
-struct FreqBinner;
-template <typename T, class B> struct ConditionalPYPWordModel;
-
-struct PYPLexicalTranslation {
- explicit PYPLexicalTranslation(const std::vector<std::vector<WordID> >& lets,
- const unsigned vocab_size,
- const unsigned num_letters);
-
- prob_t Likelihood() const;
-
- void ResampleHyperparameters(MT19937* rng);
- prob_t Prob(WordID src, WordID trg) const; // return p(trg | src)
- void Summary() const;
- void Increment(WordID src, WordID trg, MT19937* rng);
- void Decrement(WordID src, WordID trg, MT19937* rng);
- unsigned UniqueConditioningContexts() const;
-
- private:
- const std::vector<std::vector<WordID> >& letters; // spelling dictionary
- PoissonUniformWordModel base; // "generator" of English types
- ConditionalPYPWordModel<PoissonUniformWordModel, FreqBinner>* tmodel; // translation distributions
- // (model English word | French word)
- const WordID kX;
-};
-
-#endif
diff --git a/gi/pf/pyp_word_model.h b/gi/pf/pyp_word_model.h
deleted file mode 100644
index 224a9034..00000000
--- a/gi/pf/pyp_word_model.h
+++ /dev/null
@@ -1,62 +0,0 @@
-#ifndef _PYP_WORD_MODEL_H_
-#define _PYP_WORD_MODEL_H_
-
-#include <iostream>
-#include <cmath>
-#include <vector>
-#include "prob.h"
-#include "ccrp.h"
-#include "m.h"
-#include "tdict.h"
-#include "os_phrase.h"
-
-// PYP(d,s,poisson-uniform) represented as a CRP
-template <class Base>
-struct PYPWordModel {
- explicit PYPWordModel(Base* b) :
- base(*b),
- r(1,1,1,1,0.66,50.0)
- {}
-
- void ResampleHyperparameters(MT19937* rng) {
- r.resample_hyperparameters(rng);
- std::cerr << " PYPWordModel(d=" << r.discount() << ",s=" << r.strength() << ")\n";
- }
-
- inline prob_t operator()(const std::vector<WordID>& s) const {
- return r.prob(s, base(s));
- }
-
- inline void Increment(const std::vector<WordID>& s, MT19937* rng) {
- if (r.increment(s, base(s), rng))
- base.Increment(s, rng);
- }
-
- inline void Decrement(const std::vector<WordID>& s, MT19937 *rng) {
- if (r.decrement(s, rng))
- base.Decrement(s, rng);
- }
-
- inline prob_t Likelihood() const {
- prob_t p; p.logeq(r.log_crp_prob());
- p *= base.Likelihood();
- return p;
- }
-
- void Summary() const {
- std::cerr << "PYPWordModel: generations=" << r.num_customers()
- << " PYP(d=" << r.discount() << ",s=" << r.strength() << ')' << std::endl;
- for (typename CCRP<std::vector<WordID> >::const_iterator it = r.begin(); it != r.end(); ++it) {
- std::cerr << " " << it->second.total_dish_count_
- << " (on " << it->second.table_counts_.size() << " tables) "
- << TD::GetString(it->first) << std::endl;
- }
- }
-
- private:
-
- Base& base; // the base distribution from which novel words are drawn
- CCRP<std::vector<WordID> > r;
-};
-
-#endif
diff --git a/gi/pf/quasi_model2.h b/gi/pf/quasi_model2.h
deleted file mode 100644
index 4075affe..00000000
--- a/gi/pf/quasi_model2.h
+++ /dev/null
@@ -1,177 +0,0 @@
-#ifndef _QUASI_MODEL2_H_
-#define _QUASI_MODEL2_H_
-
-#include <vector>
-#include <cmath>
-#include <tr1/unordered_map>
-#include "boost/functional.hpp"
-#include "prob.h"
-#include "array2d.h"
-#include "slice_sampler.h"
-#include "m.h"
-#include "have_64_bits.h"
-
-struct AlignmentObservation {
- AlignmentObservation() : src_len(), trg_len(), j(), a_j() {}
- AlignmentObservation(unsigned sl, unsigned tl, unsigned tw, unsigned sw) :
- src_len(sl), trg_len(tl), j(tw), a_j(sw) {}
- unsigned short src_len;
- unsigned short trg_len;
- unsigned short j;
- unsigned short a_j;
-};
-
-#ifdef HAVE_64_BITS
-inline size_t hash_value(const AlignmentObservation& o) {
- return reinterpret_cast<const size_t&>(o);
-}
-inline bool operator==(const AlignmentObservation& a, const AlignmentObservation& b) {
- return hash_value(a) == hash_value(b);
-}
-#else
-inline size_t hash_value(const AlignmentObservation& o) {
- size_t h = 1;
- boost::hash_combine(h, o.src_len);
- boost::hash_combine(h, o.trg_len);
- boost::hash_combine(h, o.j);
- boost::hash_combine(h, o.a_j);
- return h;
-}
-#endif
-
-struct QuasiModel2 {
- explicit QuasiModel2(double alpha, double pnull = 0.1) :
- alpha_(alpha),
- pnull_(pnull),
- pnotnull_(1 - pnull) {}
-
- // a_j = 0 => NULL; src_len does *not* include null
- prob_t Prob(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) const {
- if (!a_j) return pnull_;
- return pnotnull_ *
- prob_t(UnnormalizedProb(a_j, j, src_len, trg_len, alpha_) / GetOrComputeZ(j, src_len, trg_len));
- }
-
- void Increment(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) {
- assert(a_j <= src_len);
- assert(j < trg_len);
- ++obs_[AlignmentObservation(src_len, trg_len, j, a_j)];
- }
-
- void Decrement(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) {
- const AlignmentObservation ao(src_len, trg_len, j, a_j);
- int &cc = obs_[ao];
- assert(cc > 0);
- --cc;
- if (!cc) obs_.erase(ao);
- }
-
- struct PNullResampler {
- PNullResampler(const QuasiModel2& m) : m_(m) {}
- const QuasiModel2& m_;
- double operator()(const double& proposed_pnull) const {
- return log(m_.Likelihood(m_.alpha_, proposed_pnull));
- }
- };
-
- struct AlphaResampler {
- AlphaResampler(const QuasiModel2& m) : m_(m) {}
- const QuasiModel2& m_;
- double operator()(const double& proposed_alpha) const {
- return log(m_.Likelihood(proposed_alpha, m_.pnull_.as_float()));
- }
- };
-
- void ResampleHyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) {
- const PNullResampler dr(*this);
- const AlphaResampler ar(*this);
- for (unsigned i = 0; i < nloop; ++i) {
- double pnull = slice_sampler1d(dr, pnull_.as_float(), *rng, 0.00000001,
- 1.0, 0.0, niterations, 100*niterations);
- pnull_ = prob_t(pnull);
- alpha_ = slice_sampler1d(ar, alpha_, *rng, 0.00000001,
- std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
- }
- std::cerr << "QuasiModel2(alpha=" << alpha_ << ",p_null="
- << pnull_.as_float() << ") = " << Likelihood() << std::endl;
- zcache_.clear();
- }
-
- prob_t Likelihood() const {
- return Likelihood(alpha_, pnull_.as_float());
- }
-
- prob_t Likelihood(double alpha, double ppnull) const {
- const prob_t pnull(ppnull);
- const prob_t pnotnull(1 - ppnull);
-
- prob_t p;
- p.logeq(Md::log_gamma_density(alpha, 0.1, 25)); // TODO configure
- assert(!p.is_0());
- prob_t prob_of_ppnull; prob_of_ppnull.logeq(Md::log_beta_density(ppnull, 2, 10));
- assert(!prob_of_ppnull.is_0());
- p *= prob_of_ppnull;
- for (ObsCount::const_iterator it = obs_.begin(); it != obs_.end(); ++it) {
- const AlignmentObservation& ao = it->first;
- if (ao.a_j) {
- prob_t u = XUnnormalizedProb(ao.a_j, ao.j, ao.src_len, ao.trg_len, alpha);
- prob_t z = XComputeZ(ao.j, ao.src_len, ao.trg_len, alpha);
- prob_t pa(u / z);
- pa *= pnotnull;
- pa.poweq(it->second);
- p *= pa;
- } else {
- p *= pnull.pow(it->second);
- }
- }
- return p;
- }
-
- private:
- static prob_t XUnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) {
- prob_t p;
- p.logeq(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha);
- return p;
- }
-
- static prob_t XComputeZ(unsigned j, unsigned src_len, unsigned trg_len, double alpha) {
- prob_t z = prob_t::Zero();
- for (int a_j = 1; a_j <= src_len; ++a_j)
- z += XUnnormalizedProb(a_j, j, src_len, trg_len, alpha);
- return z;
- }
-
- static double UnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) {
- return exp(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha);
- }
-
- static double ComputeZ(unsigned j, unsigned src_len, unsigned trg_len, double alpha) {
- double z = 0;
- for (int a_j = 1; a_j <= src_len; ++a_j)
- z += UnnormalizedProb(a_j, j, src_len, trg_len, alpha);
- return z;
- }
-
- const double& GetOrComputeZ(unsigned j, unsigned src_len, unsigned trg_len) const {
- if (src_len >= zcache_.size())
- zcache_.resize(src_len + 1);
- if (trg_len >= zcache_[src_len].size())
- zcache_[src_len].resize(trg_len + 1);
- std::vector<double>& zv = zcache_[src_len][trg_len];
- if (zv.size() == 0)
- zv.resize(trg_len);
- double& z = zv[j];
- if (!z)
- z = ComputeZ(j, src_len, trg_len, alpha_);
- return z;
- }
-
- double alpha_;
- prob_t pnull_;
- prob_t pnotnull_;
- mutable std::vector<std::vector<std::vector<double> > > zcache_;
- typedef std::tr1::unordered_map<AlignmentObservation, int, boost::hash<AlignmentObservation> > ObsCount;
- ObsCount obs_;
-};
-
-#endif
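Written out, the non-NULL alignment probability above is p(a_j | j, l, m) = (1 - p_null) * exp(-alpha * |(a_j - 1)/l - j/m|) / Z(j, l, m), where Z sums the exponential term over a_j = 1..l. A standalone sketch of the same computation; qm2_prob is an illustrative name:

    #include <cmath>

    // Standalone version of the non-NULL branch of QuasiModel2::Prob.
    double qm2_prob(unsigned a_j, unsigned j, unsigned src_len,
                    unsigned trg_len, double alpha, double p_null) {
      if (a_j == 0) return p_null;  // NULL alignment gets flat mass
      double z = 0;
      for (unsigned k = 1; k <= src_len; ++k)
        z += std::exp(-std::fabs(double(k - 1) / src_len
                                 - double(j) / trg_len) * alpha);
      const double u = std::exp(-std::fabs(double(a_j - 1) / src_len
                                           - double(j) / trg_len) * alpha);
      return (1 - p_null) * u / z;
    }

Larger alpha concentrates mass near the diagonal; alpha = 0 recovers a uniform alignment over the source positions.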
diff --git a/gi/pf/reachability.cc b/gi/pf/reachability.cc
deleted file mode 100644
index 7d0d04ac..00000000
--- a/gi/pf/reachability.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-#include "reachability.h"
-
-#include <vector>
-#include <iostream>
-
-using namespace std;
-
-struct SState {
- SState() : prev_src_covered(), prev_trg_covered() {}
- SState(int i, int j) : prev_src_covered(i), prev_trg_covered(j) {}
- int prev_src_covered;
- int prev_trg_covered;
-};
-
-void Reachability::ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) {
- typedef boost::multi_array<vector<SState>, 2> array_type;
- array_type a(boost::extents[srclen + 1][trglen + 1]);
- a[0][0].push_back(SState());
- for (int i = 0; i < srclen; ++i) {
- for (int j = 0; j < trglen; ++j) {
- if (a[i][j].size() == 0) continue;
- const SState prev(i,j);
- for (int k = 1; k <= src_max_phrase_len; ++k) {
- if ((i + k) > srclen) continue;
- for (int l = 1; l <= trg_max_phrase_len; ++l) {
- if ((j + l) > trglen) continue;
- a[i + k][j + l].push_back(prev);
- }
- }
- }
- }
- a[0][0].clear();
- //cerr << srclen << "," << trglen << ": Final cell contains " << a[srclen][trglen].size() << " back pointers\n";
- if (a[srclen][trglen].empty()) {
- cerr << "Sequence pair with lengths (" << srclen << ',' << trglen << ") violates reachability constraints\n";
- nodes = 0;
- return;
- }
-
- typedef boost::multi_array<bool, 2> rarray_type;
- rarray_type r(boost::extents[srclen + 1][trglen + 1]);
- r[srclen][trglen] = true;
- nodes = 0;
- for (int i = srclen; i >= 0; --i) {
- for (int j = trglen; j >= 0; --j) {
- vector<SState>& prevs = a[i][j];
- if (!r[i][j]) { prevs.clear(); }
- for (int k = 0; k < prevs.size(); ++k) {
- r[prevs[k].prev_src_covered][prevs[k].prev_trg_covered] = true;
- int src_delta = i - prevs[k].prev_src_covered;
- edges[prevs[k].prev_src_covered][prevs[k].prev_trg_covered][src_delta][j - prevs[k].prev_trg_covered] = true;
- valid_deltas[prevs[k].prev_src_covered][prevs[k].prev_trg_covered].push_back(make_pair<short,short>(src_delta,j - prevs[k].prev_trg_covered));
- short &msd = max_src_delta[prevs[k].prev_src_covered][prevs[k].prev_trg_covered];
- if (src_delta > msd) msd = src_delta;
- }
- }
- }
- assert(!edges[0][0][1][0]);
- assert(!edges[0][0][0][1]);
- assert(!edges[0][0][0][0]);
- assert(max_src_delta[0][0] > 0);
- nodes = 0;
- for (int i = 0; i < srclen; ++i) {
- for (int j = 0; j < trglen; ++j) {
- if (valid_deltas[i][j].size() > 0) {
- node_addresses[i][j] = nodes++;
- } else {
- node_addresses[i][j] = -1;
- }
- }
- }
- cerr << "Sequence pair with lengths (" << srclen << ',' << trglen << ") has " << valid_deltas[0][0].size() << " out edges in its root node, " << nodes << " nodes in total, and outside estimate matrix will require " << sizeof(float)*nodes << " bytes\n";
- }
-
diff --git a/gi/pf/reachability.h b/gi/pf/reachability.h
deleted file mode 100644
index 1e22c76a..00000000
--- a/gi/pf/reachability.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef _REACHABILITY_H_
-#define _REACHABILITY_H_
-
-#include "boost/multi_array.hpp"
-
-// determines minimum and maximum lengths of outgoing edges from all
-// coverage positions such that the alignment path respects src and
-// trg maximum phrase sizes
-//
-// runs in O(n^2 * src_max * trg_max) time but should be relatively fast
-//
-// currently forbids 0 -> n and n -> 0 alignments
-
-struct Reachability {
- unsigned nodes;
- boost::multi_array<bool, 4> edges; // edges[src_covered][trg_covered][src_delta][trg_delta] is this edge worth exploring?
- boost::multi_array<short, 2> max_src_delta; // msd[src_covered][trg_covered] -- the largest src delta that's valid
- boost::multi_array<short, 2> node_addresses; // na[src_covered][trg_covered] -- the index of the node in a one-dimensional array (of size "nodes")
- boost::multi_array<std::vector<std::pair<short,short> >, 2> valid_deltas; // valid_deltas[src_covered][trg_covered] list of valid transitions leaving a particular node
-
- Reachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) :
- nodes(),
- edges(boost::extents[srclen][trglen][src_max_phrase_len+1][trg_max_phrase_len+1]),
- max_src_delta(boost::extents[srclen][trglen]),
- node_addresses(boost::extents[srclen][trglen]),
- valid_deltas(boost::extents[srclen][trglen]) {
- ComputeReachability(srclen, trglen, src_max_phrase_len, trg_max_phrase_len);
- }
-
- private:
- void ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len);
-};
-
-#endif
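A minimal usage sketch, assuming only this header: build the table for one sentence pair, then consult edges and valid_deltas before proposing a phrase-pair extension.

    #include <iostream>
    #include "reachability.h"

    int main() {
      // 5 source words, 7 target words, phrase length limits 3 and 3
      Reachability r(5, 7, 3, 3);
      if (r.nodes == 0) return 1;  // pair not derivable under the limits
      // can coverage (0,0) be extended by a (src_delta=2, trg_delta=1)
      // phrase pair and still complete the alignment path?
      if (r.edges[0][0][2][1])
        std::cout << "extension (2,1) from the root is completable\n";
      // enumerate all legal extensions leaving the root node
      for (unsigned k = 0; k < r.valid_deltas[0][0].size(); ++k)
        std::cout << r.valid_deltas[0][0][k].first << ','
                  << r.valid_deltas[0][0][k].second << '\n';
      return 0;
    }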
diff --git a/gi/pf/tied_resampler.h b/gi/pf/tied_resampler.h
deleted file mode 100644
index a4f4af36..00000000
--- a/gi/pf/tied_resampler.h
+++ /dev/null
@@ -1,122 +0,0 @@
-#ifndef _TIED_RESAMPLER_H_
-#define _TIED_RESAMPLER_H_
-
-#include <set>
-#include <vector>
-#include "sampler.h"
-#include "slice_sampler.h"
-#include "m.h"
-
-template <class CRP>
-struct TiedResampler {
- explicit TiedResampler(double da, double db, double ss, double sr, double d=0.5, double s=1.0) :
- d_alpha(da),
- d_beta(db),
- s_shape(ss),
- s_rate(sr),
- discount(d),
- strength(s) {}
-
- void Add(CRP* crp) {
- crps.insert(crp);
- crp->set_discount(discount);
- crp->set_strength(strength);
- assert(!crp->has_discount_prior());
- assert(!crp->has_strength_prior());
- }
-
- void Remove(CRP* crp) {
- crps.erase(crp);
- }
-
- size_t size() const {
- return crps.size();
- }
-
- double LogLikelihood(double d, double s) const {
- if (s <= -d) return -std::numeric_limits<double>::infinity();
- double llh = Md::log_beta_density(d, d_alpha, d_beta) +
- Md::log_gamma_density(d + s, s_shape, s_rate);
- for (typename std::set<CRP*>::iterator it = crps.begin(); it != crps.end(); ++it)
- llh += (*it)->log_crp_prob(d, s);
- return llh;
- }
-
- double LogLikelihood() const {
- return LogLikelihood(discount, strength);
- }
-
- struct DiscountResampler {
- DiscountResampler(const TiedResampler& m) : m_(m) {}
- const TiedResampler& m_;
- double operator()(const double& proposed_discount) const {
- return m_.LogLikelihood(proposed_discount, m_.strength);
- }
- };
-
- struct AlphaResampler {
- AlphaResampler(const TiedResampler& m) : m_(m) {}
- const TiedResampler& m_;
- double operator()(const double& proposed_strength) const {
- return m_.LogLikelihood(m_.discount, proposed_strength);
- }
- };
-
- void ResampleHyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) {
- if (size() == 0) { std::cerr << "EMPTY - not resampling\n"; return; }
- const DiscountResampler dr(*this);
- const AlphaResampler ar(*this);
- for (int iter = 0; iter < nloop; ++iter) {
- strength = slice_sampler1d(ar, strength, *rng, -discount + std::numeric_limits<double>::min(),
- std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
- double min_discount = std::numeric_limits<double>::min();
- if (strength < 0.0) min_discount -= strength;
- discount = slice_sampler1d(dr, discount, *rng, min_discount,
- 1.0, 0.0, niterations, 100*niterations);
- }
- strength = slice_sampler1d(ar, strength, *rng, -discount + std::numeric_limits<double>::min(),
- std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
- std::cerr << "TiedCRPs(d=" << discount << ",s="
- << strength << ") = " << LogLikelihood(discount, strength) << std::endl;
- for (typename std::set<CRP*>::iterator it = crps.begin(); it != crps.end(); ++it)
- (*it)->set_hyperparameters(discount, strength);
- }
- private:
- std::set<CRP*> crps;
- const double d_alpha, d_beta, s_shape, s_rate;
- double discount, strength;
-};
-
-// split according to some criterion
-template <class CRP>
-struct BinTiedResampler {
- explicit BinTiedResampler(unsigned nbins) :
- resamplers(nbins, TiedResampler<CRP>(1,1,1,1)) {}
-
- void Add(unsigned bin, CRP* crp) {
- resamplers[bin].Add(crp);
- }
-
- void Remove(unsigned bin, CRP* crp) {
- resamplers[bin].Remove(crp);
- }
-
- void ResampleHyperparameters(MT19937* rng) {
- for (unsigned i = 0; i < resamplers.size(); ++i) {
- std::cerr << "BIN " << i << " (" << resamplers[i].size() << " CRPs): " << std::flush;
- resamplers[i].ResampleHyperparameters(rng);
- }
- }
-
- double LogLikelihood() const {
- double llh = 0;
- for (unsigned i = 0; i < resamplers.size(); ++i)
- llh += resamplers[i].LogLikelihood();
- return llh;
- }
-
- private:
- std::vector<TiedResampler<CRP> > resamplers;
-};
-
-#endif
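A usage sketch, assuming the codebase's CCRP and MT19937: create one TiedResampler, register every restaurant that should share hyperparameters, and resample after each sampling sweep.

    #include <vector>
    #include "ccrp.h"
    #include "sampler.h"
    #include "tied_resampler.h"
    #include "wordid.h"

    int main() {
      // Beta(1,1) prior on the discount, Gamma(1,1) on discount+strength
      TiedResampler<CCRP<WordID> > tr(1.0, 1.0, 1.0, 1.0);
      std::vector<CCRP<WordID> > crps(10, CCRP<WordID>(0.5, 1.0));
      for (unsigned i = 0; i < crps.size(); ++i)
        tr.Add(&crps[i]);  // all ten restaurants share one (discount, strength)
      MT19937 rng;
      // ... seat customers in the individual CRPs here ...
      tr.ResampleHyperparameters(&rng);  // slice-samples the shared pair
      return 0;
    }

BinTiedResampler above applies the same idea per frequency bin, so rare and frequent conditioning events can learn different shared hyperparameters.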
diff --git a/gi/pf/tpf.cc b/gi/pf/tpf.cc
deleted file mode 100644
index 7348d21c..00000000
--- a/gi/pf/tpf.cc
+++ /dev/null
@@ -1,99 +0,0 @@
-#include <iostream>
-#include <tr1/memory>
-#include <queue>
-
-#include "sampler.h"
-
-using namespace std;
-using namespace tr1;
-
-shared_ptr<MT19937> prng;
-
-struct Particle {
- Particle() : weight(prob_t::One()) {}
- vector<int> states;
- prob_t weight;
- prob_t gamma_last;
-};
-
-ostream& operator<<(ostream& os, const Particle& p) {
- os << "[";
- for (int i = 0; i < p.states.size(); ++i) os << p.states[i] << ' ';
- os << "| w=" << log(p.weight) << ']';
- return os;
-}
-
-void Rejuvenate(vector<Particle>& pps) {
- SampleSet<prob_t> ss;
- vector<Particle> nps(pps.size());
- for (int i = 0; i < pps.size(); ++i) {
-// cerr << pps[i] << endl;
- ss.add(pps[i].weight);
- }
-// cerr << "REJUVINATING...\n";
- for (int i = 0; i < pps.size(); ++i) {
- nps[i] = pps[prng->SelectSample(ss)];
- nps[i].weight = prob_t(1.0 / pps.size());
-// cerr << nps[i] << endl;
- }
- nps.swap(pps);
-// exit(1);
-}
-
-int main(int argc, char** argv) {
- const unsigned particles = 100;
- prng.reset(new MT19937);
- MT19937& rng = *prng;
-
- // q(a) = 0.4
- // q(b) = 0.6
- // q(c) = 0
- SampleSet<double> ssq;
- ssq.add(0.4);
- ssq.add(0.6);
- ssq.add(0);
- double qz = 1;
-
- // p(a) = 0.2
- // p(b) = 0.8
- vector<double> p(3);
- p[0] = 0.2;
- p[1] = 0.8;
- p[2] = 0;
-
- vector<int> counts(3);
- int tot = 0;
-
- vector<Particle> pps(particles);
- SampleSet<prob_t> ppss;
- int LEN = 12;
- int PP = 1;
- while (pps[0].states.size() < LEN) {
- for (int pi = 0; pi < particles; ++pi) {
- Particle& prt = pps[pi];
-
- bool redo = true;
- const Particle savedp = prt;
- while (redo) {
- redo = false;
- for (int i = 0; i < PP; ++i) {
- int s = rng.SelectSample(ssq);
- double gamma_last = p[s];
- if (!gamma_last) { redo = true; break; }
- double q = ssq[s] / qz;
- prt.states.push_back(s);
- prt.weight *= prob_t(gamma_last / q);
- }
- if (redo) { prt = savedp; continue; }
- }
- }
- Rejuvenate(pps);
- }
- ppss.clear();
- for (int i = 0; i < particles; ++i) { ppss.add(pps[i].weight); }
- int sp = rng.SelectSample(ppss);
- cerr << pps[sp] << endl;
-
- return 0;
-}
-
diff --git a/gi/pf/transliterations.cc b/gi/pf/transliterations.cc
deleted file mode 100644
index b2996f65..00000000
--- a/gi/pf/transliterations.cc
+++ /dev/null
@@ -1,334 +0,0 @@
-#include "transliterations.h"
-
-#include <iostream>
-#include <vector>
-
-#include "boost/shared_ptr.hpp"
-
-#include "backward.h"
-#include "filelib.h"
-#include "tdict.h"
-#include "trule.h"
-#include "filelib.h"
-#include "ccrp_nt.h"
-#include "m.h"
-#include "reachability.h"
-
-using namespace std;
-using namespace std::tr1;
-
-struct TruncatedConditionalLengthModel {
- TruncatedConditionalLengthModel(unsigned max_src_size, unsigned max_trg_size, double expected_src_to_trg_ratio) :
- plens(max_src_size+1, vector<prob_t>(max_trg_size+1, 0.0)) {
- for (unsigned i = 1; i <= max_src_size; ++i) {
- prob_t z = prob_t::Zero();
- for (unsigned j = 1; j <= max_trg_size; ++j)
- z += (plens[i][j] = prob_t(0.01 + exp(Md::log_poisson(j, i * expected_src_to_trg_ratio))));
- for (unsigned j = 1; j <= max_trg_size; ++j)
- plens[i][j] /= z;
- //for (unsigned j = 1; j <= max_trg_size; ++j)
- // cerr << "P(trg_len=" << j << " | src_len=" << i << ") = " << plens[i][j] << endl;
- }
- }
-
- // return p(tlen | slen) for *chunks* not full words
- inline const prob_t& operator()(int slen, int tlen) const {
- return plens[slen][tlen];
- }
-
- vector<vector<prob_t> > plens;
-};
-
-struct CondBaseDist {
- CondBaseDist(unsigned max_src_size, unsigned max_trg_size, double expected_src_to_trg_ratio) :
- tclm(max_src_size, max_trg_size, expected_src_to_trg_ratio) {}
-
- prob_t operator()(const vector<WordID>& src, unsigned sf, unsigned st,
- const vector<WordID>& trg, unsigned tf, unsigned tt) const {
- prob_t p = tclm(st - sf, tt - tf); // target len | source length ~ TCLM(source len)
- assert(!"not impl");
- return p;
- }
- inline prob_t operator()(const vector<WordID>& src, const vector<WordID>& trg) const {
- return (*this)(src, 0, src.size(), trg, 0, trg.size());
- }
- TruncatedConditionalLengthModel tclm;
-};
-
-// represents transliteration phrase probabilities, e.g.
-// p( a l - | A l ) , p( o | A w ) , ...
-struct TransliterationChunkConditionalModel {
- explicit TransliterationChunkConditionalModel(const CondBaseDist& pp0) :
- d(0.0),
- strength(1.0),
- rp0(pp0) {
- }
-
- void Summary() const {
- std::cerr << "Number of conditioning contexts: " << r.size() << std::endl;
- for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) {
- std::cerr << TD::GetString(it->first) << " \t(\\alpha = " << it->second.alpha() << ") --------------------------" << std::endl;
- for (CCRP_NoTable<TRule>::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2)
- std::cerr << " " << i2->second << '\t' << i2->first << std::endl;
- }
- }
-
- int DecrementRule(const TRule& rule) {
- RuleModelHash::iterator it = r.find(rule.f_);
- assert(it != r.end());
- int count = it->second.decrement(rule);
- if (count) {
- if (it->second.num_customers() == 0) r.erase(it);
- }
- return count;
- }
-
- int IncrementRule(const TRule& rule) {
- RuleModelHash::iterator it = r.find(rule.f_);
- if (it == r.end()) {
- it = r.insert(make_pair(rule.f_, CCRP_NoTable<TRule>(strength))).first;
- }
- int count = it->second.increment(rule);
- return count;
- }
-
- void IncrementRules(const std::vector<TRulePtr>& rules) {
- for (int i = 0; i < rules.size(); ++i)
- IncrementRule(*rules[i]);
- }
-
- void DecrementRules(const std::vector<TRulePtr>& rules) {
- for (int i = 0; i < rules.size(); ++i)
- DecrementRule(*rules[i]);
- }
-
- prob_t RuleProbability(const TRule& rule) const {
- prob_t p;
- RuleModelHash::const_iterator it = r.find(rule.f_);
- if (it == r.end()) {
- p = rp0(rule.f_, rule.e_);
- } else {
- p = it->second.prob(rule, rp0(rule.f_, rule.e_));
- }
- return p;
- }
-
- double LogLikelihood(const double& dd, const double& aa) const {
- if (aa <= -dd) return -std::numeric_limits<double>::infinity();
- //double llh = Md::log_beta_density(dd, 10, 3) + Md::log_gamma_density(aa, 1, 1);
- double llh = //Md::log_beta_density(dd, 1, 1) +
- Md::log_gamma_density(dd + aa, 1, 1);
- std::tr1::unordered_map<std::vector<WordID>, CCRP_NoTable<TRule>, boost::hash<std::vector<WordID> > >::const_iterator it;
- for (it = r.begin(); it != r.end(); ++it)
- llh += it->second.log_crp_prob(aa);
- return llh;
- }
-
- struct AlphaResampler {
- AlphaResampler(const TransliterationChunkConditionalModel& m) : m_(m) {}
- const TransliterationChunkConditionalModel& m_;
- double operator()(const double& proposed_strength) const {
- return m_.LogLikelihood(m_.d, proposed_strength);
- }
- };
-
- void ResampleHyperparameters(MT19937* rng) {
- std::tr1::unordered_map<std::vector<WordID>, CCRP_NoTable<TRule>, boost::hash<std::vector<WordID> > >::iterator it;
- //const unsigned nloop = 5;
- const unsigned niterations = 10;
- //DiscountResampler dr(*this);
- AlphaResampler ar(*this);
-#if 0
- for (int iter = 0; iter < nloop; ++iter) {
- strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits<double>::min(),
- std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
- double min_discount = std::numeric_limits<double>::min();
- if (strength < 0.0) min_discount -= strength;
- d = slice_sampler1d(dr, d, *rng, min_discount,
- 1.0, 0.0, niterations, 100*niterations);
- }
-#endif
- strength = slice_sampler1d(ar, strength, *rng, -d,
- std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
- std::cerr << "CTMModel(alpha=" << strength << ") = " << LogLikelihood(d, strength) << std::endl;
- for (it = r.begin(); it != r.end(); ++it) {
-#if 0
- it->second.set_discount(d);
-#endif
- it->second.set_alpha(strength);
- }
- }
-
- prob_t Likelihood() const {
- prob_t p; p.logeq(LogLikelihood(d, strength));
- return p;
- }
-
- const CondBaseDist& rp0;
- typedef std::tr1::unordered_map<std::vector<WordID>,
- CCRP_NoTable<TRule>,
- boost::hash<std::vector<WordID> > > RuleModelHash;
- RuleModelHash r;
- double d, strength;
-};
-
-struct GraphStructure {
- GraphStructure() : r() {}
- // leak memory - these are basically static
- const Reachability* r;
- bool IsReachable() const { return r->nodes > 0; }
-};
-
-struct ProbabilityEstimates {
- ProbabilityEstimates() : gs(), backward() {}
- explicit ProbabilityEstimates(const GraphStructure& g) :
- gs(&g), backward() {
- if (g.r->nodes > 0)
- backward = new float[g.r->nodes];
- }
- // leak memory, these are static
-
- // returns an estimate of the marginal probability
- double MarginalEstimate() const {
- if (!backward) return 0;
- return backward[0];
- }
-
- // returns a backward estimate
- double Backward(int src_covered, int trg_covered) const {
- if (!backward) return 0;
- int ind = gs->r->node_addresses[src_covered][trg_covered];
- if (ind < 0) return 0;
- return backward[ind];
- }
-
- prob_t estp;
- float* backward;
- private:
- const GraphStructure* gs;
-};
-
-struct TransliterationsImpl {
- TransliterationsImpl(int max_src, int max_trg, double sr, const BackwardEstimator& b) :
- cp0(max_src, max_trg, sr),
- tccm(cp0),
- be(b),
- kMAX_SRC_CHUNK(max_src),
- kMAX_TRG_CHUNK(max_trg),
- kS2T_RATIO(sr),
- tot_pairs(), tot_mem() {
- }
- const CondBaseDist cp0;
- TransliterationChunkConditionalModel tccm;
- const BackwardEstimator& be;
-
- void Initialize(WordID src, const vector<WordID>& src_lets, WordID trg, const vector<WordID>& trg_lets) {
- const size_t src_len = src_lets.size();
- const size_t trg_len = trg_lets.size();
-
- // init graph structure
- if (src_len >= graphs.size()) graphs.resize(src_len + 1);
- if (trg_len >= graphs[src_len].size()) graphs[src_len].resize(trg_len + 1);
- GraphStructure& gs = graphs[src_len][trg_len];
- if (!gs.r) {
- double rat = exp(fabs(log(trg_len / (src_len * kS2T_RATIO))));
- if (rat > 1.5 || (rat > 2.4 && src_len < 6)) {
- cerr << " ** Forbidding transliterations of size " << src_len << "," << trg_len << ": " << rat << endl;
- gs.r = new Reachability(src_len, trg_len, 0, 0);
- } else {
- gs.r = new Reachability(src_len, trg_len, kMAX_SRC_CHUNK, kMAX_TRG_CHUNK);
- }
- }
-
- const Reachability& r = *gs.r;
-
- // init backward estimates
- if (src >= ests.size()) ests.resize(src + 1);
- unordered_map<WordID, ProbabilityEstimates>::iterator it = ests[src].find(trg);
- if (it != ests[src].end()) return; // already initialized
-
- it = ests[src].insert(make_pair(trg, ProbabilityEstimates(gs))).first;
- ProbabilityEstimates& est = it->second;
- if (!gs.r->nodes) return; // not derivable subject to length constraints
-
- be.InitializeGrid(src_lets, trg_lets, r, kS2T_RATIO, est.backward);
- cerr << TD::GetString(src_lets) << " ||| " << TD::GetString(trg_lets) << " ||| " << (est.backward[0] / trg_lets.size()) << endl;
- tot_pairs++;
- tot_mem += sizeof(float) * gs.r->nodes;
- }
-
- void Forbid(WordID src, const vector<WordID>& src_lets, WordID trg, const vector<WordID>& trg_lets) {
- const size_t src_len = src_lets.size();
- const size_t trg_len = trg_lets.size();
- // TODO
- }
-
- prob_t EstimateProbability(WordID s, const vector<WordID>& src, WordID t, const vector<WordID>& trg) const {
- assert(src.size() < graphs.size());
- const vector<GraphStructure>& tv = graphs[src.size()];
- assert(trg.size() < tv.size());
- const GraphStructure& gs = tv[trg.size()];
- if (gs.r->nodes == 0)
- return prob_t::Zero();
- const unordered_map<WordID, ProbabilityEstimates>::const_iterator it = ests[s].find(t);
- assert(it != ests[s].end());
- return it->second.estp;
- }
-
- void GraphSummary() const {
- double to = 0;
- double tn = 0;
- double tt = 0;
- for (int i = 0; i < graphs.size(); ++i) {
- const vector<GraphStructure>& vt = graphs[i];
- for (int j = 0; j < vt.size(); ++j) {
- const GraphStructure& gs = vt[j];
- if (!gs.r) continue;
- tt++;
- for (int k = 0; k < i; ++k) {
- for (int l = 0; l < j; ++l) {
- size_t c = gs.r->valid_deltas[k][l].size();
- if (c) {
- tn += 1;
- to += c;
- }
- }
- }
- }
- }
- cerr << " Average nodes = " << (tn / tt) << endl;
- cerr << "Average out-degree = " << (to / tn) << endl;
- cerr << " Unique structures = " << tt << endl;
- cerr << " Unique pairs = " << tot_pairs << endl;
- cerr << " BEs size = " << (tot_mem / (1024.0*1024.0)) << " MB" << endl;
- }
-
- const int kMAX_SRC_CHUNK;
- const int kMAX_TRG_CHUNK;
- const double kS2T_RATIO;
- unsigned tot_pairs;
- size_t tot_mem;
- vector<vector<GraphStructure> > graphs; // graphs[src_len][trg_len]
- vector<unordered_map<WordID, ProbabilityEstimates> > ests; // ests[src][trg]
-};
-
-Transliterations::Transliterations(int max_src, int max_trg, double sr, const BackwardEstimator& be) :
- pimpl_(new TransliterationsImpl(max_src, max_trg, sr, be)) {}
-Transliterations::~Transliterations() { delete pimpl_; }
-
-void Transliterations::Initialize(WordID src, const vector<WordID>& src_lets, WordID trg, const vector<WordID>& trg_lets) {
- pimpl_->Initialize(src, src_lets, trg, trg_lets);
-}
-
-prob_t Transliterations::EstimateProbability(WordID s, const vector<WordID>& src, WordID t, const vector<WordID>& trg) const {
- return pimpl_->EstimateProbability(s, src,t, trg);
-}
-
-void Transliterations::Forbid(WordID src, const vector<WordID>& src_lets, WordID trg, const vector<WordID>& trg_lets) {
- pimpl_->Forbid(src, src_lets, trg, trg_lets);
-}
-
-void Transliterations::GraphSummary() const {
- pimpl_->GraphSummary();
-}
-
diff --git a/gi/pf/transliterations.h b/gi/pf/transliterations.h
deleted file mode 100644
index 49d14684..00000000
--- a/gi/pf/transliterations.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef _TRANSLITERATIONS_H_
-#define _TRANSLITERATIONS_H_
-
-#include <vector>
-#include "wordid.h"
-#include "prob.h"
-
-struct BackwardEstimator;
-struct TransliterationsImpl;
-struct Transliterations {
- // max_src and max_trg indicate how big the transliteration phrases can be
- // s2t_rat is the expected source-to-target length ratio, used to filter out
- // implausible length pairs (see reachability.h for the coverage constraints)
- explicit Transliterations(int max_src, int max_trg, double s2t_rat, const BackwardEstimator& be);
- ~Transliterations();
- void Initialize(WordID src, const std::vector<WordID>& src_lets, WordID trg, const std::vector<WordID>& trg_lets);
- void Forbid(WordID src, const std::vector<WordID>& src_lets, WordID trg, const std::vector<WordID>& trg_lets);
- void GraphSummary() const;
- prob_t EstimateProbability(WordID s, const std::vector<WordID>& src, WordID t, const std::vector<WordID>& trg) const;
- private:
- TransliterationsImpl* pimpl_;
-};
-
-#endif
-
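A sketch of the intended call sequence, assuming a BackwardEstimator and letter sequences supplied by the caller; the names and chunk-size values are illustrative:

    #include <vector>
    #include "backward.h"
    #include "transliterations.h"

    // src_id/trg_id are word ids, *_letters their spellings.
    prob_t score_pair(const BackwardEstimator& be,
                      WordID src_id, const std::vector<WordID>& src_letters,
                      WordID trg_id, const std::vector<WordID>& trg_letters) {
      Transliterations tl(4, 4, 1.0, be);  // max chunk sizes, src:trg ratio
      tl.Initialize(src_id, src_letters, trg_id, trg_letters);
      return tl.EstimateProbability(src_id, src_letters, trg_id, trg_letters);
    }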
diff --git a/gi/pf/unigrams.cc b/gi/pf/unigrams.cc
deleted file mode 100644
index 40829775..00000000
--- a/gi/pf/unigrams.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-#include "unigrams.h"
-
-#include <string>
-#include <cmath>
-
-#include "stringlib.h"
-#include "filelib.h"
-
-using namespace std;
-
-void UnigramModel::LoadUnigrams(const string& fname) {
- cerr << "Loading unigram probabilities from " << fname << " ..." << endl;
- ReadFile rf(fname);
- string line;
- istream& in = *rf.stream();
- assert(in);
- getline(in, line);
- assert(line.empty());
- getline(in, line);
- assert(line == "\\data\\");
- getline(in, line);
- size_t pos = line.find("ngram 1=");
- assert(pos == 0);
- assert(line.size() > 8);
- const size_t num_unigrams = atoi(&line[8]);
- getline(in, line);
- assert(line.empty());
- getline(in, line);
- assert(line == "\\1-grams:");
- for (size_t i = 0; i < num_unigrams; ++i) {
- getline(in, line);
- assert(line.size() > 0);
- pos = line.find('\t');
- assert(pos > 0);
- assert(pos + 1 < line.size());
- const WordID w = TD::Convert(line.substr(pos + 1));
- line[pos] = 0;
- float p = atof(&line[0]);
- if (w < probs_.size()) probs_[w].logeq(p * log(10)); else cerr << "WARNING: don't know about '" << TD::Convert(w) << "'\n";
- }
-}
-
-void UnigramWordModel::LoadUnigrams(const string& fname) {
- cerr << "Loading unigram probabilities from " << fname << " ..." << endl;
- ReadFile rf(fname);
- string line;
- istream& in = *rf.stream();
- assert(in);
- getline(in, line);
- assert(line.empty());
- getline(in, line);
- assert(line == "\\data\\");
- getline(in, line);
- size_t pos = line.find("ngram 1=");
- assert(pos == 0);
- assert(line.size() > 8);
- const size_t num_unigrams = atoi(&line[8]);
- getline(in, line);
- assert(line.empty());
- getline(in, line);
- assert(line == "\\1-grams:");
- for (size_t i = 0; i < num_unigrams; ++i) {
- getline(in, line);
- assert(line.size() > 0);
- pos = line.find('\t');
- assert(pos > 0);
- assert(pos + 1 < line.size());
- size_t cur = pos + 1;
- vector<WordID> w;
- while (cur < line.size()) {
- const size_t len = UTF8Len(line[cur]);
- w.push_back(TD::Convert(line.substr(cur, len)));
- cur += len;
- }
- line[pos] = 0;
- float p = atof(&line[0]);
- probs_[w].logeq(p * log(10.0));
- }
-}
-
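LoadUnigrams above asserts a rigid layout: the file must begin with a blank line, then \data\, then a line starting with "ngram 1=N", a blank line, "\1-grams:", and finally N entries of the form base-10 log probability, a tab, then the word (the code converts to natural logs via p * log(10)). A minimal file both loaders would accept, with illustrative values:

    (one blank line)
    \data\
    ngram 1=3

    \1-grams:
    -1.1461	cat
    -0.8239	dog
    -2.0000	zebra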
diff --git a/gi/pf/unigrams.h b/gi/pf/unigrams.h
deleted file mode 100644
index 1660d1ed..00000000
--- a/gi/pf/unigrams.h
+++ /dev/null
@@ -1,69 +0,0 @@
-#ifndef _UNIGRAMS_H_
-#define _UNIGRAMS_H_
-
-#include <vector>
-#include <string>
-#include <tr1/unordered_map>
-#include <boost/functional.hpp>
-
-#include "wordid.h"
-#include "prob.h"
-#include "tdict.h"
-
-struct UnigramModel {
- explicit UnigramModel(const std::string& fname, unsigned vocab_size) :
- use_uniform_(fname.size() == 0),
- uniform_(1.0 / vocab_size),
- probs_() {
- if (fname.size() > 0) {
- probs_.resize(TD::NumWords() + 1);
- LoadUnigrams(fname);
- }
- }
-
- const prob_t& operator()(const WordID& w) const {
- assert(w);
- if (use_uniform_) return uniform_;
- return probs_[w];
- }
-
- private:
- void LoadUnigrams(const std::string& fname);
-
- const bool use_uniform_;
- const prob_t uniform_;
- std::vector<prob_t> probs_;
-};
-
-
-// reads an ARPA unigram file and converts words like 'cat' into a string 'c a t'
-struct UnigramWordModel {
- explicit UnigramWordModel(const std::string& fname) :
- use_uniform_(false),
- uniform_(1.0),
- probs_() {
- LoadUnigrams(fname);
- }
-
- explicit UnigramWordModel(const unsigned vocab_size) :
- use_uniform_(true),
- uniform_(1.0 / vocab_size),
- probs_() {}
-
- const prob_t& operator()(const std::vector<WordID>& s) const {
- if (use_uniform_) return uniform_;
- const VectorProbHash::const_iterator it = probs_.find(s);
- assert(it != probs_.end());
- return it->second;
- }
-
- private:
- void LoadUnigrams(const std::string& fname);
-
- const bool use_uniform_;
- const prob_t uniform_;
- typedef std::tr1::unordered_map<std::vector<WordID>, prob_t, boost::hash<std::vector<WordID> > > VectorProbHash;
- VectorProbHash probs_;
-};
-
-#endif
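A short usage sketch for UnigramWordModel, which keys its table by the word's letter sequence as described above; the file name here is an assumption:

    #include <vector>
    #include "tdict.h"
    #include "unigrams.h"

    prob_t spelling_prob() {
      UnigramWordModel wm("unigrams.arpa");  // illustrative file name
      std::vector<WordID> cat;
      cat.push_back(TD::Convert("c"));
      cat.push_back(TD::Convert("a"));
      cat.push_back(TD::Convert("t"));
      return wm(cat);  // probability of the spelling "c a t"
    }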
diff --git a/gi/pipeline/OLD.clsp.config b/gi/pipeline/OLD.clsp.config
deleted file mode 100644
index cd0f9d65..00000000
--- a/gi/pipeline/OLD.clsp.config
+++ /dev/null
@@ -1,9 +0,0 @@
-# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED
-# name path aligned-corpus LM xfeats.grammar dev dev-refs test1 test-eval.sh ...
-btec /export/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz xgrammar/grammar.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh
-fbis /export/ws10smt/data/chinese-english.fbis corpus.zh-en.al
-zhen /export/ws10smt/data/chinese-english corpus.zh-en.al
-aren /export/ws10smt/data/arabic-english corpus.ar-en.al
-uren /export/ws10smt/data/urdu-english corpus.ur-en.al
-nlfr /export/ws10smt/data/dutch-french corpus.nl-fr.al
-
diff --git a/gi/pipeline/OLD.evaluation-pipeline.pl b/gi/pipeline/OLD.evaluation-pipeline.pl
deleted file mode 100755
index 49c303eb..00000000
--- a/gi/pipeline/OLD.evaluation-pipeline.pl
+++ /dev/null
@@ -1,277 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-use Getopt::Long;
-use Cwd;
-my $CWD = getcwd;
-
-my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; }
-
-my @DEFAULT_FEATS = qw(
- LogRuleCount SingletonRule LexE2F LexF2E WordPenalty
- LogFCount LanguageModel Glue GlueTop PassThrough SingletonF
-);
-
-my %init_weights = qw(
- LogRuleCount 0.2
- LexE2F -0.3
- LexF2E -0.3
- LogFCount 0.1
- WordPenalty -1.5
- LanguageModel 1.2
- Glue -1.0
- GlueTop 0.00001
- PassThrough -10.0
- SingletonRule -0.1
- X_EGivenF -0.3
- X_FGivenE -0.3
- X_LogECount -1
- X_LogFCount -0.1
- X_LogRuleCount 0.3
- X_SingletonE -0.1
- X_SingletonF -0.1
- X_SingletonRule -0.5
-);
-
-my $CDEC = "$SCRIPT_DIR/../../decoder/cdec";
-my $PARALLELIZE = "$SCRIPT_DIR/../../vest/parallelize.pl";
-my $EXTOOLS = "$SCRIPT_DIR/../../extools";
-die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS;
-my $VEST = "$SCRIPT_DIR/../../vest";
-die "Can't find vest: $VEST" unless -e $VEST && -d $VEST;
-my $DISTVEST = "$VEST/dist-vest.pl";
-my $FILTSCORE = "$EXTOOLS/filter_score_grammar";
-my $ADDXFEATS = "$SCRIPT_DIR/scripts/xfeats.pl";
-assert_exec($CDEC, $PARALLELIZE, $FILTSCORE, $DISTVEST, $ADDXFEATS);
-
-my $config = "$SCRIPT_DIR/OLD.clsp.config";
-print STDERR "CORPORA CONFIGURATION: $config\n";
-open CONF, "<$config" or die "Can't read $config: $!";
-my %paths;
-my %corpora;
-my %lms;
-my %devs;
-my %devrefs;
-my %tests;
-my %testevals;
-my %xgrammars;
-print STDERR " LANGUAGE PAIRS:";
-while(<CONF>) {
- chomp;
- next if /^#/;
- next if /^\s*$/;
- s/^\s+//;
- s/\s+$//;
- my ($name, $path, $corpus, $lm, $xgrammar, $dev, $devref, @xtests) = split /\s+/;
- $paths{$name} = $path;
- $corpora{$name} = $corpus;
- $lms{$name} = $lm;
- $xgrammars{$name} = $xgrammar;
- $devs{$name} = $dev;
- $devrefs{$name} = $devref;
- $tests{$name} = $xtests[0];
- $testevals{$name} = $xtests[1];
- print STDERR " $name";
-}
-print STDERR "\n";
-
-my %langpairs = map { $_ => 1 } qw( btec zhen fbis aren uren nlfr );
-
-my $outdir = "$CWD/exp";
-my $help;
-my $XFEATS;
-my $EXTRA_FILTER = '';
-my $dataDir = '/export/ws10smt/data';
-if (GetOptions(
- "data=s" => \$dataDir,
- "xfeats" => \$XFEATS,
-) == 0 || @ARGV!=2 || $help) {
- print_help();
- exit;
-}
-my $lp = $ARGV[0];
-my $grammar = $ARGV[1];
-print STDERR " CORPUS REPO: $dataDir\n";
-print STDERR " LANGUAGE PAIR: $lp\n";
-die "I don't know about that language pair\n" unless $paths{$lp};
-my $corpdir = "$dataDir";
-if ($paths{$lp} =~ /^\//) { $corpdir = $paths{$lp}; } else { $corpdir .= '/' . $paths{$lp}; }
-die "I can't find the corpora directory: $corpdir" unless -d $corpdir;
-print STDERR " GRAMMAR: $grammar\n";
-my $LANG_MODEL = mydircat($corpdir, $lms{$lp});
-print STDERR " LM: $LANG_MODEL\n";
-my $CORPUS = mydircat($corpdir, $corpora{$lp});
-die "Can't find corpus: $CORPUS" unless -f $CORPUS;
-
-my $dev = mydircat($corpdir, $devs{$lp});
-my $drefs = $devrefs{$lp};
-die "Can't find dev: $dev\n" unless -f $dev;
-die "Dev refs not set" unless $drefs;
-$drefs = mydircat($corpdir, $drefs);
-
-my $test = mydircat($corpdir, $tests{$lp});
-my $teval = mydircat($corpdir, $testevals{$lp});
-die "Can't find test: $test\n" unless -f $test;
-assert_exec($teval);
-
-if ($XFEATS) {
- my $xgram = mydircat($corpdir, $xgrammars{$lp});
- die "Can't find x-grammar: $xgram" unless -f $xgram;
- $EXTRA_FILTER = "$ADDXFEATS $xgram |";
- print STDERR "ADDING X-FEATS FROM $xgram\n";
-}
-
-# MAKE DEV
-print STDERR "\nFILTERING FOR dev...\n";
-print STDERR "DEV: $dev (REFS=$drefs)\n";
-`mkdir -p $outdir`;
-my $devgrammar = filter($grammar, $dev, 'dev', $outdir);
-my $devini = mydircat($outdir, "cdec-dev.ini");
-write_cdec_ini($devini, $devgrammar);
-
-
-# MAKE TEST
-print STDERR "\nFILTERING FOR test...\n";
-print STDERR "TEST: $test (EVAL=$teval)\n";
-`mkdir -p $outdir`;
-my $testgrammar = filter($grammar, $test, 'test', $outdir);
-my $testini = mydircat($outdir, "cdec-test.ini");
-write_cdec_ini($testini, $testgrammar);
-
-
-# CREATE INIT WEIGHTS
-print STDERR "\nCREATING INITIAL WEIGHTS FILE: weights.init\n";
-my $weights = mydircat($outdir, "weights.init");
-write_random_weights_file($weights);
-
-
-# VEST
-print STDERR "\nMINIMUM ERROR TRAINING\n";
-my $tuned_weights = mydircat($outdir, 'weights.tuned');
-if (-f $tuned_weights) {
- print STDERR "TUNED WEIGHTS $tuned_weights EXISTS: REUSING\n";
-} else {
- my $cmd = "$DISTVEST --ref-files=$drefs --source-file=$dev --weights $weights $devini";
- print STDERR "MERT COMMAND: $cmd\n";
- `rm -rf $outdir/vest 2> /dev/null`;
- chdir $outdir or die "Can't chdir to $outdir: $!";
- $weights = `$cmd`;
- die "MERT reported non-zero exit code" unless $? == 0;
- chomp $weights;
- safesystem($tuned_weights, "cp $weights $tuned_weights");
- print STDERR "TUNED WEIGHTS: $tuned_weights\n";
- die "$tuned_weights is missing!" unless -f $tuned_weights;
-}
-
-# DECODE
-print STDERR "\nDECODE TEST SET\n";
-my $decolog = mydircat($outdir, "test-decode.log");
-my $testtrans = mydircat($outdir, "test.trans");
-my $cmd = "cat $test | $PARALLELIZE -j 20 -e $decolog -- $CDEC -c $testini -w $tuned_weights > $testtrans";
-safesystem($testtrans, $cmd) or die "Failed to decode test set!";
-
-
-# EVALUATE
-print STDERR "\nEVALUATE TEST SET\n";
-print STDERR "TEST: $testtrans\n";
-$cmd = "$teval $testtrans";
-safesystem(undef, $cmd) or die "Failed to evaluate!";
-exit 0;
-
-
-sub write_random_weights_file {
- my ($file, @extras) = @_;
- open F, ">$file" or die "Can't write $file: $!";
- my @feats = (@DEFAULT_FEATS, @extras);
- if ($XFEATS) {
- my @xfeats = qw(
- X_LogRuleCount X_LogECount X_LogFCount X_EGivenF X_FGivenE X_SingletonRule X_SingletonE X_SingletonF
- );
- @feats = (@feats, @xfeats);
- }
- for my $feat (@feats) {
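- # scale each default weight by a random factor in [0, 1.6)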
- my $r = rand(1.6);
- my $w = $init_weights{$feat} * $r;
- if ($w == 0) { $w = 0.0001; print STDERR "WARNING: $feat had no initial weight!\n"; }
- print F "$feat $w\n";
- }
- close F;
-}
-
-sub filter {
- my ($grammar, $set, $name, $outdir) = @_;
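- # filter and rescore the grammar against $set; when --xfeats is given,
- # $EXTRA_FILTER splices xfeats.pl into the pipe to add X_ features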
- my $outgrammar = mydircat($outdir, "$name.scfg.gz");
- if (-f $outgrammar) { print STDERR "$outgrammar exists - REUSING!\n"; } else {
- my $cmd = "gunzip -c $grammar | $FILTSCORE -c $CORPUS -t $set | $EXTRA_FILTER gzip > $outgrammar";
- safesystem($outgrammar, $cmd) or die "Can't filter and score grammar!";
- }
- return $outgrammar;
-}
-
-sub mydircat {
- my ($base, $suffix) = @_;
- if ($suffix =~ /^\//) { return $suffix; }
- my $res = $base . '/' . $suffix;
- $res =~ s/\/\//\//g;
- return $res;
-}
-
-sub write_cdec_ini {
- my ($filename, $grammar_path) = (@_);
- open CDECINI, ">$filename" or die "Can't write $filename: $!";
- print CDECINI <<EOT;
-formalism=scfg
-cubepruning_pop_limit=100
-add_pass_through_rules=true
-scfg_extra_glue_grammar=/export/ws10smt/data/glue/glue.scfg.gz
-grammar=$grammar_path
-feature_function=WordPenalty
-feature_function=LanguageModel -o 3 $LANG_MODEL
-EOT
- close CDECINI;
-};
-
-sub print_help {
- print STDERR<<EOT;
-
-Usage: $0 [OPTIONS] language-pair unfiltered-grammar.gz
-
-Given an induced grammar for an entire corpus (i.e., generated by
-local-gi-pipeline.pl), filter and featurize it for a dev and test set,
-run MERT, report scores.
-
-EOT
-}
-
-sub safesystem {
- my $output = shift @_;
- print STDERR "Executing: @_\n";
- system(@_);
- if ($? == -1) {
- print STDERR "ERROR: Failed to execute: @_\n $!\n";
- if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; }
- exit(1);
- }
- elsif ($? & 127) {
- printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n",
- ($? & 127), ($? & 128) ? 'with' : 'without';
- if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; }
- exit(1);
- }
- else {
- my $exitcode = $? >> 8;
- if ($exitcode) {
- print STDERR "Exit code: $exitcode\n";
- if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; }
- }
- return ! $exitcode;
- }
-}
-
-sub assert_exec {
- my @files = @_;
- for my $file (@files) {
- die "Can't find $file - did you run make?\n" unless -e $file;
- die "Can't execute $file" unless -e $file;
- }
-};
-
diff --git a/gi/pipeline/backoff-pipe.pl b/gi/pipeline/backoff-pipe.pl
deleted file mode 100644
index ac103c8b..00000000
--- a/gi/pipeline/backoff-pipe.pl
+++ /dev/null
@@ -1,215 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-use Getopt::Long "GetOptions";
-
-my @grammars;
-my $OUTPUTPREFIX = './giwork/bo.hier.grammar';
-my $backoff_levels = 1;
-my $glue_levels = 1;
-
-usage() unless &GetOptions('grmr=s@' => \ @grammars,
- 'outprefix=s' => \ $OUTPUTPREFIX,
- 'bo-lvls=i' => \ $backoff_levels,
- 'glue-lvls=i' => \ $glue_levels,
-);
-safemkdir($OUTPUTPREFIX);
-
-my $OUTDIR = $OUTPUTPREFIX . '/hier';
-print STDERR "@grammars\n";
-
-
-my %grmr = ();
-foreach my $grammar (@grammars) {
- $grammar =~ m/\/[^\/]*\.t(\d+)\.[^\/]*/;
- $grmr{$1} = $grammar;
-}
-
-my @index = sort keys %grmr;
-$OUTDIR = $OUTDIR . join('-',@index);
-safemkdir($OUTDIR);
-my $BACKOFF_GRMR = $OUTDIR . '/backoff.hier.gz';
-safesystem("echo \"\" | gzip > $BACKOFF_GRMR");
-my $GLUE_GRMR = $OUTDIR . '/glue.hier.gz';
-safesystem("echo \"\" | gzip > $GLUE_GRMR");
-my $joinedgrammars = $OUTDIR . '/grammar.hier.gz';
-
-join_grammars();
-
-for my $i (0..(scalar @index)-2) {
- my $freqs = extract_freqs($index[$i], $index[$i+1]);
- if ($i < $backoff_levels) {
- create_backoff_rules($index[$i],$index[$i+1],$freqs);
- }
- if ($i < $glue_levels) {
- add_glue_rules($index[$i]);
- }
-}
-
-output_grammar_info();
-
-
-sub usage {
- print <<EOT;
-
-Usage: $0 --grmr=grammar.t1.gz [--grmr=grammar.t2.gz ...] [OPTIONS]
-
-Joins grammars induced at different granularities and adds hierarchical
-backoff and glue rules.
-
-EOT
- exit 1;
-};
-
-sub safemkdir {
- my $dir = shift;
- if (-d $dir) { return 1; }
- return mkdir($dir);
-}
-
-
-sub safesystem {
- print STDERR "Executing: @_\n";
- system(@_);
- if ($? == -1) {
- print STDERR "ERROR: Failed to execute: @_\n $!\n";
- exit(1);
- }
- elsif ($? & 127) {
- printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n",
- ($? & 127), ($? & 128) ? 'with' : 'without';
- exit(1);
- }
- else {
- my $exitcode = $? >> 8;
- print STDERR "Exit code: $exitcode\n" if $exitcode;
- return ! $exitcode;
- }
-}
-
-
-sub join_grammars {
- print STDERR "\n!!! JOINING GRAMMARS\n";
- if(-e $joinedgrammars) {
- print STDERR "$joinedgrammars exists, reusing...\n";
- return;
- }
- safesystem("echo \"\" | gzip > $joinedgrammars");
- foreach my $i (@index) {
- my $g = $grmr{$i};
- safesystem("zcat $g | sed -r -e 's/X([0-9]+)/X$i\\1/g' - | gzip > $g.2.gz");
- safesystem("zcat $joinedgrammars $g.2.gz | gzip > $joinedgrammars.2.gz");
- safesystem("mv $joinedgrammars.2.gz $joinedgrammars");
- }
-}
-
-
-sub extract_freqs {
- my($grmr1,$grmr2) = @_;
- print STDERR "\n!!!EXTRACTING FREQUENCIES: $grmr1->$grmr2\n";
- my $IN_COARSE = substr($grmr{$grmr1},0,index($grmr{$grmr1},".grammar/")) . "/labeled_spans.txt";
- my $IN_FINE = substr($grmr{$grmr2},0,index($grmr{$grmr2},".grammar/")) . "/labeled_spans.txt";
- my $OUT_SPANS = "$OUTDIR/labeled_spans.hier$grmr1-$grmr2.txt";
- my $FREQS = "$OUTDIR/label_freq.hier$grmr1-$grmr2.txt";
- if(-e $OUT_SPANS && -e $FREQS) {
- print STDERR "$OUT_SPANS exists, reusing...\n";
- print STDERR "$FREQS exists, reusing...\n";
- return $FREQS;
- }
-
- safesystem("paste -d ' ' $IN_COARSE $IN_FINE > $OUT_SPANS");
-
- my %FREQ_HIER = ();
- my %finehier = ();
-
- open SPANS, $OUT_SPANS or die $!;
- while (<SPANS>) {
- my ($tmp, $coarse, $fine) = split /\|\|\|/;
- my @coarse_spans = $coarse =~ /\d+-\d+:X(\d+)/g;
- my @fine_spans = $fine =~ /\d+-\d+:X(\d+)/g;
-
- foreach my $i (0..(scalar @coarse_spans)-1) {
- my $coarse_cat = $coarse_spans[$i];
- my $fine_cat = $fine_spans[$i];
-
- $FREQ_HIER{$coarse_cat}{$fine_cat}++;
- }
- }
- close SPANS;
- foreach (values %FREQ_HIER) {
- my $coarse_freq = $_;
- my $total = 0;
- $total+=$_ for (values %{ $coarse_freq });
- $coarse_freq->{$_}=log($coarse_freq->{$_}/$total) for (keys %{ $coarse_freq });
- }
- open FREQS, ">", $FREQS or die $!;
- foreach my $coarse_cat (keys %FREQ_HIER) {
- print FREQS "$coarse_cat |||";
- foreach my $fine_cat (keys %{$FREQ_HIER{$coarse_cat}}) {
- my $freq = $FREQ_HIER{$coarse_cat}{$fine_cat};
- print FREQS " $fine_cat:$freq";
- if(! exists $finehier{$fine_cat} || $finehier{$fine_cat} < $freq) {
- $finehier{$fine_cat} = $coarse_cat;
- }
- }
- print FREQS "\n";
- }
-# foreach my $fine_cat (keys %finehier) {
-# print FREQS "$fine_cat -> $finehier{$fine_cat}\n";
-# }
- close FREQS;
- return $FREQS;
-}
-
-
-sub create_backoff_rules {
- print STDERR "\n!!! CREATING BACKOFF RULES\n";
- my ($grmr1, $grmr2, $freq) = @_;
- my $OUTFILE = "$OUTDIR/backoff.hier$grmr1-$grmr2.txt";
- if(-e $OUTFILE) {
- print STDERR "$OUTFILE exists, reusing...\n";
- return;
- }
- open FREQS, $freq or die $!;
- open TMP, ">", $OUTFILE or die $!;
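- # each frequency line reads "coarse ||| fine1:logp1 fine2:logp2 ..."; emit a
- # unary rule rewriting the coarse label to each fine label, carrying the log
- # relative frequency as the BackoffRule feature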
- while (<FREQS>) {
- my $line = $_;
- $line =~ m/^(\d+) \|\|\| (.+)$/;
- my $coarse = $1;
- $line = $2;
- my @finefreq = $line =~ m/(\d+):(\S+)/g;
- for(my $i = 0; $i < scalar @finefreq; $i+=2) {
- my $finecat = $finefreq[$i];
- my $finefreq = $finefreq[$i+1];
- print TMP "[X$grmr1$coarse] ||| [X$grmr2$finecat,1]\t[1] ||| BackoffRule=$finefreq A=0-0\n";
- }
- }
- close TMP;
- close FREQS;
- safesystem("zcat $BACKOFF_GRMR | cat - $OUTFILE | gzip > $BACKOFF_GRMR.2.gz");
- safesystem("mv $BACKOFF_GRMR.2.gz $BACKOFF_GRMR");
-}
-
-sub add_glue_rules {
- print STDERR "\n!!! CREATING GLUE RULES\n";
- my ($grmr) = @_;
- my $OUTFILE = "$OUTDIR/glue.$grmr.gz";
- if (-e $OUTFILE) {
- print STDERR "$OUTFILE exists, reusing...\n";
- return;
- }
- open TMP, ">", $OUTFILE or die $!;
- for my $i (0..($grmr-1)) {
- print TMP "[S] ||| [S,1] [X$grmr$i,2] ||| [1] [2] ||| Glue=1\n";
- print TMP "[S] ||| [X$grmr$i,1] ||| [1] ||| GlueTop=1\n";
- }
- close TMP;
- safesystem("zcat $GLUE_GRMR | cat - $OUTFILE | gzip > $GLUE_GRMR.2.gz");
- safesystem("mv $GLUE_GRMR.2.gz $GLUE_GRMR");
-}
-
-sub output_grammar_info {
- print STDERR "\n!!! GRAMMAR INFORMATION\n";
- print STDOUT "GRAMMAR: \t$joinedgrammars\n";
- print STDOUT "GLUE: \t$GLUE_GRMR\n";
- print STDOUT "BACKOFF: \t$BACKOFF_GRMR\n";
-}
diff --git a/gi/pipeline/blacklight.config b/gi/pipeline/blacklight.config
deleted file mode 100644
index fc59a604..00000000
--- a/gi/pipeline/blacklight.config
+++ /dev/null
@@ -1,9 +0,0 @@
-# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED
-# name path aligned-corpus LM dev dev-refs test1 test1-eval.sh ...
-/usr/users/0/cdyer/ws10smt/data
-btec /home/cdyer/ws10smt-data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh
-zhen /home/cdyer/ws10smt-data/chinese-english corpus.zh-en.al lm/c2e.3gram.lm.gz dev_and_test/mt02.src.txt dev_and_test/mt02.ref.* dev_and_test/mt03.src.txt eval-mt03.sh
-aren /home/cdyer/ws10smt-data/arabic-english corpus.ar-en-al lm/a2e.3gram.lm.gz dev_and_test/dev.src.txt dev_and_test/dev.ref.txt.* dev_and_test/mt05.src.txt eval-mt05.sh
-uren /usr/users/0/cdyer/ws10smt/data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh
-nlfr /home/cdyer/ws10smt-data/dutch-french corpus.nl-fr.al
-
diff --git a/gi/pipeline/clsp.config b/gi/pipeline/clsp.config
deleted file mode 100644
index c23d409f..00000000
--- a/gi/pipeline/clsp.config
+++ /dev/null
@@ -1,10 +0,0 @@
-# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED
-# name path aligned-corpus LM dev dev-refs test1 test1-eval.sh ...
-/export/ws10smt/data
-btec /export/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh
-fbis /export/ws10smt/data/chinese-english.fbis corpus.zh-en.al
-zhen /export/ws10smt/data/chinese-english corpus.zh-en.al lm/c2e.3gram.lm.gz dev_and_test/mt02.src.txt dev_and_test/mt02.ref.* dev_and_test/mt03.src.txt eval-mt03.sh
-aren /export/ws10smt/data/arabic-english corpus.ar-en-al lm/a2e.3gram.lm.gz dev_and_test/dev.src.txt dev_and_test/dev.ref.txt.* dev_and_test/mt05.src.txt eval-mt05.sh
-uren /export/ws10smt/data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh
-nlfr /export/ws10smt/data/dutch-french corpus.nl-fr.al
-
diff --git a/gi/pipeline/evaluation-pipeline.pl b/gi/pipeline/evaluation-pipeline.pl
deleted file mode 100755
index 4b4529d9..00000000
--- a/gi/pipeline/evaluation-pipeline.pl
+++ /dev/null
@@ -1,364 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-use Getopt::Long;
-use Cwd;
-my $CWD = getcwd;
-
-my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment"; }
-use LocalConfig;
-
-my $JOBS = 15;
-my $PMEM = "9G";
-my $NUM_TRANSLATIONS = 50;
-my $GOAL = "S";
-
-# featurize_grammar may add multiple features from a single feature extractor
-# the key in this map is the extractor name, the value is a list of the extracted features
-my $feat_map = {
- "LogRuleCount" => [ "LogRuleCount", "SingletonRule" ] ,
-# "XFeatures" => [ "XFE","XEF" ] ,
- "XFeatures" => [ "XFE","XEF","LabelledEF","LabelledFE"], # ,"XE_Singleton","XF_Singleton"] ,
- "LabelledRuleConditionals" => [ "LabelledFE","LabelledEF" ] ,
- "LexProb" => [ "LexE2F", "LexF2E" ] ,
- "BackoffRule" => [ "BackoffRule" ] ,
- "RulePenalty" => [ "RulePenalty" ] ,
- "LHSProb" => [ "LHSProb" ] ,
- "LabellingShape" => [ "LabellingShape" ] ,
- "GenerativeProb" => [ "GenerativeProb" ] ,
-};
-
-my %init_weights = qw(
- EGivenF -0.735245
- FGivenE -0.219391
- Glue -0.306709
- GlueTop 0.0473331
- LanguageModel 2.40403
- LexE2F -0.266989
- LexF2E -0.550373
- LogECount -0.129853
- LogFCount -0.194037
- LogRuleCount 0.256706
- BackoffRule 0.5
- XFE -0.256706
- XEF -0.256706
- XF_Singleton -0.05
- XE_Singleton -0.8
- LabelledFE -0.256706
- LabelledEF -0.256706
- PassThrough -0.9304905
- SingletonE -3.04161
- SingletonF 0.0714027
- SingletonRule -0.889377
- WordPenalty -1.99495
- RulePenalty -0.1
- LabellingShape -0.1
- LHSProb -0.1
- GenerativeProb -0.1
-);
-
-
-# these features are included by default
-my @DEFAULT_FEATS = qw( PassThrough Glue GlueTop LanguageModel WordPenalty );
-
-
-
-my $FILTERBYF = "$SCRIPT_DIR/scripts/filter-by-f.pl";
-my $CDEC = "$SCRIPT_DIR/../../decoder/cdec";
-my $PARALLELIZE = "$SCRIPT_DIR/../../vest/parallelize.pl";
-my $EXTOOLS = "$SCRIPT_DIR/../../extools";
-die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS;
-my $VEST = "$SCRIPT_DIR/../../vest";
-die "Can't find vest: $VEST" unless -e $VEST && -d $VEST;
-my $DISTVEST = "$VEST/dist-vest.pl";
-my $FILTER = "$EXTOOLS/filter_grammar";
-my $FEATURIZE = "$EXTOOLS/featurize_grammar";
-assert_exec($CDEC, $PARALLELIZE, $FILTER, $FEATURIZE, $DISTVEST, $FILTERBYF);
-
-my $numtopics = 25;
-
-my $config = "$SCRIPT_DIR/" . (lc environment_name()) . '.config';
-print STDERR "CORPORA CONFIGURATION: $config\n";
-open CONF, "<$config" or die "Can't read $config: $!";
-my %paths;
-my %corpora;
-my %lms;
-my %devs;
-my %devrefs;
-my %tests;
-my %testevals;
-my $datadir;
-print STDERR " LANGUAGE PAIRS:";
-while(<CONF>) {
- chomp;
- next if /^#/;
- next if /^\s*$/;
- s/^\s+//;
- s/\s+$//;
- if (! defined $datadir) { $datadir = $_; next; }
- my ($name, $path, $corpus, $lm, $dev, $devref, @xtests) = split /\s+/;
- $paths{$name} = $path;
- $corpora{$name} = $corpus;
- $lms{$name} = $lm;
- $devs{$name} = $dev;
- $devrefs{$name} = $devref;
- $tests{$name} = $xtests[0];
- $testevals{$name} = $xtests[1];
- print STDERR " $name";
-}
-print STDERR "\n";
-
-my %langpairs = map { $_ => 1 } qw( btec zhen fbis aren uren nlfr );
-
-my $outdir = "$CWD/exp";
-my $help;
-my $FEATURIZER_OPTS = '';
-my $dataDir = '/export/ws10smt/data';
-my @features;
-my $bkoffgram;
-my $gluegram;
-my $oovgram;
-my $usefork;
-my $lmorder = 3;
-my $density;
-if (GetOptions(
- "backoff-grammar=s" => \$bkoffgram,
- "density-prune=f" => \$density,
- "glue-grammar=s" => \$gluegram,
- "oov-grammar=s" => \$oovgram,
- "data=s" => \$dataDir,
- "pmem=s" => \$PMEM,
- "n=i" => \$NUM_TRANSLATIONS,
- "features=s@" => \@features,
- "use-fork" => \$usefork,
- "jobs=i" => \$JOBS,
- "out-dir=s" => \$outdir,
- "lmorder=i" => \$lmorder,
- "goal=s" => \$GOAL,
-) == 0 || @ARGV!=2 || $help) {
- print_help();
- exit;
-}
-my $DENSITY_PRUNE = '';
-if ($density) {
- $DENSITY_PRUNE = "--density-prune $density";
-}
-if ($usefork) { $usefork="--use-fork"; } else { $usefork = ''; }
-my @fkeys = keys %$feat_map;
-die "You must specify one or more features with -f. Known features: @fkeys\n" unless scalar @features > 0;
-my @xfeats;
-for my $feat (@features) {
- my $rs = $feat_map->{$feat};
- if (!defined $rs) { die "DON'T KNOW ABOUT FEATURE $feat\n"; }
- my @xfs = @$rs;
- @xfeats = (@xfeats, @xfs);
- $FEATURIZER_OPTS .= " -f $feat" unless $feat eq "BackoffRule";
-}
-print STDERR "X-FEATS: @xfeats\n";
-
-my $lp = $ARGV[0];
-my $grammar = $ARGV[1];
-print STDERR " CORPUS REPO: $dataDir\n";
-print STDERR " LANGUAGE PAIR: $lp\n";
-die "I don't know about that language pair\n" unless $paths{$lp};
-my $corpdir = "$dataDir";
-if ($paths{$lp} =~ /^\//) { $corpdir = $paths{$lp}; } else { $corpdir .= '/' . $paths{$lp}; }
-die "I can't find the corpora directory: $corpdir" unless -d $corpdir;
-print STDERR " GRAMMAR: $grammar\n";
-my $LANG_MODEL = mydircat($corpdir, $lms{$lp});
-print STDERR " LM: $LANG_MODEL\n";
-my $CORPUS = mydircat($corpdir, $corpora{$lp});
-die "Can't find corpus: $CORPUS" unless -f $CORPUS;
-
-my $dev = mydircat($corpdir, $devs{$lp});
-my $drefs = $devrefs{$lp};
-die "Can't find dev: $dev\n" unless -f $dev;
-die "Dev refs not set" unless $drefs;
-$drefs = mydircat($corpdir, $drefs);
-
-my $test = mydircat($corpdir, $tests{$lp});
-my $teval = mydircat($corpdir, $testevals{$lp});
-#die "Can't find test: $test\n" unless -f $test;
-#assert_exec($teval);
-
-`mkdir -p $outdir`;
-
-# CREATE INIT WEIGHTS
-print STDERR "\nCREATING INITIAL WEIGHTS FILE: weights.init\n";
-my $weights = mydircat($outdir, "weights.init");
-write_random_weights_file($weights, @xfeats);
-
-my $bkoff_grmr;
-my $glue_grmr;
-if($bkoffgram) {
- print STDERR "Placing backoff grammar…\n";
- $bkoff_grmr = mydircat($outdir, "backoff.scfg.gz");
- print STDERR "cp $bkoffgram $bkoff_grmr\n";
- safesystem(undef,"cp $bkoffgram $bkoff_grmr");
-}
-if($gluegram) {
- print STDERR "Placing glue grammar…\n";
- $glue_grmr = mydircat($outdir, "glue.bo.scfg.gz");
- print STDERR "cp $gluegram $glue_grmr\n";
- safesystem(undef,"cp $gluegram $glue_grmr");
-}
-
-# MAKE DEV
-print STDERR "\nFILTERING FOR dev...\n";
-print STDERR "DEV: $dev (REFS=$drefs)\n";
-my $devgrammar = filter($grammar, $dev, 'dev', $outdir);
-my $devini = mydircat($outdir, "cdec-dev.ini");
-write_cdec_ini($devini, $devgrammar);
-
-
-# MAKE TEST
-print STDERR "\nFILTERING FOR test...\n";
-print STDERR "TEST: $test (EVAL=$teval)\n";
-`mkdir -p $outdir`;
-my $testgrammar = filter($grammar, $test, 'test', $outdir);
-my $testini = mydircat($outdir, "cdec-test.ini");
-write_cdec_ini($testini, $testgrammar);
-
-
-# VEST
-print STDERR "\nMINIMUM ERROR TRAINING\n";
-my $tuned_weights = mydircat($outdir, 'weights.tuned');
-if (-f $tuned_weights) {
- print STDERR "TUNED WEIGHTS $tuned_weights EXISTS: REUSING\n";
-} else {
- my $cmd = "$DISTVEST $usefork $DENSITY_PRUNE --decode-nodes $JOBS --pmem=$PMEM --ref-files=$drefs --source-file=$dev --weights $weights $devini";
- print STDERR "MERT COMMAND: $cmd\n";
- `rm -rf $outdir/vest 2> /dev/null`;
- chdir $outdir or die "Can't chdir to $outdir: $!";
- $weights = `$cmd`;
- die "MERT reported non-zero exit code" unless $? == 0;
- chomp $weights;
- safesystem($tuned_weights, "cp $weights $tuned_weights");
- print STDERR "TUNED WEIGHTS: $tuned_weights\n";
- die "$tuned_weights is missing!" unless -f $tuned_weights;
-}
-
-# DECODE
-print STDERR "\nDECODE TEST SET\n";
-my $decolog = mydircat($outdir, "test-decode.log");
-my $testtrans = mydircat($outdir, "test.trans");
-my $cmd = "cat $test | $PARALLELIZE $usefork -j $JOBS -e $decolog -- $CDEC -c $testini -w $tuned_weights > $testtrans";
-safesystem($testtrans, $cmd) or die "Failed to decode test set!";
-
-
-# EVALUATE
-print STDERR "\nEVALUATE TEST SET\n";
-print STDERR "TEST: $testtrans\n";
-$cmd = "$teval $testtrans";
-safesystem(undef, $cmd) or die "Failed to evaluate!";
-exit 0;
-
-
-sub write_random_weights_file {
- my ($file, @extras) = @_;
- if (-f $file) {
- print STDERR "$file exists - REUSING!\n";
- return;
- }
- open F, ">$file" or die "Can't write $file: $!";
- my @feats = (@DEFAULT_FEATS, @extras);
- for my $feat (@feats) {
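- # jitter each default weight by a random factor in [0.8, 1.2)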
- my $r = rand(0.4) + 0.8;
- my $w = $init_weights{$feat} * $r;
- if ($w == 0) { $w = 0.0001; print STDERR "WARNING: $feat had no initial weight!\n"; }
- print F "$feat $w\n";
- }
- close F;
-}
-
-sub filter {
- my ($grammar, $set, $name, $outdir) = @_;
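- # three stages: filter rules to $set, featurize them against the training
- # corpus, then keep the best $NUM_TRANSLATIONS rules per source side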
- my $out1 = mydircat($outdir, "$name.filt.gz");
- my $out2 = mydircat($outdir, "$name.f_feat.gz");
- my $outgrammar = mydircat($outdir, "$name.scfg.gz");
- if (-f $outgrammar) { print STDERR "$outgrammar exists - REUSING!\n"; } else {
- my $cmd = "gunzip -c $grammar | $FILTER -t $set | gzip > $out1";
- safesystem($out1, $cmd) or die "Filtering failed.";
- $cmd = "gunzip -c $out1 | $FEATURIZE $FEATURIZER_OPTS -g $out1 -c $CORPUS | gzip > $out2";
- safesystem($out2, $cmd) or die "Featurizing failed";
- $cmd = "$FILTERBYF $NUM_TRANSLATIONS $out2 $outgrammar";
- safesystem($outgrammar, $cmd) or die "Secondary filtering failed";
- }
- return $outgrammar;
-}
-
-sub mydircat {
- my ($base, $suffix) = @_;
- if ($suffix =~ /^\//) { return $suffix; }
- my $res = $base . '/' . $suffix;
- $res =~ s/\/\//\//g;
- return $res;
-}
-
-sub write_cdec_ini {
- my ($filename, $grammar_path) = (@_);
- open CDECINI, ">$filename" or die "Can't write $filename: $!";
- my $glue = ($gluegram ? "$glue_grmr" : "$datadir/glue/glue.scfg.gz");
- my $oov = ($oovgram ? "$oovgram" : "$datadir/oov.scfg.gz");
- print CDECINI <<EOT;
-formalism=scfg
-cubepruning_pop_limit=100
-add_pass_through_rules=true
-scfg_extra_glue_grammar=$glue
-grammar=$oov
-grammar=$grammar_path
-scfg_default_nt=OOV
-scfg_no_hiero_glue_grammar=true
-feature_function=WordPenalty
-feature_function=LanguageModel -o $lmorder $LANG_MODEL
-goal=$GOAL
-EOT
- print CDECINI "grammar=$bkoff_grmr\n" if $bkoffgram;
- close CDECINI;
-};
-
-sub print_help {
- print STDERR<<EOT;
-
-Usage: $0 [-c data-config-file] [-n N] language-pair grammar.bidir.gz [OPTIONS]
-
-Given an induced grammar for an entire corpus (i.e., generated by
-local-gi-pipeline.pl), filter and featurize it for a dev and test set,
-run MERT, report scores. Use -n to specify the number of translations
-to keep for a given source (50 is default).
-
-EOT
-}
-
-sub safesystem {
- my $output = shift @_;
- print STDERR "Executing: @_\n";
- system(@_);
- if ($? == -1) {
- print STDERR "ERROR: Failed to execute: @_\n $!\n";
- if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; }
- exit(1);
- }
- elsif ($? & 127) {
- printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n",
- ($? & 127), ($? & 128) ? 'with' : 'without';
- if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; }
- exit(1);
- }
- else {
- my $exitcode = $? >> 8;
- if ($exitcode) {
- print STDERR "Exit code: $exitcode\n";
- if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; }
- }
- return ! $exitcode;
- }
-}
-
-sub assert_exec {
- my @files = @_;
- for my $file (@files) {
- die "Can't find $file - did you run make?\n" unless -e $file;
- die "Can't execute $file" unless -e $file;
- }
-};
-
diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl
deleted file mode 100755
index e31167a2..00000000
--- a/gi/pipeline/local-gi-pipeline.pl
+++ /dev/null
@@ -1,465 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-use File::Copy;
-
-my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; }
-
-use Getopt::Long "GetOptions";
-
-my $GZIP = 'gzip';
-my $ZCAT = 'gunzip -c';
-my $SED = 'sed -e';
-my $BASE_PHRASE_MAX_SIZE = 10;
-my $COMPLETE_CACHE = 1;
-my $ITEMS_IN_MEMORY = 10000000; # cache size in extractors
-my $NUM_TOPICS = 50;
-my $NUM_TOPICS_COARSE;
-my $NUM_TOPICS_FINE = $NUM_TOPICS;
-my $NUM_SAMPLES = 1000;
-my $CONTEXT_SIZE = 1;
-my $BIDIR = 0;
-my $TOPICS_CONFIG = "pyp-topics.conf";
-my $LANGUAGE = "target";
-my $LABEL_THRESHOLD = "0";
-my $PRESERVE_PHRASES;
-
-my $MODEL = "pyp";
-my $NUM_ITERS = 100;
-my $PR_SCALE_P = 0;
-my $PR_SCALE_C = 0;
-my $PR_FLAGS = "";
-my $MORFMARK = "";
-
-my $EXTOOLS = "$SCRIPT_DIR/../../extools";
-die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS;
-my $PYPTOOLS = "$SCRIPT_DIR/../pyp-topics/src";
-die "Can't find pyp-topics: $PYPTOOLS" unless -e $PYPTOOLS && -d $PYPTOOLS;
-my $PYPSCRIPTS = "$SCRIPT_DIR/../pyp-topics/scripts";
-die "Can't find pyp-topics: $PYPSCRIPTS" unless -e $PYPSCRIPTS && -d $PYPSCRIPTS;
-my $PRTOOLS = "$SCRIPT_DIR/../posterior-regularisation";
-die "Can't find posterior-regularisation: $PRTOOLS" unless -e $PRTOOLS && -d $PRTOOLS;
-my $REDUCER = "$EXTOOLS/mr_stripe_rule_reduce";
-my $C2D = "$PYPSCRIPTS/contexts2documents.py";
-my $S2L = "$PYPSCRIPTS/spans2labels.py";
-my $SPLIT = "$SCRIPT_DIR/../posterior-regularisation/split-languages.py";
-
-my $PREM_TRAIN="$PRTOOLS/prjava/train-PR-cluster.sh";
-
-my $SORT_KEYS = "$SCRIPT_DIR/scripts/sort-by-key.sh";
-my $PATCH_CORPUS = "$SCRIPT_DIR/scripts/patch-corpus.pl";
-my $REMOVE_TAGS_CORPUS = "$SCRIPT_DIR/scripts/remove-tags-from-corpus.pl";
-my $REMOVE_TAGS_CONTEXT = "$SCRIPT_DIR/scripts/remove-tags-from-contexts.pl";
-my $EXTRACTOR = "$EXTOOLS/extractor";
-my $TOPIC_TRAIN = "$PYPTOOLS/pyp-contexts-train";
-my $MORF_DOC_FILTER = "$SCRIPT_DIR/../morf-segmentation/filter_docs.pl";
-
-assert_exec($PATCH_CORPUS, $SORT_KEYS, $REDUCER, $EXTRACTOR,
- $S2L, $C2D, $TOPIC_TRAIN, $SPLIT, $REMOVE_TAGS_CONTEXT, $REMOVE_TAGS_CORPUS, $MORF_DOC_FILTER);
-
-my $BACKOFF_GRAMMAR;
-my $DEFAULT_CAT;
-my $HIER_CAT;
-my %FREQ_HIER = ();
-my $TAGGED_CORPUS;
-
-my $NAME_SHORTCUT;
-
-my $OUTPUT = './giwork';
-usage() unless &GetOptions('base_phrase_max_size=i' => \$BASE_PHRASE_MAX_SIZE,
- 'backoff_grammar' => \$BACKOFF_GRAMMAR,
- 'output=s' => \$OUTPUT,
- 'model=s' => \$MODEL,
- 'topics=i' => \$NUM_TOPICS_FINE,
- 'coarse_topics=i' => \$NUM_TOPICS_COARSE,
- 'trg_context=i' => \$CONTEXT_SIZE,
- 'samples=i' => \$NUM_SAMPLES,
- 'label_threshold=f' => \$LABEL_THRESHOLD,
- 'use_default_cat' => \$DEFAULT_CAT,
- 'topics-config=s' => \$TOPICS_CONFIG,
- 'iterations=i' => \$NUM_ITERS,
- 'pr-scale-phrase=f' => \$PR_SCALE_P,
- 'pr-scale-context=f' => \$PR_SCALE_C,
- 'pr-flags=s' => \$PR_FLAGS,
- 'tagged_corpus=s' => \$TAGGED_CORPUS,
- 'language=s' => \$LANGUAGE,
- 'get_name_only' => \$NAME_SHORTCUT,
- 'preserve_phrases' => \$PRESERVE_PHRASES,
- 'morf=s' => \$MORFMARK,
- );
-if ($NAME_SHORTCUT) {
- $NUM_TOPICS = $NUM_TOPICS_FINE;
- print STDERR labeled_dir();
- exit 0;
-}
-usage() unless scalar @ARGV == 1;
-my $CORPUS = $ARGV[0];
-open F, "<$CORPUS" or die "Can't read $CORPUS: $!"; close F;
-
-$NUM_TOPICS = $NUM_TOPICS_FINE;
-
-$HIER_CAT = ( $NUM_TOPICS_COARSE ? 1 : 0 );
-
-print STDERR " Output: $OUTPUT\n";
-my $DATA_DIR = $OUTPUT . '/corpora';
-my $LEX_NAME = "corpus.f_e_a.$LANGUAGE.lex";
-my $CORPUS_LEX = $DATA_DIR . '/' . $LEX_NAME; # corpus used to extract rules
-my $CORPUS_CLUSTER = $DATA_DIR . "/corpus.f_e_a.$LANGUAGE.cluster"; # corpus used for clustering (often identical)
-
-my $CONTEXT_DIR = $OUTPUT . '/' . context_dir();
-my $CLUSTER_DIR = $OUTPUT . '/' . cluster_dir();
-my $LABELED_DIR = $OUTPUT . '/' . labeled_dir();
-my $CLUSTER_DIR_C;
-my $CLUSTER_DIR_F;
-my $LABELED_DIR_C;
-my $LABELED_DIR_F;
-if($HIER_CAT) {
- $CLUSTER_DIR_F = $CLUSTER_DIR;
- $LABELED_DIR_F = $LABELED_DIR;
- $NUM_TOPICS = $NUM_TOPICS_COARSE;
- $CLUSTER_DIR_C = $OUTPUT . '/' . cluster_dir();
- $LABELED_DIR_C = $OUTPUT . '/' . labeled_dir();
- $NUM_TOPICS = $NUM_TOPICS_FINE;
-}
-my $GRAMMAR_DIR = $OUTPUT . '/' . grammar_dir();
-print STDERR " Context: $CONTEXT_DIR\n Cluster: $CLUSTER_DIR\n Labeled: $LABELED_DIR\n Grammar: $GRAMMAR_DIR\n";
-safemkdir($OUTPUT) or die "Couldn't create output directory $OUTPUT: $!";
-safemkdir($DATA_DIR) or die "Couldn't create output directory $DATA_DIR: $!";
-safemkdir($CONTEXT_DIR) or die "Couldn't create output directory $CONTEXT_DIR: $!";
-safemkdir($CLUSTER_DIR) or die "Couldn't create output directory $CLUSTER_DIR: $!";
-if($HIER_CAT) {
- safemkdir($CLUSTER_DIR_C) or die "Couldn't create output directory $CLUSTER_DIR_C: $!";
- safemkdir($LABELED_DIR_C) or die "Couldn't create output directory $LABELED_DIR_C: $!";
-}
-safemkdir($LABELED_DIR) or die "Couldn't create output directory $LABELED_DIR: $!";
-safemkdir($GRAMMAR_DIR) or die "Couldn't create output directory $GRAMMAR_DIR: $!";
-if(-e $TOPICS_CONFIG) {
- copy($TOPICS_CONFIG, $CLUSTER_DIR) or die "Copy failed: $!";
-}
-
-setup_data();
-
-if (lc($MODEL) eq "blagree") {
- extract_bilingual_context();
-} else {
- extract_context();
-}
-
-if (lc($MODEL) eq "pyp") {
- if($HIER_CAT) {
- $NUM_TOPICS = $NUM_TOPICS_COARSE;
- $CLUSTER_DIR = $CLUSTER_DIR_C;
- topic_train();
- $NUM_TOPICS = $NUM_TOPICS_FINE;
- $CLUSTER_DIR = $CLUSTER_DIR_F;
- topic_train();
- } else {
- topic_train();
- }
-} elsif (lc($MODEL) =~ /pr|em|agree/) {
- prem_train();
-} else { die "Unsupported model type: $MODEL. Must be one of PYP or PREM.\n"; }
-if($HIER_CAT) {
- $NUM_TOPICS = $NUM_TOPICS_COARSE;
- $CLUSTER_DIR = $CLUSTER_DIR_C;
- $LABELED_DIR = $LABELED_DIR_C;
- label_spans_with_topics();
- $NUM_TOPICS = $NUM_TOPICS_FINE;
- $CLUSTER_DIR = $CLUSTER_DIR_F;
- $LABELED_DIR = $LABELED_DIR_F;
- label_spans_with_topics();
- extract_freqs();
-} else {
- label_spans_with_topics();
-}
-my $res;
-if ($BIDIR) {
- $res = grammar_extract_bidir();
-} else {
- $res = grammar_extract();
-}
-print STDERR "\n!!!COMPLETE!!!\n";
-print STDERR "GRAMMAR: $res\nYou should probably run: $SCRIPT_DIR/evaluation-pipeline.pl LANGPAIR giwork/ct1s0.L10.PYP.t4.s20.grammar/grammar.gz -f FEAT1 -f FEAT2\n\n";
-exit 0;
-
-sub setup_data {
- print STDERR "\n!!!PREPARE CORPORA!!!\n";
- if (-f $CORPUS_LEX && -f $CORPUS_CLUSTER) {
- print STDERR "$CORPUS_LEX and $CORPUS_CLUSTER exist, reusing...\n";
- return;
- }
- copy($CORPUS, $CORPUS_LEX);
- if ($TAGGED_CORPUS) {
- die "Can't find $TAGGED_CORPUS" unless -f $TAGGED_CORPUS;
- my $opt="";
- $opt = "-s" if ($LANGUAGE eq "source");
- $opt = $opt . " -a" if ($PRESERVE_PHRASES);
- my $cmd="$PATCH_CORPUS $opt $TAGGED_CORPUS $CORPUS_LEX > $CORPUS_CLUSTER";
- safesystem($cmd) or die "Failed to extract contexts.";
- } else {
- symlink($LEX_NAME, $CORPUS_CLUSTER);
- }
-}
-
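-# directory names encode the configuration; with the defaults cluster_dir()
-# yields ct1s0.L10.ltarget.PYP.t50.s1000 (context size, max phrase length,
-# language, model, topic count, samples)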
-sub context_dir {
- return "ct${CONTEXT_SIZE}s0.L$BASE_PHRASE_MAX_SIZE.l$LANGUAGE";
-}
-
-sub cluster_dir {
- if (lc($MODEL) eq "pyp") {
- return context_dir() . ".PYP.t$NUM_TOPICS.s$NUM_SAMPLES";
- } elsif (lc($MODEL) eq "em") {
- return context_dir() . ".EM.t$NUM_TOPICS.i$NUM_ITERS";
- } elsif (lc($MODEL) eq "pr") {
- return context_dir() . ".PR.t$NUM_TOPICS.i$NUM_ITERS.sp$PR_SCALE_P.sc$PR_SCALE_C";
- } elsif (lc($MODEL) eq "agree") {
- return context_dir() . ".AGREE.t$NUM_TOPICS.i$NUM_ITERS";
- } elsif (lc($MODEL) eq "blagree") {
- return context_dir() . ".BLAGREE.t$NUM_TOPICS.i$NUM_ITERS";
- }
-}
-
-sub labeled_dir {
- if (lc($MODEL) eq "pyp" && $LABEL_THRESHOLD ne "0") {
- return cluster_dir() . "_lt$LABEL_THRESHOLD";
- } else {
- return cluster_dir();
- }
-}
-
-sub grammar_dir {
- # TODO add grammar config options -- adjacent NTs, etc
- if($HIER_CAT) {
- return cluster_dir() . ".hier$NUM_TOPICS_COARSE-$NUM_TOPICS_FINE.grammar";
- } else {
- return labeled_dir() . ".grammar";
- }
-}
-
-
-
-sub safemkdir {
- my $dir = shift;
- if (-d $dir) { return 1; }
- return mkdir($dir);
-}
-
-sub usage {
- print <<EOT;
-
-Usage: $0 [OPTIONS] corpus.fr-en-al
-
-Induces a grammar using Pitman-Yor topic modeling or Posterior Regularisation.
-
-EOT
- exit 1;
-};
-
-sub assert_exec {
- my @files = @_;
- for my $file (@files) {
- die "Can't find $file - did you run make?\n" unless -e $file;
- die "Can't execute $file" unless -e $file;
- }
-};
-
-sub extract_context {
- print STDERR "\n!!!CONTEXT EXTRACTION\n";
- my $OUT_CONTEXTS = "$CONTEXT_DIR/context.txt.gz";
- if (-e $OUT_CONTEXTS) {
- print STDERR "$OUT_CONTEXTS exists, reusing...\n";
- } else {
- my $ccopt = "-c $ITEMS_IN_MEMORY";
- my $postsort = "| $REDUCER ";
- if ($COMPLETE_CACHE) {
- print STDERR "COMPLETE_CACHE is set: removing memory limits on cache.\n";
- $ccopt = "-c 0";
- $postsort = "" unless ($PRESERVE_PHRASES);
- }
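- # with the cache unlimited (-c 0) the extractor can merge counts itself, so
- # the reducer pass is presumably unnecessary unless phrases are preserved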
-
- my $presort = ($PRESERVE_PHRASES ? "| $REMOVE_TAGS_CONTEXT --phrase=tok --context=tag " : "");
-
- if ($MORFMARK ne "") {
- $presort = $presort . "| $MORF_DOC_FILTER \"$MORFMARK\" ";
- }
-
- my $cmd = "$EXTRACTOR -i $CORPUS_CLUSTER $ccopt -L $BASE_PHRASE_MAX_SIZE -C -S $CONTEXT_SIZE --phrase_language $LANGUAGE --context_language $LANGUAGE $presort | $SORT_KEYS $postsort | $GZIP > $OUT_CONTEXTS";
- safesystem($cmd) or die "Failed to extract contexts.";
- }
-}
-
-sub extract_bilingual_context {
- print STDERR "\n!!!CONTEXT EXTRACTION\n";
- my $OUT_SRC_CONTEXTS = "$CONTEXT_DIR/context.source";
- my $OUT_TGT_CONTEXTS = "$CONTEXT_DIR/context.target";
-
- if (-e $OUT_SRC_CONTEXTS . ".gz" and -e $OUT_TGT_CONTEXTS . ".gz") {
- print STDERR "$OUT_SRC_CONTEXTS.gz and $OUT_TGT_CONTEXTS.gz exist, reusing...\n";
- } else {
- my $OUT_BI_CONTEXTS = "$CONTEXT_DIR/context.bilingual.txt.gz";
- my $cmd = "$EXTRACTOR -i $CORPUS_CLUSTER -c $ITEMS_IN_MEMORY -L $BASE_PHRASE_MAX_SIZE -C -S $CONTEXT_SIZE --phrase_language both --context_language both | $SORT_KEYS | $REDUCER | $GZIP > $OUT_BI_CONTEXTS";
- if ($COMPLETE_CACHE) {
- print STDERR "COMPLETE_CACHE is set: removing memory limits on cache.\n";
- $cmd = "$EXTRACTOR -i $CORPUS_CLUSTER -c 0 -L $BASE_PHRASE_MAX_SIZE -C -S $CONTEXT_SIZE --phrase_language both --context_language both | $SORT_KEYS | $GZIP > $OUT_BI_CONTEXTS";
- }
- safesystem($cmd) or die "Failed to extract contexts.";
-
- safesystem("$ZCAT $OUT_BI_CONTEXTS | $SPLIT $OUT_SRC_CONTEXTS $OUT_TGT_CONTEXTS") or die "Failed to split contexts.\n";
- safesystem("$GZIP -f $OUT_SRC_CONTEXTS") or die "Failed to zip output contexts.\n";
- safesystem("$GZIP -f $OUT_TGT_CONTEXTS") or die "Failed to zip output contexts.\n";
- }
-}
-
-
-sub topic_train {
- print STDERR "\n!!!TRAIN PYP TOPICS\n";
- my $IN_CONTEXTS = "$CONTEXT_DIR/context.txt.gz";
- my $OUT_CLUSTERS = "$CLUSTER_DIR/docs.txt.gz";
- if (-e $OUT_CLUSTERS) {
- print STDERR "$OUT_CLUSTERS exists, reusing...\n";
- } else {
- safesystem("$TOPIC_TRAIN --data $IN_CONTEXTS --backoff-type simple -t $NUM_TOPICS -s $NUM_SAMPLES -o $OUT_CLUSTERS -c $TOPICS_CONFIG -w /dev/null") or die "Topic training failed.\n";
- }
-}
-
-sub prem_train {
- print STDERR "\n!!!TRAIN PR/EM model\n";
- my $OUT_CLUSTERS = "$CLUSTER_DIR/docs.txt.gz";
- if (-e $OUT_CLUSTERS) {
- print STDERR "$OUT_CLUSTERS exists, reusing...\n";
- } else {
- my $in = "--in $CONTEXT_DIR/context.txt.gz";
- my $opts = "";
- if (lc($MODEL) eq "pr") {
- $opts = "--scale-phrase $PR_SCALE_P --scale-context $PR_SCALE_C";
- } elsif (lc($MODEL) eq "agree") {
- $opts = "--agree-direction";
- } elsif (lc($MODEL) eq "blagree") {
- $in = "--in $CONTEXT_DIR/context.source.gz --in1 $CONTEXT_DIR/context.target.gz";
- $opts = "--agree-language";
- }
- safesystem("$PREM_TRAIN $in --topics $NUM_TOPICS --out $OUT_CLUSTERS --iterations $NUM_ITERS $opts $PR_FLAGS") or die "Topic training failed.\n";
- }
-}
-
-sub label_spans_with_topics {
- my ($file) = (@_);
- print STDERR "\n!!!LABEL SPANS\n";
- my $IN_CLUSTERS = "$CLUSTER_DIR/docs.txt.gz";
- my $OUT_SPANS = "$LABELED_DIR/labeled_spans.txt";
- if (-e $OUT_SPANS) {
- print STDERR "$OUT_SPANS exists, reusing...\n";
- } else {
- my $extra = "tt";
- if ($LANGUAGE eq "source") {
- $extra = "ss";
- } elsif ($LANGUAGE eq "both") {
- $extra = "bb";
- } else { die "Invalid language specifier $LANGUAGE\n" unless $LANGUAGE eq "target" };
- $extra = $extra . " tok,tag" if ($PRESERVE_PHRASES);
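- # ss/tt/bb tell spans2labels.py which side(s) carry the labels (source,
- # target, both); "tok,tag" is appended when tagged phrases are preserved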
- safesystem("$ZCAT $IN_CLUSTERS > $CLUSTER_DIR/clusters.txt") or die "Failed to unzip";
- safesystem("$EXTRACTOR --base_phrase_spans -i $CORPUS_CLUSTER -c $ITEMS_IN_MEMORY -L $BASE_PHRASE_MAX_SIZE -S $CONTEXT_SIZE | $S2L $CLUSTER_DIR/clusters.txt $CONTEXT_SIZE $LABEL_THRESHOLD $extra > $OUT_SPANS") or die "Failed to label spans";
- unlink("$CLUSTER_DIR/clusters.txt") or warn "Failed to remove $CLUSTER_DIR/clusters.txt";
- safesystem("paste -d ' ' $CORPUS_LEX $OUT_SPANS | sed 's/ *||| *\$//' > $LABELED_DIR/corpus.src_trg_al_label") or die "Couldn't paste";
- }
-}
-
-sub extract_freqs {
- print STDERR "\n!!!EXTRACTING FREQUENCIES\n";
- my $IN_COARSE = "$LABELED_DIR_C/labeled_spans.txt";
- my $IN_FINE = "$LABELED_DIR_F/labeled_spans.txt";
- my $OUT_SPANS = "$LABELED_DIR_F/labeled_spans.hier$NUM_TOPICS_COARSE-$NUM_TOPICS_FINE.txt";
- my $FREQS = "$LABELED_DIR_F/label_freq.hier$NUM_TOPICS_COARSE-$NUM_TOPICS_FINE.txt";
- my $COARSE_EXPR = "\'s/\\(X[0-9][0-9]*\\)/\\1c/g\'"; #'
- my $FINE_EXPR = "\'s/\\(X[0-9][0-9]*\\)/\\1f/g\'"; #'
- my %finehier = ();
- if (-e $OUT_SPANS) {
- print STDERR "$OUT_SPANS exists, reusing...\n";
- } else {
- safesystem("paste -d ' ' $IN_COARSE $IN_FINE > $OUT_SPANS");
- }
- open SPANS, $OUT_SPANS or die $!;
- while (<SPANS>) {
- my ($tmp, $coarse, $fine) = split /\|\|\|/;
- my @coarse_spans = $coarse =~ /\d+-\d+:X(\d+)/g;
- my @fine_spans = $fine =~ /\d+-\d+:X(\d+)/g;
-
- foreach my $i (0..(scalar @coarse_spans)-1) {
- my $coarse_cat = $coarse_spans[$i];
- my $fine_cat = $fine_spans[$i];
-
- $FREQ_HIER{$coarse_cat}{$fine_cat}++;
- }
- }
- close SPANS;
- foreach (values %FREQ_HIER) {
- my $coarse_freq = $_;
- my $total = 0;
- $total+=$_ for (values %{ $coarse_freq });
- $coarse_freq->{$_}=log($coarse_freq->{$_}/$total) for (keys %{ $coarse_freq });
- }
- open FREQS, ">", $FREQS or die $!;
- foreach my $coarse_cat (keys %FREQ_HIER) {
- print FREQS "$coarse_cat |||";
- foreach my $fine_cat (keys %{$FREQ_HIER{$coarse_cat}}) {
- my $res = $FREQ_HIER{$coarse_cat}{$fine_cat};
- print FREQS " $fine_cat:$res";
- if(! exists $finehier{$fine_cat} || $finehier{$fine_cat} < $res) {
- $finehier{$fine_cat} = $coarse_cat;
- }
- }
- print FREQS "\n";
- }
-# foreach my $fine_cat (keys %finehier) {
-# print FREQS "$fine_cat -> $finehier{$fine_cat}\n";
-# }
- close FREQS;
- $CLUSTER_DIR = $CLUSTER_DIR_F;
-}
-
-sub grammar_extract {
- my $LABELED = "$LABELED_DIR/corpus.src_trg_al_label";
- print STDERR "\n!!!EXTRACTING GRAMMAR\n";
- my $OUTGRAMMAR = "$GRAMMAR_DIR/grammar.gz";
- if (-e $OUTGRAMMAR) {
- print STDERR "$OUTGRAMMAR exists, reusing...\n";
- } else {
- my $BACKOFF_ARG = ($BACKOFF_GRAMMAR ? "-g" : "");
- my $DEFAULT_CAT_ARG = ($DEFAULT_CAT ? "-d X" : "");
- safesystem("$EXTRACTOR -i $LABELED -c $ITEMS_IN_MEMORY -L $BASE_PHRASE_MAX_SIZE -t $NUM_TOPICS $BACKOFF_ARG $DEFAULT_CAT_ARG | $SORT_KEYS | $REDUCER -p | $GZIP > $OUTGRAMMAR") or die "Couldn't extract grammar";
- }
- return $OUTGRAMMAR;
-}
-
-sub grammar_extract_bidir {
-#gzcat ex.output.gz | ./mr_stripe_rule_reduce -p -b | sort -t $'\t' -k 1 | ./mr_stripe_rule_reduce | gzip > phrase-table.gz
- my $LABELED = "$LABELED_DIR/corpus.src_trg_al_label";
- print STDERR "\n!!!EXTRACTING GRAMMAR\n";
- my $OUTGRAMMAR = "$GRAMMAR_DIR/grammar.bidir.gz";
- if (-e $OUTGRAMMAR) {
- print STDERR "$OUTGRAMMAR exists, reusing...\n";
- } else {
- my $BACKOFF_ARG = ($BACKOFF_GRAMMAR ? "-g" : "");
- safesystem("$EXTRACTOR -i $LABELED -c $ITEMS_IN_MEMORY -L $BASE_PHRASE_MAX_SIZE -b -t $NUM_TOPICS $BACKOFF_ARG | $SORT_KEYS | $REDUCER -p -b | $SORT_KEYS | $REDUCER | $GZIP > $OUTGRAMMAR") or die "Couldn't extract grammar";
- }
- return $OUTGRAMMAR;
-}
-
-sub safesystem {
- print STDERR "Executing: @_\n";
- system(@_);
- if ($? == -1) {
- print STDERR "ERROR: Failed to execute: @_\n $!\n";
- exit(1);
- }
- elsif ($? & 127) {
- printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n",
- ($? & 127), ($? & 128) ? 'with' : 'without';
- exit(1);
- }
- else {
- my $exitcode = $? >> 8;
- print STDERR "Exit code: $exitcode\n" if $exitcode;
- return ! $exitcode;
- }
-}
-
diff --git a/gi/pipeline/lticluster.config b/gi/pipeline/lticluster.config
deleted file mode 100644
index 3e23c8cb..00000000
--- a/gi/pipeline/lticluster.config
+++ /dev/null
@@ -1,9 +0,0 @@
-# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED
-# name path aligned-corpus LM dev dev-refs test1 test1-eval.sh ...
-/home/cdyer/ws10smt-data
-btec /home/cdyer/ws10smt-data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh
-zhen /home/cdyer/ws10smt-data/chinese-english corpus.zh-en.al lm/c2e.3gram.lm.gz dev_and_test/mt02.src.txt dev_and_test/mt02.ref.* dev_and_test/mt03.src.txt eval-mt03.sh
-aren /home/cdyer/ws10smt-data/arabic-english corpus.ar-en-al lm/a2e.3gram.lm.gz dev_and_test/dev.src.txt dev_and_test/dev.ref.txt.* dev_and_test/mt05.src.txt eval-mt05.sh
-uren /home/cdyer/ws10smt-data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh
-nlfr /home/cdyer/ws10smt-data/dutch-french corpus.nl-fr.al
-
diff --git a/gi/pipeline/scripts/filter-by-f.pl b/gi/pipeline/scripts/filter-by-f.pl
deleted file mode 100755
index 0cef0606..00000000
--- a/gi/pipeline/scripts/filter-by-f.pl
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; }
-
-my $REKEY="$SCRIPT_DIR/rekey.pl";
-my $REFILTER="$SCRIPT_DIR/refilter.pl";
-my $SORT="$SCRIPT_DIR/sort-by-key.sh";
-assert_exec($REKEY, $REFILTER, $SORT);
-
-
-die "Usage: $0 NUM-TRANSLATIONS ingrammar.gz outgrammar.gz\n" unless scalar @ARGV == 3;
-my $translations = shift @ARGV;
-die "Need number: $translations" unless $translations > 0;
-die unless $ARGV[0] =~ /\.gz$/;
-die unless $ARGV[1] =~ /\.gz$/;
-die if $ARGV[0] eq $ARGV[1];
-die "Can't find $ARGV[0]" unless -f $ARGV[0];
-
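-# rekey.pl rekeys each rule by its source side with nonterminals collapsed to
-# [X], sort-by-key.sh groups rules sharing a source, and refilter.pl keeps the
-# top NUM-TRANSLATIONS rules per key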
-my $cmd = "gunzip -c $ARGV[0] | $REKEY | $SORT | $REFILTER $translations | gzip > $ARGV[1]";
-safesystem($ARGV[1], $cmd) or die "Filtering failed";
-exit 0;
-
-sub assert_exec {
- my @files = @_;
- for my $file (@files) {
- die "Can't find $file - did you run make?\n" unless -e $file;
- die "Can't execute $file" unless -e $file;
- }
-};
-
-sub safesystem {
- my $output = shift @_;
- print STDERR "Executing: @_\n";
- system(@_);
- if ($? == -1) {
- print STDERR "ERROR: Failed to execute: @_\n $!\n";
- if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; }
- exit(1);
- }
- elsif ($? & 127) {
- printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n",
- ($? & 127), ($? & 128) ? 'with' : 'without';
- if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; }
- exit(1);
- }
- else {
- my $exitcode = $? >> 8;
- if ($exitcode) {
- print STDERR "Exit code: $exitcode\n";
- if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; }
- }
- return ! $exitcode;
- }
-}
-
diff --git a/gi/pipeline/scripts/patch-corpus.pl b/gi/pipeline/scripts/patch-corpus.pl
deleted file mode 100755
index c0eec43e..00000000
--- a/gi/pipeline/scripts/patch-corpus.pl
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-my $PATCH = shift @ARGV;
-my $TGT = 1;
-my $APPEND;
-while ($PATCH eq "-s" || $PATCH eq "-a") {
- if ($PATCH eq "-s") {
- undef $TGT;
- } else {
- $APPEND = 1;
- }
- $PATCH = shift @ARGV;
-}
-
-die "Usage: $0 [-s] [-a] tagged.en[_fr] < lexical.en_fr_al[_...]\n" unless $PATCH;
-
-open P, "<$PATCH" or die "Can't read tagged corpus $PATCH: $!";
-my $first=<P>; close P;
-my @fields = split / \|\|\| /, $first;
-die "Bad format!" if (scalar @fields > 2);
-
-if (scalar @fields != 1) {
- # TODO support this
- die "Patching source and target not supported yet!";
-}
-
-my $lno = 0;
-open P, "<$PATCH" or die "Can't read tagged corpus $PATCH: $!";
-while(my $pline = <P>) {
- chomp $pline;
- $lno++;
- my $line = <>;
- die "Too few lines in lexical corpus!" unless $line;
- chomp $line;
- @fields = split / \|\|\| /, $line;
- my @pwords = split /\s+/, $pline;
- if ($TGT) {
- my @lwords = split /\s+/, $fields[1];
- die "Length mismatch in line $line!\n" unless (scalar @pwords == scalar @lwords);
- if ($APPEND) {
- foreach my $i (0..(scalar @pwords-1)) {
- $lwords[$i] = $lwords[$i] . '_' . $pwords[$i];
- }
- $fields[1] = join ' ', @lwords;
- } else {
- $fields[1] = $pline;
- }
- } else { # source side
- my @lwords = split /\s+/, $fields[0];
- die "Length mismatch in line $line!\n" unless (scalar @pwords == scalar @lwords);
- if ($APPEND) {
- foreach my $i (0..(scalar @pwords-1)) {
- $lwords[$i] = $lwords[$i] . '_' . $pwords[$i];
- }
- $fields[0] = join ' ', @lwords;
- } else {
- $fields[0] = $pline;
- }
- }
- print join ' ||| ', @fields;
- print "\n";
-}
-
-
diff --git a/gi/pipeline/scripts/refilter.pl b/gi/pipeline/scripts/refilter.pl
deleted file mode 100755
index a783eb4e..00000000
--- a/gi/pipeline/scripts/refilter.pl
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-my $NUM_TRANSLATIONS = shift @ARGV;
-unless ($NUM_TRANSLATIONS) { $NUM_TRANSLATIONS=30; }
-print STDERR "KEEPING $NUM_TRANSLATIONS TRANSLATIONS FOR SOURCE\n";
-
-my $pk = '';
-my %dict;
-while(<>) {
- s/^(.+)\t//;
- my $key = $1;
- if ($key ne $pk) {
- if ($pk) {
- emit_dict();
- }
- %dict = ();
- $pk = $key;
- }
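- # score each rule by XEF plus a tenth of GenerativeProb; emit_dict keeps the
- # $NUM_TRANSLATIONS entries with the smallest scores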
- my ($lhs, $f, $e, $s) = split / \|\|\| /;
- my $score = 0;
- if ($s =~ /XEF=([^ ]+)/) {
- $score += $1;
- } else { die; }
- if ($s =~ /GenerativeProb=([^ ]+)/) {
- $score += ($1 / 10);
- } else { die; }
- $dict{"$lhs ||| $f ||| $e ||| $s"} = $score;
-}
-emit_dict();
-
-sub emit_dict {
- my $cc = 0;
- for my $k (sort { $dict{$a} <=> $dict{$b} } keys %dict) {
- print "$k";
- $cc++;
- if ($cc >= $NUM_TRANSLATIONS) { last; }
- }
-}
-
diff --git a/gi/pipeline/scripts/rekey.pl b/gi/pipeline/scripts/rekey.pl
deleted file mode 100755
index 31eb86b8..00000000
--- a/gi/pipeline/scripts/rekey.pl
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/usr/bin/perl
-
-while(<>) {
- my ($lhs, $f, $e, $s) = split / \|\|\| /;
- $f =~ s/\[X[0-9]+\]/\[X\]/g;
- print "$f\t$_";
-}
-
diff --git a/gi/pipeline/scripts/remove-tags-from-contexts.pl b/gi/pipeline/scripts/remove-tags-from-contexts.pl
deleted file mode 100755
index 20698816..00000000
--- a/gi/pipeline/scripts/remove-tags-from-contexts.pl
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-use Getopt::Long "GetOptions";
-
-my $PHRASE = 'tok';
-my $CONTEXT = 'tag';
-
-die "Usage: $0 [--phrase=tok|tag] [--context=tok|tag] < corpus"
- unless &GetOptions('phrase=s' => \$PHRASE, 'context=s' => \$CONTEXT);
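-# tokens look like token_TAG; the split below breaks on the *last* underscore
-# so tokens that themselves contain underscores stay intact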
-
-my $lno = 0;
-while(my $line = <>) {
- $lno++;
- chomp $line;
- my @top = split /\t/, $line;
- die unless (scalar @top == 2);
-
- my @pwords = split /\s+/, $top[0];
- foreach my $token (@pwords) {
- #print $token . "\n";
- my @parts = split /_(?!.*_)/, $token;
- die unless (scalar @parts == 2);
- if ($PHRASE eq "tok") {
- $token = $parts[0]
- } elsif ($PHRASE eq "tag") {
- $token = $parts[1]
- }
- }
-
- my @fields = split / \|\|\| /, $top[1];
- foreach my $i (0..((scalar @fields) / 2 - 1)) {
- #print $i . ": " . $fields[2*$i] . " of " . (scalar @fields) . "\n";
- my @cwords = split /\s+/, $fields[2*$i];
- foreach my $token (@cwords) {
- #print $i . ": " . $token . "\n";
- my @parts = split /_(?!.*_)/, $token;
- if (scalar @parts == 2) {
- if ($CONTEXT eq "tok") {
- $token = $parts[0]
- } elsif ($CONTEXT eq "tag") {
- $token = $parts[1]
- }
- }
- }
- $fields[2*$i] = join ' ', @cwords;
- }
-
- print join ' ', @pwords;
- print "\t";
- print join ' ||| ', @fields;
- print "\n";
-}
diff --git a/gi/pipeline/scripts/remove-tags-from-corpus.pl b/gi/pipeline/scripts/remove-tags-from-corpus.pl
deleted file mode 100755
index be3e97c0..00000000
--- a/gi/pipeline/scripts/remove-tags-from-corpus.pl
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-use Getopt::Long "GetOptions";
-
-my $LANGUAGE = shift @ARGV;
-$LANGUAGE = 'target' unless ($LANGUAGE);
-
-my $lno = 0;
-while(my $line = <>) {
- $lno++;
- chomp $line;
-
- my @fields = split / \|\|\| /, $line;
-
- if ($LANGUAGE eq "source" or $LANGUAGE eq "both") {
- my @cwords = split /\s+/, $fields[0];
- foreach my $token (@cwords) {
- my @parts = split /_(?!.*_)/, $token;
- if (scalar @parts == 2) {
- $token = $parts[0]
- } else {
- print STDERR "WARNING: invalid tagged token $token\n";
- }
- }
- $fields[0] = join ' ', @cwords;
- }
-
- if ($LANGUAGE eq "target" or $LANGUAGE eq "both") {
- my @cwords = split /\s+/, $fields[1];
- foreach my $token (@cwords) {
- my @parts = split /_(?!.*_)/, $token;
- if (scalar @parts == 2) {
- $token = $parts[1]
- } else {
- print STDERR "WARNING: invalid tagged token $token\n";
- }
- }
- $fields[1] = join ' ', @cwords;
- }
-
- print join ' ||| ', @fields;
- print "\n";
-}
diff --git a/gi/pipeline/scripts/sort-by-key.sh b/gi/pipeline/scripts/sort-by-key.sh
deleted file mode 100755
index 7ae33e03..00000000
--- a/gi/pipeline/scripts/sort-by-key.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/bash
-
-export LANG=C
-sort -t $'\t' -k 1 -T /tmp -S 6G
-
diff --git a/gi/pipeline/scripts/xfeats.pl b/gi/pipeline/scripts/xfeats.pl
deleted file mode 100755
index dc578513..00000000
--- a/gi/pipeline/scripts/xfeats.pl
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-die "Usage: $0 x-grammar.scfg[.gz] < cat-grammar.scfg\n" unless scalar @ARGV > 0;
-
-my $xgrammar = shift @ARGV;
-die "Can't find $xgrammar" unless -f $xgrammar;
-my $fh;
-if ($xgrammar =~ /\.gz$/) {
- open $fh, "gunzip -c $xgrammar|" or die "Can't fork: $!";
-} else {
- open $fh, "<$xgrammar" or die "Can't read $xgrammar: $!";
-}
-print STDERR "Reading X-feats from $xgrammar...\n";
-my %dict;
-while(<$fh>) {
- chomp;
- my ($lhs, $f, $e, $feats) = split / \|\|\| /;
- my $xfeats;
- my $cc = 0;
- my @xfeats = ();
- while ($feats =~ /(EGivenF|FGivenE|LogRuleCount|LogECount|LogFCount|SingletonRule|SingletonE|SingletonF)=([^ ]+)( |$)/og) {
- push @xfeats, "X_$1=$2";
- }
- #print "$lhs ||| $f ||| $e ||| @xfeats\n";
- $dict{"$lhs ||| $f ||| $e"} = "@xfeats";
-}
-close $fh;
-
-print STDERR "Add features...\n";
-while(<>) {
- chomp;
- my ($lhs, $f, $e) = split / \|\|\| /;
- $f=~ s/\[[^]]+,([12])\]/\[X,$1\]/g;
- my $xfeats = $dict{"[X] ||| $f ||| $e"};
- die "Can't find x features for: $_\n" unless $xfeats;
- print "$_ $xfeats\n";
-}
-
diff --git a/gi/pipeline/valhalla.config b/gi/pipeline/valhalla.config
deleted file mode 100644
index e00a8485..00000000
--- a/gi/pipeline/valhalla.config
+++ /dev/null
@@ -1,9 +0,0 @@
-# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED
-# name path aligned-corpus LM dev dev-refs test1 test1-eval.sh ...
-/home/chris/ws10smt/data
-btec /home/chris/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh
-fbis /home/chris/ws10smt/data/chinese-english.fbis corpus.zh-en.al
-zhen /home/chris/ws10smt/data/chinese-english corpus.zh-en.al
-aren /home/chris/ws10smt/data/arabic-english corpus.ar-en.al
-uren /home/chris/ws10smt/data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh
-nlfr /home/chris/ws10smt/data/dutch-french corpus.nl-fr.al
diff --git a/gi/posterior-regularisation/Corpus.java b/gi/posterior-regularisation/Corpus.java
deleted file mode 100644
index 07b27387..00000000
--- a/gi/posterior-regularisation/Corpus.java
+++ /dev/null
@@ -1,167 +0,0 @@
-import gnu.trove.TIntArrayList;
-
-import java.io.*;
-import java.util.*;
-import java.util.regex.Pattern;
-
-public class Corpus
-{
- private Lexicon<String> tokenLexicon = new Lexicon<String>();
- private Lexicon<TIntArrayList> phraseLexicon = new Lexicon<TIntArrayList>();
- private Lexicon<TIntArrayList> contextLexicon = new Lexicon<TIntArrayList>();
- private List<Edge> edges = new ArrayList<Edge>();
- private List<List<Edge>> phraseToContext = new ArrayList<List<Edge>>();
- private List<List<Edge>> contextToPhrase = new ArrayList<List<Edge>>();
-
- public class Edge
- {
- Edge(int phraseId, int contextId, int count)
- {
- this.phraseId = phraseId;
- this.contextId = contextId;
- this.count = count;
- }
- public int getPhraseId()
- {
- return phraseId;
- }
- public TIntArrayList getPhrase()
- {
- return phraseLexicon.lookup(phraseId);
- }
- public String getPhraseString()
- {
- StringBuffer b = new StringBuffer();
- for (int tid: getPhrase().toNativeArray())
- {
- if (b.length() > 0)
- b.append(" ");
- b.append(tokenLexicon.lookup(tid));
- }
- return b.toString();
- }
- public int getContextId()
- {
- return contextId;
- }
- public TIntArrayList getContext()
- {
- return contextLexicon.lookup(contextId);
- }
- public String getContextString()
- {
- StringBuffer b = new StringBuffer();
- for (int tid: getContext().toNativeArray())
- {
- if (b.length() > 0)
- b.append(" ");
- b.append(tokenLexicon.lookup(tid));
- }
- return b.toString();
- }
- public int getCount()
- {
- return count;
- }
- private int phraseId;
- private int contextId;
- private int count;
- }
-
- List<Edge> getEdges()
- {
- return edges;
- }
-
- int getNumEdges()
- {
- return edges.size();
- }
-
- int getNumPhrases()
- {
- return phraseLexicon.size();
- }
-
- List<Edge> getEdgesForPhrase(int phraseId)
- {
- return phraseToContext.get(phraseId);
- }
-
- int getNumContexts()
- {
- return contextLexicon.size();
- }
-
- List<Edge> getEdgesForContext(int contextId)
- {
- return contextToPhrase.get(contextId);
- }
-
- int getNumTokens()
- {
- return tokenLexicon.size();
- }
-
- static Corpus readFromFile(Reader in) throws IOException
- {
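- // Expected input, one phrase per line:
- // phrase TAB ctx ||| C=n ||| ctx ||| C=n ...
- // i.e. a tab-separated phrase and context list, the latter alternating
- // context strings with their counts; the phrase slot inside each
- // context is marked <PHRASE> and is dropped during indexing.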
- Corpus c = new Corpus();
-
- // read in line-by-line
- BufferedReader bin = new BufferedReader(in);
- String line;
- Pattern separator = Pattern.compile(" \\|\\|\\| ");
-
- while ((line = bin.readLine()) != null)
- {
- // split into phrase and contexts
- StringTokenizer st = new StringTokenizer(line, "\t");
- assert (st.hasMoreTokens());
- String phraseToks = st.nextToken();
- assert (st.hasMoreTokens());
- String rest = st.nextToken();
- assert (!st.hasMoreTokens());
-
- // process phrase
- st = new StringTokenizer(phraseToks, " ");
- TIntArrayList ptoks = new TIntArrayList();
- while (st.hasMoreTokens())
- ptoks.add(c.tokenLexicon.insert(st.nextToken()));
- int phraseId = c.phraseLexicon.insert(ptoks);
- if (phraseId == c.phraseToContext.size())
- c.phraseToContext.add(new ArrayList<Edge>());
-
- // process contexts
- String[] parts = separator.split(rest);
- assert (parts.length % 2 == 0);
- for (int i = 0; i < parts.length; i += 2)
- {
- // process pairs of strings - context and count
- TIntArrayList ctx = new TIntArrayList();
- String ctxString = parts[i];
- String countString = parts[i + 1];
- StringTokenizer ctxStrtok = new StringTokenizer(ctxString, " ");
- while (ctxStrtok.hasMoreTokens())
- {
- String token = ctxStrtok.nextToken();
- if (!token.equals("<PHRASE>"))
- ctx.add(c.tokenLexicon.insert(token));
- }
- int contextId = c.contextLexicon.insert(ctx);
- if (contextId == c.contextToPhrase.size())
- c.contextToPhrase.add(new ArrayList<Edge>());
-
- assert (countString.startsWith("C="));
- Edge e = c.new Edge(phraseId, contextId,
- Integer.parseInt(countString.substring(2).trim()));
- c.edges.add(e);
-
- // index the edge for fast phrase, context lookup
- c.phraseToContext.get(phraseId).add(e);
- c.contextToPhrase.get(contextId).add(e);
- }
- }
-
- return c;
- }
-}
diff --git a/gi/posterior-regularisation/Lexicon.java b/gi/posterior-regularisation/Lexicon.java
deleted file mode 100644
index 9f0245ee..00000000
--- a/gi/posterior-regularisation/Lexicon.java
+++ /dev/null
@@ -1,32 +0,0 @@
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-public class Lexicon<T>
-{
- public int insert(T word)
- {
- Integer i = wordToIndex.get(word);
- if (i == null)
- {
- i = indexToWord.size();
- wordToIndex.put(word, i);
- indexToWord.add(word);
- }
- return i;
- }
-
- public T lookup(int index)
- {
- return indexToWord.get(index);
- }
-
- public int size()
- {
- return indexToWord.size();
- }
-
- private Map<T, Integer> wordToIndex = new HashMap<T, Integer>();
- private List<T> indexToWord = new ArrayList<T>();
-} \ No newline at end of file
diff --git a/gi/posterior-regularisation/PhraseContextModel.java b/gi/posterior-regularisation/PhraseContextModel.java
deleted file mode 100644
index 85bcfb89..00000000
--- a/gi/posterior-regularisation/PhraseContextModel.java
+++ /dev/null
@@ -1,466 +0,0 @@
-// Input of the form:
-// " the phantom of the opera " tickets for <PHRASE> tonight ? ||| C=1 ||| seats for <PHRASE> ? </s> ||| C=1 ||| i see <PHRASE> ? </s> ||| C=1
-// phrase TAB [context]+
-// where context = phrase ||| C=... which are separated by |||
-
-// Model parameterised as follows:
-// - each phrase, p, is allocated a latent state, t
-// - this is used to generate the contexts, c
-// - each context is generated using 4 independent multinomials, one for each position LL, L, R, RR
-
-// Training with EM:
-// - e-step is estimating q(t) = P(t|p,c) for all p,c
-// - m-step is estimating model parameters P(c,t|p) = P(t) P(c|t)
-// - PR uses an alternate e-step, which first optimizes lambda
-// min_q KL(q||p) + delta sum_pt max_c E_q[phi_ptc]
-// where
-// q(t|p,c) propto p(t,c|p) exp( -phi_ptc )
-// Then q is used to obtain expectations for vanilla M-step.
-
-// Sexing it up:
-// - learn p-specific conditionals P(t|p)
-// - or generate phrase internals, e.g., generate edge words from
-// different distribution to central words
-// - agreement between phrase->context model and context->phrase model
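-//
-// To make the PR e-step concrete, a minimal sketch (hypothetical helper, not
-// used below) of the penalised posterior for a single (phrase, context) pair,
-// given its conditionals pTC[] and its block of multipliers lambda[]:
-//
-// double[] q = new double[numTags];
-// double z = 0;
-// for (int t = 0; t < numTags; ++t) {
-// q[t] = pTC[t] * Math.exp(-lambda[t]); // p(t,c|p) exp(-lambda_ptc)
-// z += q[t];
-// }
-// for (int t = 0; t < numTags; ++t)
-// q[t] /= z; // q(t|p,c), as recomputed in EStepDualObjective below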
-
-import java.io.*;
-import optimization.gradientBasedMethods.*;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.gradientBasedMethods.stats.ProjectedOptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.GenericPickFirstStep;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.linesearch.WolfRuleLineSearch;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.NormalizedProjectedGradientL2Norm;
-import optimization.stopCriteria.NormalizedValueDifference;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-import optimization.util.MathUtils;
-import java.util.*;
-import java.util.regex.*;
-import gnu.trove.TDoubleArrayList;
-import gnu.trove.TIntArrayList;
-import static java.lang.Math.*;
-
-class PhraseContextModel
-{
- // model/optimisation configuration parameters
- int numTags;
- boolean posteriorRegularisation = true;
- double constraintScale = 3; // FIXME: make configurable
-
- // copied from L1LMax in depparsing code
- final double c1= 0.0001, c2=0.9, stoppingPrecision = 1e-5, maxStep = 10;
- final int maxZoomEvals = 10, maxExtrapolationIters = 200;
- int maxProjectionIterations = 200;
- int minOccurrencesForProjection = 0;
-
- // bookkeeping
- int numPositions;
- Random rng = new Random();
-
- // training set
- Corpus training;
-
- // model parameters (learnt)
- double emissions[][][]; // position in 0 .. 3 x tag x word Pr(word | tag, position)
- double prior[][]; // phrase x tag Pr(tag | phrase)
- double lambda[]; // edge = (phrase, context) x tag flattened lagrange multipliers
-
- PhraseContextModel(Corpus training, int tags)
- {
- this.training = training;
- this.numTags = tags;
- assert (!training.getEdges().isEmpty());
- assert (numTags > 1);
-
- // now initialise emissions
- numPositions = training.getEdges().get(0).getContext().size();
- assert (numPositions > 0);
-
- emissions = new double[numPositions][numTags][training.getNumTokens()];
- prior = new double[training.getNumPhrases()][numTags]; // phrase x tag, per the comment above
- if (posteriorRegularisation)
- lambda = new double[training.getNumEdges() * numTags];
-
- for (double[][] emissionTW : emissions)
- {
- for (double[] emissionW : emissionTW)
- {
- randomise(emissionW);
-// for (int i = 0; i < emissionW.length; ++i)
-// emissionW[i] = i+1;
-// normalise(emissionW);
- }
- }
-
- for (double[] priorTag : prior)
- {
- randomise(priorTag);
-// for (int i = 0; i < priorTag.length; ++i)
-// priorTag[i] = i+1;
-// normalise(priorTag);
- }
- }
-
- void expectationMaximisation(int numIterations)
- {
- double lastLlh = Double.NEGATIVE_INFINITY;
-
- for (int iteration = 0; iteration < numIterations; ++iteration)
- {
- double emissionsCounts[][][] = new double[numPositions][numTags][training.getNumTokens()];
- double priorCounts[][] = new double[training.getNumPhrases()][numTags];
-
- // E-step
- double llh = 0;
- if (posteriorRegularisation)
- {
- EStepDualObjective objective = new EStepDualObjective();
-
- // copied from x2y2withconstraints
-// LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1));
-// OptimizerStats stats = new OptimizerStats();
-// ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
-// CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
-// compositeStop.add(new ProjectedGradientL2Norm(0.001));
-// compositeStop.add(new ValueDifference(0.001));
-// optimizer.setMaxIterations(50);
-// boolean succeed = optimizer.optimize(objective,stats,compositeStop);
-
- // copied from depparser l1lmaxobjective
- ProjectedOptimizerStats stats = new ProjectedOptimizerStats();
- GenericPickFirstStep pickFirstStep = new GenericPickFirstStep(1);
- LineSearchMethod linesearch = new WolfRuleLineSearch(pickFirstStep, c1, c2);
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(linesearch);
- optimizer.setMaxIterations(maxProjectionIterations);
- CompositeStopingCriteria stop = new CompositeStopingCriteria();
- stop.add(new NormalizedProjectedGradientL2Norm(stoppingPrecision));
- stop.add(new NormalizedValueDifference(stoppingPrecision));
- boolean succeed = optimizer.optimize(objective, stats, stop);
-
- System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1));
- //System.out.println("Solution: " + objective.parameters);
- if (!succeed)
- System.out.println("Failed to optimize");
- //System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
-
- //lambda = objective.getParameters();
- llh = objective.primal();
-
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
- for (int j = 0; j < edges.size(); ++j)
- {
- Corpus.Edge e = edges.get(j);
- for (int t = 0; t < numTags; t++)
- {
- double p = objective.q.get(i).get(j).get(t);
- priorCounts[i][t] += e.getCount() * p;
- TIntArrayList tokens = e.getContext();
- for (int k = 0; k < tokens.size(); ++k)
- emissionsCounts[k][t][tokens.get(k)] += e.getCount() * p;
- }
- }
- }
- }
- else
- {
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
- for (int j = 0; j < edges.size(); ++j)
- {
- Corpus.Edge e = edges.get(j);
- double probs[] = posterior(i, e);
- double z = normalise(probs);
- llh += log(z) * e.getCount();
-
- TIntArrayList tokens = e.getContext();
- for (int t = 0; t < numTags; ++t)
- {
- priorCounts[i][t] += e.getCount() * probs[t];
- for (int k = 0; k < tokens.size(); ++k)
- emissionsCounts[k][t][tokens.get(k)] += e.getCount() * probs[t]; // k indexes the context position
- }
- }
- }
- }
-
- // M-step: normalise
- for (double[][] emissionTW : emissionsCounts)
- for (double[] emissionW : emissionTW)
- normalise(emissionW);
-
- for (double[] priorTag : priorCounts)
- normalise(priorTag);
-
- emissions = emissionsCounts;
- prior = priorCounts;
-
- System.out.println("Iteration " + iteration + " llh " + llh);
-
-// if (llh - lastLlh < 1e-4)
-// break;
-// else
-// lastLlh = llh;
- }
- }
-
- static double normalise(double probs[])
- {
- double z = 0;
- for (double p : probs)
- z += p;
- for (int i = 0; i < probs.length; ++i)
- probs[i] /= z;
- return z;
- }
-
- void randomise(double probs[])
- {
- double z = 0;
- for (int i = 0; i < probs.length; ++i)
- {
- probs[i] = 10 + rng.nextDouble();
- z += probs[i];
- }
-
- for (int i = 0; i < probs.length; ++i)
- probs[i] /= z;
- }
-
- static int argmax(double probs[])
- {
- double m = Double.NEGATIVE_INFINITY;
- int mi = -1;
- for (int i = 0; i < probs.length; ++i)
- {
- if (probs[i] > m)
- {
- m = probs[i];
- mi = i;
- }
- }
- return mi;
- }
-
- double[] posterior(int phraseId, Corpus.Edge e) // unnormalised
- {
- double probs[] = new double[numTags];
- TIntArrayList tokens = e.getContext();
- for (int t = 0; t < numTags; ++t)
- {
- probs[t] = prior[phraseId][t];
- for (int k = 0; k < tokens.size(); ++k)
- probs[t] *= emissions[k][t][tokens.get(k)];
- }
- return probs;
- }
-
- void displayPosterior()
- {
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
- for (Corpus.Edge e: edges)
- {
- double probs[] = posterior(i, e);
- normalise(probs);
-
- // emit phrase
- System.out.print(e.getPhraseString());
- System.out.print("\t");
- System.out.print(e.getContextString());
- System.out.print("||| C=" + e.getCount() + " |||");
-
- int t = argmax(probs);
- System.out.print(" " + t + " ||| " + probs[t]);
- // for (int t = 0; t < numTags; ++t)
- // System.out.print(" " + probs[t]);
- System.out.println();
- }
- }
- }
-
- public static void main(String[] args)
- {
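- // usage: PhraseContextModel <corpus-file> <num-tags> <num-EM-iterations>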
- assert (args.length >= 3);
- try
- {
- Corpus corpus = Corpus.readFromFile(new FileReader(new File(args[0])));
- PhraseContextModel model = new PhraseContextModel(corpus, Integer.parseInt(args[1]));
- model.expectationMaximisation(Integer.parseInt(args[2]));
- model.displayPosterior();
- }
- catch (IOException e)
- {
- System.out.println("Failed to read input file: " + args[0]);
- e.printStackTrace();
- }
- }
-
- class EStepDualObjective extends ProjectedObjective
- {
- List<List<TDoubleArrayList>> conditionals; // phrase id x context # x tag - precomputed
- List<List<TDoubleArrayList>> q; // ditto, but including exp(-lambda) terms
- double objective = 0; // log(z)
- // Objective.gradient = d log(z) / d lambda = - E_q[phi] (per edge, scaled by count)
- double llh = 0;
-
- public EStepDualObjective()
- {
- super();
- // compute conditionals p(context, tag | phrase) for all training instances
- conditionals = new ArrayList<List<TDoubleArrayList>>(training.getNumPhrases());
- q = new ArrayList<List<TDoubleArrayList>>(training.getNumPhrases());
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-
- conditionals.add(new ArrayList<TDoubleArrayList>(edges.size()));
- q.add(new ArrayList<TDoubleArrayList>(edges.size()));
-
- for (int j = 0; j < edges.size(); ++j)
- {
- Corpus.Edge e = edges.get(j);
- double probs[] = posterior(i, e);
- double z = normalise(probs);
- llh += log(z) * e.getCount();
- conditionals.get(i).add(new TDoubleArrayList(probs));
- q.get(i).add(new TDoubleArrayList(probs));
- }
- }
-
- gradient = new double[training.getNumEdges()*numTags];
- setInitialParameters(lambda);
- computeObjectiveAndGradient();
- }
-
- @Override
- public double[] projectPoint(double[] point)
- {
- SimplexProjection p = new SimplexProjection(constraintScale);
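-
- // Dual feasibility of the max-constraint: for each (phrase, tag) pair the
- // multipliers across that phrase's edges are projected jointly onto the
- // simplex scaled by constraintScale (non-negative, bounded sum), one
- // (phrase, tag) slice of the flattened lambda vector at a time.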
-
- double[] newPoint = point.clone();
- int edgeIndex = 0;
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-
- for (int t = 0; t < numTags; t++)
- {
- double[] subPoint = new double[edges.size()];
- for (int j = 0; j < edges.size(); ++j)
- subPoint[j] = point[edgeIndex+j*numTags+t];
-
- p.project(subPoint);
- for (int j = 0; j < edges.size(); ++j)
- newPoint[edgeIndex+j*numTags+t] = subPoint[j];
- }
-
- edgeIndex += edges.size() * numTags;
- }
-// System.out.println("Proj from: " + Arrays.toString(point));
-// System.out.println("Proj to: " + Arrays.toString(newPoint));
- return newPoint;
- }
-
- @Override
- public void setParameters(double[] params)
- {
- super.setParameters(params);
- computeObjectiveAndGradient();
- }
-
- @Override
- public double[] getGradient()
- {
- gradientCalls += 1;
- return gradient;
- }
-
- @Override
- public double getValue()
- {
- functionCalls += 1;
- return objective;
- }
-
- public void computeObjectiveAndGradient()
- {
- int edgeIndex = 0;
- objective = 0;
- Arrays.fill(gradient, 0);
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
-
- for (int j = 0; j < edges.size(); ++j)
- {
- Corpus.Edge e = edges.get(j);
-
- double z = 0;
- for (int t = 0; t < numTags; t++)
- {
- double v = conditionals.get(i).get(j).get(t) * exp(-parameters[edgeIndex+t]);
- q.get(i).get(j).set(t, v);
- z += v;
- }
- objective += log(z) * e.getCount();
-
- for (int t = 0; t < numTags; t++)
- {
- double v = q.get(i).get(j).get(t) / z;
- q.get(i).get(j).set(t, v);
- gradient[edgeIndex+t] -= e.getCount() * v;
- }
-
- edgeIndex += numTags;
- }
- }
-// System.out.println("computeObjectiveAndGradient logz=" + objective);
-// System.out.println("lambda= " + Arrays.toString(parameters));
-// System.out.println("gradient=" + Arrays.toString(gradient));
- }
-
- public String toString()
- {
- StringBuilder sb = new StringBuilder();
- sb.append(getClass().getCanonicalName()).append(" with ");
- sb.append(parameters.length).append(" parameters and ");
- sb.append(training.getNumPhrases() * numTags).append(" constraints");
- return sb.toString();
- }
-
- double primal()
- {
- // primal = llh + KL(q||p) + scale * sum_pt max_c E_q[phi_pct]
- // kl = sum_Y q(Y) log q(Y) / p(Y|X)
- // = sum_Y q(Y) { -lambda . phi(Y) - log Z }
- // = -log Z - lambda . E_q[phi]
- // = -objective + lambda . gradient
-
- double kl = -objective + MathUtils.dotProduct(parameters, gradient);
- double l1lmax = 0;
- for (int i = 0; i < training.getNumPhrases(); ++i)
- {
- List<Corpus.Edge> edges = training.getEdgesForPhrase(i);
- for (int t = 0; t < numTags; t++)
- {
- double lmax = Double.NEGATIVE_INFINITY;
- for (int j = 0; j < edges.size(); ++j)
- lmax = max(lmax, q.get(i).get(j).get(t));
- l1lmax += lmax;
- }
- }
-
- return llh + kl + constraintScale * l1lmax;
- }
- }
-}
diff --git a/gi/posterior-regularisation/README b/gi/posterior-regularisation/README
deleted file mode 100644
index a3d54ffc..00000000
--- a/gi/posterior-regularisation/README
+++ /dev/null
@@ -1,3 +0,0 @@
- ./cdec_extools/extractor -i btec/split.zh-en.al -c 500000 -L 12 -C | sort -t $'\t' -k 1 | ./cdec_extools/mr_stripe_rule_reduce > btec.concordance
- wc -l btec.concordance
- cat btec.concordance | sed 's/.* //' | awk '{ for (i=1; i < NF; i++) { x=substr($i, 1, 2); if (x == "C=") printf "\n"; else if (x != "||") printf "%s ", $i; }; printf "\n"; }' | sort | uniq | wc -l
diff --git a/gi/posterior-regularisation/alphabet.hh b/gi/posterior-regularisation/alphabet.hh
deleted file mode 100644
index 1db928da..00000000
--- a/gi/posterior-regularisation/alphabet.hh
+++ /dev/null
@@ -1,61 +0,0 @@
-#ifndef _alphabet_hh
-#define _alphabet_hh
-
-#include <cassert>
-#include <iosfwd>
-#include <map>
-#include <string>
-#include <vector>
-
-// Alphabet: indexes a set of types
-template <typename T>
-class Alphabet: protected std::map<T, int>
-{
-public:
- Alphabet() {};
-
- bool empty() const { return std::map<T,int>::empty(); }
- int size() const { return std::map<T,int>::size(); }
-
- int operator[](const T &k) const
- {
- typename std::map<T,int>::const_iterator cit = find(k);
- if (cit != std::map<T,int>::end())
- return cit->second;
- else
- return -1;
- }
-
- int lookup(const T &k) const { return (*this)[k]; }
-
- int insert(const T &k)
- {
- int sz = size();
- assert((unsigned) sz == _items.size());
-
- std::pair<typename std::map<T,int>::iterator, bool>
- ins = std::map<T,int>::insert(make_pair(k, sz));
-
- if (ins.second)
- _items.push_back(k);
-
- return ins.first->second;
- }
-
- const T &type(int i) const
- {
- assert(i >= 0);
- assert(i < size());
- return _items[i];
- }
-
- std::ostream &display(std::ostream &out, int i) const
- {
- return out << type(i);
- }
-
-private:
- std::vector<T> _items;
-};
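-
-// Usage sketch (illustrative): Alphabet<std::string> a;
-// a.insert("foo") -> 0, a.insert("bar") -> 1, a.insert("foo") -> 0 again;
-// a["baz"] -> -1, since const lookup of an unseen type never inserts.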
-
-#endif
diff --git a/gi/posterior-regularisation/canned.concordance b/gi/posterior-regularisation/canned.concordance
deleted file mode 100644
index 710973ff..00000000
--- a/gi/posterior-regularisation/canned.concordance
+++ /dev/null
@@ -1,4 +0,0 @@
-a 0 0 <PHRASE> 0 0 ||| C=1 ||| 1 1 <PHRASE> 1 1 ||| C=1 ||| 2 2 <PHRASE> 2 2 ||| C=1
-b 0 0 <PHRASE> 0 0 ||| C=1 ||| 1 1 <PHRASE> 1 1 ||| C=1
-c 2 2 <PHRASE> 2 2 ||| C=1 ||| 4 4 <PHRASE> 4 4 ||| C=1 ||| 5 5 <PHRASE> 5 5 ||| C=1
-d 4 4 <PHRASE> 4 4 ||| C=1 ||| 5 5 <PHRASE> 5 5 ||| C=1
diff --git a/gi/posterior-regularisation/em.cc b/gi/posterior-regularisation/em.cc
deleted file mode 100644
index f6c9fd68..00000000
--- a/gi/posterior-regularisation/em.cc
+++ /dev/null
@@ -1,830 +0,0 @@
-// Input of the form:
-// " the phantom of the opera " tickets for <PHRASE> tonight ? ||| C=1 ||| seats for <PHRASE> ? </s> ||| C=1 ||| i see <PHRASE> ? </s> ||| C=1
-// phrase TAB [context]+
-// where context = phrase ||| C=... which are separated by |||
-
-// Model parameterised as follows:
-// - each phrase, p, is allocated a latent state, t
-// - this is used to generate the contexts, c
-// - each context is generated using 4 independent multinomials, one for each position LL, L, R, RR
-
-// Training with EM:
-// - e-step is estimating P(t|p,c) for all p,c
-// - m-step is estimating model parameters P(p,c,t) = P(t) P(p|t) P(c|t)
-
-// Sexing it up:
-// - constrain the posteriors P(t|c) and P(t|p) to have few high-magnitude entries
-// - improve the generation of phrase internals, e.g., generate edge words from
-// different distribution to central words
-
-#include "alphabet.hh"
-#include "log_add.hh"
-#include <algorithm>
-#include <fstream>
-#include <iostream>
-#include <iterator>
-#include <map>
-#include <sstream>
-#include <stdexcept>
-#include <vector>
-#include <tr1/random>
-#include <tr1/tuple>
-#include <nlopt.h>
-
-using namespace std;
-using namespace std::tr1;
-
-const int numTags = 5;
-const int numIterations = 100;
-const bool posterior_regularisation = true;
-const double PHRASE_VIOLATION_WEIGHT = 10;
-const double CONTEXT_VIOLATION_WEIGHT = 0;
-const bool includePhraseProb = false;
-
-// Data structures:
-Alphabet<string> lexicon;
-typedef vector<int> Phrase;
-typedef tuple<int, int, int, int> Context;
-Alphabet<Phrase> phrases;
-Alphabet<Context> contexts;
-
-typedef map<int, int> ContextCounts;
-typedef map<int, int> PhraseCounts;
-typedef map<int, ContextCounts> PhraseToContextCounts;
-typedef map<int, PhraseCounts> ContextToPhraseCounts;
-
-PhraseToContextCounts concordancePhraseToContexts;
-ContextToPhraseCounts concordanceContextToPhrases;
-
-typedef vector<double> Dist;
-typedef vector<Dist> ConditionalDist;
-Dist prior; // class -> P(class)
-vector<ConditionalDist> probCtx; // position -> class -> P(word | class), one per context position
-ConditionalDist probPhrase; // class -> P(word | class)
-Dist probPhraseLength; // class -> P(length | class) expressed as geometric distribution parameter
-
-mt19937 randomGenerator((size_t) time(NULL));
-uniform_real<double> uniDist(0.0, 1e-1);
-variate_generator< mt19937, uniform_real<double> > rng(randomGenerator, uniDist);
-
-void addRandomNoise(Dist &d);
-void normalise(Dist &d);
-void addTo(Dist &d, const Dist &e);
-int argmax(const Dist &d);
-
-map<Phrase, map<Context, int> > lambda_indices;
-
-Dist conditional_probs(const Phrase &phrase, const Context &context, double *normalisation = 0);
-template <typename T>
-Dist
-penalised_conditionals(const Phrase &phrase, const Context &context,
- const T &lambda, double *normalisation);
-//Dist penalised_conditionals(const Phrase &phrase, const Context &context, const double *lambda, double *normalisation = 0);
-double penalised_log_likelihood(int n, const double *lambda, double *gradient, void *data);
-void optimise_lambda(double delta, double gamma, vector<double> &lambda);
-double expected_violation_phrases(const double *lambda);
-double expected_violation_contexts(const double *lambda);
-double primal_kl_divergence(const double *lambda);
-double dual(const double *lambda);
-void print_primal_dual(const double *lambda, double delta, double gamma);
-
-ostream &operator<<(ostream &, const Phrase &);
-ostream &operator<<(ostream &, const Context &);
-ostream &operator<<(ostream &, const Dist &);
-ostream &operator<<(ostream &, const ConditionalDist &);
-
-int
-main(int argc, char *argv[])
-{
- randomGenerator.seed(time(NULL));
-
- int edges = 0;
- istream &input = cin;
- while (input.good())
- {
- // read the phrase
- string phraseString;
- Phrase phrase;
- getline(input, phraseString, '\t');
- istringstream pinput(phraseString);
- string token;
- while (pinput >> token)
- phrase.push_back(lexicon.insert(token));
- int phraseId = phrases.insert(phrase);
-
- // read the rest, storing each context
- string remainder;
- getline(input, remainder, '\n');
- istringstream rinput(remainder);
- Context context(-1, -1, -1, -1);
- int index = 0;
- while (rinput >> token)
- {
- if (token != "|||" && token != "<PHRASE>")
- {
- if (index < 4)
- {
- // eugh! damn templates
- switch (index)
- {
- case 0: get<0>(context) = lexicon.insert(token); break;
- case 1: get<1>(context) = lexicon.insert(token); break;
- case 2: get<2>(context) = lexicon.insert(token); break;
- case 3: get<3>(context) = lexicon.insert(token); break;
- default: assert(false);
- }
- index += 1;
- }
- else if (token.find("C=") == 0)
- {
- int contextId = contexts.insert(context);
- int count = atoi(token.substr(strlen("C=")).c_str());
- concordancePhraseToContexts[phraseId][contextId] += count;
- concordanceContextToPhrases[contextId][phraseId] += count;
- index = 0;
- context = Context(-1, -1, -1, -1);
- edges += 1;
- }
- }
- }
-
- // trigger EOF
- input >> ws;
- }
-
- cout << "Read in " << phrases.size() << " phrases"
- << " and " << contexts.size() << " contexts"
- << " and " << edges << " edges"
- << " and " << lexicon.size() << " word types\n";
-
- // FIXME: filter out low count phrases and low count contexts (based on individual words?)
- // now populate model parameters with uniform + random noise
- prior.resize(numTags, 1.0);
- addRandomNoise(prior);
- normalise(prior);
-
- probCtx.resize(4, ConditionalDist(numTags, Dist(lexicon.size(), 1.0)));
- if (includePhraseProb)
- probPhrase.resize(numTags, Dist(lexicon.size(), 1.0));
- for (int t = 0; t < numTags; ++t)
- {
- for (int j = 0; j < 4; ++j)
- {
- addRandomNoise(probCtx[j][t]);
- normalise(probCtx[j][t]);
- }
- if (includePhraseProb)
- {
- addRandomNoise(probPhrase[t]);
- normalise(probPhrase[t]);
- }
- }
- if (includePhraseProb)
- {
- probPhraseLength.resize(numTags, 0.5); // geometric distribution p=0.5
- addRandomNoise(probPhraseLength);
- }
-
- cout << "\tprior: " << prior << "\n";
- //cout << "\tcontext: " << probCtx << "\n";
- //cout << "\tphrase: " << probPhrase << "\n";
- //cout << "\tphraseLen: " << probPhraseLength << endl;
-
- vector<double> lambda;
-
- // now do EM training
- for (int iteration = 0; iteration < numIterations; ++iteration)
- {
- cout << "EM iteration " << iteration << endl;
-
- if (posterior_regularisation)
- optimise_lambda(PHRASE_VIOLATION_WEIGHT, CONTEXT_VIOLATION_WEIGHT, lambda);
- //cout << "\tlambda " << lambda << endl;
-
- Dist countsPrior(numTags, 0.0);
- vector<ConditionalDist> countsCtx(4, ConditionalDist(numTags, Dist(lexicon.size(), 1e-10)));
- ConditionalDist countsPhrase(numTags, Dist(lexicon.size(), 1e-10));
- Dist countsPhraseLength(numTags, 0.0);
- Dist nPhrases(numTags, 0.0);
-
- double llh = 0;
- for (PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.begin();
- pcit != concordancePhraseToContexts.end(); ++pcit)
- {
- const Phrase &phrase = phrases.type(pcit->first);
-
- // e-step: estimate latent class probs; compile (class,word) stats for m-step
- for (ContextCounts::iterator ccit = pcit->second.begin();
- ccit != pcit->second.end(); ++ccit)
- {
- const Context &context = contexts.type(ccit->first);
-
- double z = 0;
- Dist tagCounts;
- if (!posterior_regularisation)
- tagCounts = conditional_probs(phrase, context, &z);
- else
- tagCounts = penalised_conditionals(phrase, context, lambda, &z);
-
- llh += log(z) * ccit->second;
- for (int t = 0; t < numTags; ++t)
- countsPrior[t] += tagCounts[t] * ccit->second; // scale by the context count
-
- for (int t = 0; t < numTags; ++t)
- {
- // n.b. get<> needs a compile-time index, so gather the context words first
- const int ctxWord[4] = { get<0>(context), get<1>(context), get<2>(context), get<3>(context) };
- for (int j = 0; j < 4; ++j)
- countsCtx[j][t][ctxWord[j]] += tagCounts[t] * ccit->second;
-
- if (includePhraseProb)
- {
- for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit)
- countsPhrase[t][*pit] += tagCounts[t] * ccit->second;
- countsPhraseLength[t] += phrase.size() * tagCounts[t] * ccit->second;
- nPhrases[t] += tagCounts[t] * ccit->second;
- }
- }
- }
- }
-
- cout << "M-step\n";
-
- // m-step: normalise prior and (class,word) stats and assign to model parameters
- normalise(countsPrior);
- prior = countsPrior;
- for (int t = 0; t < numTags; ++t)
- {
- //cout << "\t\tt " << t << " prior " << countsPrior[t] << "\n";
- for (int j = 0; j < 4; ++j)
- normalise(countsCtx[j][t]);
- if (includePhraseProb)
- {
- normalise(countsPhrase[t]);
- countsPhraseLength[t] = nPhrases[t] / countsPhraseLength[t];
- }
- }
- probCtx = countsCtx;
- if (includePhraseProb)
- {
- probPhrase = countsPhrase;
- probPhraseLength = countsPhraseLength;
- }
-
- double *larray = new double[lambda.size()];
- copy(lambda.begin(), lambda.end(), larray);
- print_primal_dual(larray, PHRASE_VIOLATION_WEIGHT, CONTEXT_VIOLATION_WEIGHT);
- delete [] larray;
-
- //cout << "\tllh " << llh << endl;
- //cout << "\tprior: " << prior << "\n";
- //cout << "\tcontext: " << probCtx << "\n";
- //cout << "\tphrase: " << probPhrase << "\n";
- //cout << "\tphraseLen: " << probPhraseLength << "\n";
- }
-
- // output class membership
- for (PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.begin();
- pcit != concordancePhraseToContexts.end(); ++pcit)
- {
- const Phrase &phrase = phrases.type(pcit->first);
- for (ContextCounts::iterator ccit = pcit->second.begin();
- ccit != pcit->second.end(); ++ccit)
- {
- const Context &context = contexts.type(ccit->first);
- Dist tagCounts = conditional_probs(phrase, context, 0);
- cout << phrase << " ||| " << context << " ||| " << argmax(tagCounts) << "\n";
- }
- }
-
- return 0;
-}
-
-void addRandomNoise(Dist &d)
-{
- for (Dist::iterator dit = d.begin(); dit != d.end(); ++dit)
- *dit += rng();
-}
-
-void normalise(Dist &d)
-{
- double z = 0;
- for (Dist::iterator dit = d.begin(); dit != d.end(); ++dit)
- z += *dit;
- for (Dist::iterator dit = d.begin(); dit != d.end(); ++dit)
- *dit /= z;
-}
-
-void addTo(Dist &d, const Dist &e)
-{
- assert(d.size() == e.size());
- for (int i = 0; i < (int) d.size(); ++i)
- d[i] += e[i];
-}
-
-int argmax(const Dist &d)
-{
- double best = d[0];
- int index = 0;
- for (int i = 1; i < (int) d.size(); ++i)
- {
- if (d[i] > best)
- {
- best = d[i];
- index = i;
- }
- }
- return index;
-}
-
-ostream &operator<<(ostream &out, const Phrase &phrase)
-{
- for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit)
- lexicon.display(((pit == phrase.begin()) ? out : out << " "), *pit);
- return out;
-}
-
-ostream &operator<<(ostream &out, const Context &context)
-{
- lexicon.display(out, get<0>(context));
- lexicon.display(out << " ", get<1>(context));
- lexicon.display(out << " <PHRASE> ", get<2>(context));
- lexicon.display(out << " ", get<3>(context));
- return out;
-}
-
-ostream &operator<<(ostream &out, const Dist &dist)
-{
- for (Dist::const_iterator dit = dist.begin(); dit != dist.end(); ++dit)
- out << ((dit == dist.begin()) ? "" : " ") << *dit;
- return out;
-}
-
-ostream &operator<<(ostream &out, const ConditionalDist &dist)
-{
- for (ConditionalDist::const_iterator dit = dist.begin(); dit != dist.end(); ++dit)
- out << ((dit == dist.begin()) ? "" : "; ") << *dit;
- return out;
-}
-
-// FIXME: slow - just use the phrase index, context index to do the mapping
-// (n.b. it's a sparse setup, not just equal to 3d array index)
-int
-lambda_index(const Phrase &phrase, const Context &context, int tag)
-{
- return lambda_indices[phrase][context] + tag;
-}
-
-template <typename T>
-Dist
-penalised_conditionals(const Phrase &phrase, const Context &context,
- const T &lambda, double *normalisation)
-{
- Dist d = conditional_probs(phrase, context, 0);
-
- double z = 0;
- for (int t = 0; t < numTags; ++t)
- {
- d[t] *= exp(-lambda[lambda_index(phrase, context, t)]);
- z += d[t];
- }
-
- if (normalisation)
- *normalisation = z;
-
- for (int t = 0; t < numTags; ++t)
- d[t] /= z;
-
- return d;
-}
-
-Dist
-conditional_probs(const Phrase &phrase, const Context &context, double *normalisation)
-{
- Dist tagCounts(numTags, 0.0);
- double z = 0;
- for (int t = 0; t < numTags; ++t)
- {
- double prob = prior[t];
- prob *= (probCtx[0][t][get<0>(context)] * probCtx[1][t][get<1>(context)] *
- probCtx[2][t][get<2>(context)] * probCtx[3][t][get<3>(context)]);
-
- if (includePhraseProb)
- {
- prob *= pow(1 - probPhraseLength[t], phrase.size() - 1) * probPhraseLength[t];
- for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit)
- prob *= probPhrase[t][*pit];
- }
-
- tagCounts[t] = prob;
- z += prob;
- }
- if (normalisation)
- *normalisation = z;
-
- for (int t = 0; t < numTags; ++t)
- tagCounts[t] /= z;
-
- return tagCounts;
-}
-
-double
-penalised_log_likelihood(int n, const double *lambda, double *grad, void *)
-{
- // return log Z(lambda, theta) over the corpus
- // where theta are the global parameters (prior, probCtx*, probPhrase*)
- // and lambda are lagrange multipliers for the posterior sparsity constraints
- //
- // this is formulated as:
- // f = log Z(lambda) = sum_i log ( sum_t p_theta(t|p_i,c_i) exp [-lambda_{t,p_i,c_i}] )
- // where i indexes the training examples - specifying the (p, c) pair (which may occur with count > 1)
- //
- // with derivative:
- // f'_{tpc} = frac { - count(p,c) p_theta(t|p,c) exp (-lambda_{t,p,c}) }
- // { sum_t' p_theta(t'|p,c) exp (-lambda_{t',p,c}) }
-
- //cout << "penalised_log_likelihood with lambda ";
- //copy(lambda, lambda+n, ostream_iterator<double>(cout, " "));
- //cout << "\n";
-
- double f = 0;
- if (grad)
- {
- for (int i = 0; i < n; ++i)
- grad[i] = 0.0;
- }
-
- for (int p = 0; p < phrases.size(); ++p)
- {
- const Phrase &phrase = phrases.type(p);
- PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p);
- for (ContextCounts::const_iterator ccit = pcit->second.begin();
- ccit != pcit->second.end(); ++ccit)
- {
- const Context &context = contexts.type(ccit->first);
- double z = 0;
- Dist scores = penalised_conditionals(phrase, context, lambda, &z);
-
- f += ccit->second * log(z);
- //cout << "\tphrase: " << phrase << " context: " << context << " count: " << ccit->second << " z " << z << endl;
- //cout << "\t\tscores: " << scores << "\n";
-
- if (grad)
- {
- for (int t = 0; t < numTags; ++t)
- {
- int i = lambda_index(phrase, context, t); // FIXME: redundant lookups
- assert(grad[i] == 0.0);
- grad[i] = - ccit->second * scores[t];
- }
- }
- }
- }
-
- //cout << "penalised_log_likelihood returning " << f;
- //if (grad)
- //{
- //cout << "\ngradient: ";
- //copy(grad, grad+n, ostream_iterator<double>(cout, " "));
- //}
- //cout << "\n";
-
- return f;
-}
-
-typedef struct
-{
- // one of p or c should be set to -1, in which case it will be marginalised out
- // i.e. sum_p' lambda_{p'ct} <= threshold
- // or sum_c' lambda_{pc't} <= threshold
- int p, c, t;
- double threshold; // delta/gamma weights are doubles; an int here would truncate
-} constraint_data;
-
-double
-constraint_and_gradient(int n, const double *lambda, double *grad, void *data)
-{
- constraint_data *d = (constraint_data *) data;
- assert(d->t >= 0);
- assert(d->threshold >= 0);
-
- //cout << "constraint_and_gradient: t " << d->t << " p " << d->p << " c " << d->c << " tau " << d->threshold << endl;
- //cout << "\tlambda ";
- //copy(lambda, lambda+n, ostream_iterator<double>(cout, " "));
- //cout << "\n";
-
- // FIXME: it's crazy to use a dense gradient here => will only have a handful of non-zero entries
- if (grad)
- {
- for (int i = 0; i < n; ++i)
- grad[i] = 0.0;
- }
-
- //cout << "constraint_and_gradient: " << d->p << "; " << d->c << "; " << d->t << "; " << d->threshold << endl;
-
- if (d->p >= 0)
- {
- assert(d->c < 0);
- // sum_c lambda_pct <= delta [a.k.a. threshold]
- // => sum_c lambda_pct - delta <= 0
- // derivative_pct = { 1, if p and t match; 0, otherwise }
-
- double val = -d->threshold;
-
- const Phrase &phrase = phrases.type(d->p);
- PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(d->p);
- assert(pcit != concordancePhraseToContexts.end());
- for (ContextCounts::const_iterator ccit = pcit->second.begin();
- ccit != pcit->second.end(); ++ccit)
- {
- const Context &context = contexts.type(ccit->first);
- int i = lambda_index(phrase, context, d->t);
- val += lambda[i];
- if (grad) grad[i] = 1;
- }
- //cout << "\treturning " << val << endl;
-
- return val;
- }
- else
- {
- assert(d->c >= 0);
- assert(d->p < 0);
- // sum_p lambda_pct <= gamma [a.k.a. threshold]
- // => sum_p lambda_pct - gamma <= 0
- // derivative_pct = { 1, if c and t match; 0, otherwise }
-
- double val = -d->threshold;
-
- const Context &context = contexts.type(d->c);
- ContextToPhraseCounts::iterator cpit = concordanceContextToPhrases.find(d->c);
- assert(cpit != concordanceContextToPhrases.end());
- for (PhraseCounts::iterator pcit = cpit->second.begin();
- pcit != cpit->second.end(); ++pcit)
- {
- const Phrase &phrase = phrases.type(pcit->first);
- int i = lambda_index(phrase, context, d->t);
- val += lambda[i];
- if (grad) grad[i] = 1;
- }
- //cout << "\treturning " << val << endl;
-
- return val;
- }
-}
-
-void
-optimise_lambda(double delta, double gamma, vector<double> &lambdav)
-{
- int num_lambdas = lambdav.size();
- if (lambda_indices.empty() || lambdav.empty())
- {
- lambda_indices.clear();
- lambdav.clear();
-
- int i = 0;
- for (int p = 0; p < phrases.size(); ++p)
- {
- const Phrase &phrase = phrases.type(p);
- PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.find(p);
- for (ContextCounts::iterator ccit = pcit->second.begin();
- ccit != pcit->second.end(); ++ccit)
- {
- const Context &context = contexts.type(ccit->first);
- lambda_indices[phrase][context] = i;
- i += numTags;
- }
- }
- num_lambdas = i;
- lambdav.resize(num_lambdas);
- }
- //cout << "optimise_lambda: #langrange multipliers " << num_lambdas << endl;
-
- // FIXME: better to work with an implicit representation to save memory usage
- int num_constraints = (((delta > 0) ? phrases.size() : 0) + ((gamma > 0) ? contexts.size() : 0)) * numTags;
- //cout << "optimise_lambda: #constraints " << num_constraints << endl;
- constraint_data *data = new constraint_data[num_constraints];
- int i = 0;
- if (delta > 0)
- {
- for (int p = 0; p < phrases.size(); ++p)
- {
- for (int t = 0; t < numTags; ++t, ++i)
- {
- constraint_data &d = data[i];
- d.p = p;
- d.c = -1;
- d.t = t;
- d.threshold = delta;
- }
- }
- }
-
- if (gamma > 0)
- {
- for (int c = 0; c < contexts.size(); ++c)
- {
- for (int t = 0; t < numTags; ++t, ++i)
- {
- constraint_data &d = data[i];
- d.p = -1;
- d.c = c;
- d.t = t;
- d.threshold = gamma;
- }
- }
- }
- assert(i == num_constraints);
-
- double lambda[num_lambdas];
- double lb[num_lambdas], ub[num_lambdas];
- for (i = 0; i < num_lambdas; ++i)
- {
- lambda[i] = lambdav[i]; // starting value
- lb[i] = 0; // lower bound
- if (delta <= 0) // upper bound
- ub[i] = gamma;
- else if (gamma <= 0)
- ub[i] = delta;
- else
- assert(false);
- }
-
- //print_primal_dual(lambda, delta, gamma);
-
- double minf;
- int error_code = nlopt_minimize_constrained(NLOPT_LN_COBYLA, num_lambdas, penalised_log_likelihood, NULL,
- num_constraints, constraint_and_gradient, data, sizeof(constraint_data),
- lb, ub, lambda, &minf, -HUGE_VAL, 0.0, 0.0, 1e-4, NULL, 0, 0.0);
- //cout << "optimise error code " << error_code << endl;
-
- //print_primal_dual(lambda, delta, gamma);
-
- delete [] data;
-
- if (error_code < 0)
- cout << "WARNING: optimisation failed with error code: " << error_code << endl;
- //else
- //{
- //cout << "success; minf " << minf << endl;
- //print_primal_dual(lambda, delta, gamma);
- //}
-
- lambdav = vector<double>(&lambda[0], &lambda[0] + num_lambdas);
-}
-
-// FIXME: inefficient - cache the scores
-double
-expected_violation_phrases(const double *lambda)
-{
- // sum_pt max_c E_q[phi_pct]
- double violation = 0;
-
- for (int p = 0; p < phrases.size(); ++p)
- {
- const Phrase &phrase = phrases.type(p);
- PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p);
-
- for (int t = 0; t < numTags; ++t)
- {
- double best = 0;
- for (ContextCounts::const_iterator ccit = pcit->second.begin();
- ccit != pcit->second.end(); ++ccit)
- {
- const Context &context = contexts.type(ccit->first);
- Dist scores = penalised_conditionals(phrase, context, lambda, 0);
- best = max(best, scores[t]);
- }
- violation += best;
- }
- }
-
- return violation;
-}
-
-// FIXME: inefficient - cache the scores
-double
-expected_violation_contexts(const double *lambda)
-{
- // sum_ct max_p E_q[phi_pct]
- double violation = 0;
-
- for (int c = 0; c < contexts.size(); ++c)
- {
- const Context &context = contexts.type(c);
- ContextToPhraseCounts::iterator cpit = concordanceContextToPhrases.find(c);
-
- for (int t = 0; t < numTags; ++t)
- {
- double best = 0;
- for (PhraseCounts::iterator pit = cpit->second.begin();
- pit != cpit->second.end(); ++pit)
- {
- const Phrase &phrase = phrases.type(pit->first);
- Dist scores = penalised_conditionals(phrase, context, lambda, 0);
- best = max(best, scores[t]);
- }
- violation += best;
- }
- }
-
- return violation;
-}
-
-// FIXME: possibly inefficient
-double
-primal_likelihood() // FIXME: primal evaluation needs to use lambda and calculate l1linf terms
-{
- double llh = 0;
- for (int p = 0; p < phrases.size(); ++p)
- {
- const Phrase &phrase = phrases.type(p);
- PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p);
- for (ContextCounts::const_iterator ccit = pcit->second.begin();
- ccit != pcit->second.end(); ++ccit)
- {
- const Context &context = contexts.type(ccit->first);
- double z = 0;
- Dist scores = conditional_probs(phrase, context, &z);
- llh += ccit->second * log(z);
- }
- }
- return llh;
-}
-
-// FIXME: inefficient - cache the scores
-double
-primal_kl_divergence(const double *lambda)
-{
- // return KL(q || p) = sum_y q(y) { log q(y) - log p(y | x) }
- // = sum_y q(y) { log p(y | x) - lambda . phi(x, y) - log Z - log p(y | x) }
- // = sum_y q(y) { - lambda . phi(x, y) } - log Z
- // and q(y) factors with each edge, ditto for Z
-
- double feature_sum = 0, log_z = 0;
- for (int p = 0; p < phrases.size(); ++p)
- {
- const Phrase &phrase = phrases.type(p);
- PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p);
- for (ContextCounts::const_iterator ccit = pcit->second.begin();
- ccit != pcit->second.end(); ++ccit)
- {
- const Context &context = contexts.type(ccit->first);
-
- double local_z = 0;
- double local_f = 0;
- Dist d = conditional_probs(phrase, context, 0);
- for (int t = 0; t < numTags; ++t)
- {
- int i = lambda_index(phrase, context, t);
- double s = d[t] * exp(-lambda[i]);
- local_f += lambda[i] * s;
- local_z += s;
- }
-
- log_z += ccit->second * log(local_z);
- feature_sum += ccit->second * (local_f / local_z);
- }
- }
-
- return -feature_sum - log_z;
-}
-
-// FIXME: inefficient - cache the scores
-double
-dual(const double *lambda)
-{
- // return the dual objective: sum over edges of count * log { sum_y p(y | x) exp( - lambda . phi(x, y) ) }
-
- double z = 0;
- for (int p = 0; p < phrases.size(); ++p)
- {
- const Phrase &phrase = phrases.type(p);
- PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p);
- for (ContextCounts::const_iterator ccit = pcit->second.begin();
- ccit != pcit->second.end(); ++ccit)
- {
- const Context &context = contexts.type(ccit->first);
- double lz = 0;
- penalised_conditionals(phrase, context, lambda, &lz); // only the normaliser is needed
- z += log(lz) * ccit->second; // accumulate per-edge log partition terms
- }
- }
- return z;
-}
-
-void
-print_primal_dual(const double *lambda, double delta, double gamma)
-{
- double likelihood = primal_likelihood();
- double kl = primal_kl_divergence(lambda);
- double sum_pt = expected_violation_phrases(lambda);
- double sum_ct = expected_violation_contexts(lambda);
- //double d = dual(lambda);
-
- cout << "\tllh=" << likelihood
- << " kl=" << kl
- << " violations phrases=" << sum_pt
- << " contexts=" << sum_ct
- //<< " primal=" << (kl + delta * sum_pt + gamma * sum_ct)
- //<< " dual=" << d
- << " objective=" << (likelihood - kl + delta * sum_pt + gamma * sum_ct)
- << endl;
-}
diff --git a/gi/posterior-regularisation/invert.hh b/gi/posterior-regularisation/invert.hh
deleted file mode 100644
index d06356e9..00000000
--- a/gi/posterior-regularisation/invert.hh
+++ /dev/null
@@ -1,45 +0,0 @@
-// The following code inverts the matrix input using LU-decomposition with
- // backsubstitution of unit vectors. Reference: Numerical Recipes in C, 2nd
-// ed., by Press, Teukolsky, Vetterling & Flannery.
-// Code written by Fredrik Orderud.
-// http://www.crystalclearsoftware.com/cgi-bin/boost_wiki/wiki.pl?LU_Matrix_Inversion
-
-#ifndef INVERT_MATRIX_HPP
-#define INVERT_MATRIX_HPP
-
-// REMEMBER to update "lu.hpp" header includes from boost-CVS
-#include <boost/numeric/ublas/vector.hpp>
-#include <boost/numeric/ublas/vector_proxy.hpp>
-#include <boost/numeric/ublas/matrix.hpp>
-#include <boost/numeric/ublas/triangular.hpp>
-#include <boost/numeric/ublas/lu.hpp>
-#include <boost/numeric/ublas/io.hpp>
-
-namespace ublas = boost::numeric::ublas;
-
-/* Matrix inversion routine.
- Uses lu_factorize and lu_substitute in uBLAS to invert a matrix */
-template<class T>
-bool invert_matrix(const ublas::matrix<T>& input, ublas::matrix<T>& inverse)
-{
- using namespace boost::numeric::ublas;
- typedef permutation_matrix<std::size_t> pmatrix;
- // create a working copy of the input
- matrix<T> A(input);
- // create a permutation matrix for the LU-factorization
- pmatrix pm(A.size1());
-
- // perform LU-factorization
- int res = lu_factorize(A,pm);
- if( res != 0 ) return false;
-
- // create identity matrix of "inverse"
- inverse.assign(ublas::identity_matrix<T>(A.size1()));
-
- // backsubstitute to get the inverse
- lu_substitute(A, pm, inverse);
-
- return true;
-}
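-
-// Example with hypothetical values:
-// ublas::matrix<double> A(2,2), Ainv(2,2);
-// A(0,0)=4; A(0,1)=3; A(1,0)=6; A(1,1)=3; // det = -6
-// invert_matrix(A, Ainv); // returns true; Ainv = [[-1/2, 1/2], [1, -2/3]]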
-
-#endif //INVERT_MATRIX_HPP
diff --git a/gi/posterior-regularisation/linesearch.py b/gi/posterior-regularisation/linesearch.py
deleted file mode 100644
index 5a3f2e9c..00000000
--- a/gi/posterior-regularisation/linesearch.py
+++ /dev/null
@@ -1,58 +0,0 @@
-## Automatically adapted for scipy Oct 07, 2005 by convertcode.py
-
-from scipy.optimize import minpack2
-import numpy
-
-import __builtin__
-pymin = __builtin__.min
-
-def line_search(f, myfprime, xk, pk, gfk, old_fval, old_old_fval,
- args=(), c1=1e-4, c2=0.9, amax=50):
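- # Wolfe line search via MINPACK-2's dcsrch: searches along direction pk
- # for a step satisfying the sufficient-decrease (c1) and curvature (c2)
- # conditions, re-evaluating f/fprime whenever dcsrch requests 'FG'.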
-
- fc = 0
- gc = 0
- phi0 = old_fval
- derphi0 = numpy.dot(gfk,pk)
- alpha1 = pymin(1.0,1.01*2*(phi0-old_old_fval)/derphi0)
- # trevor: added this test
- alpha1 = pymin(alpha1,amax)
-
- if isinstance(myfprime,type(())):
- eps = myfprime[1]
- fprime = myfprime[0]
- newargs = (f,eps) + args
- gradient = False
- else:
- fprime = myfprime
- newargs = args
- gradient = True
-
- xtol = 1e-14
- amin = 1e-8
- isave = numpy.zeros((2,), numpy.intc)
- dsave = numpy.zeros((13,), float)
- task = 'START'
- fval = old_fval
- gval = gfk
-
- while 1:
- stp,fval,derphi,task = minpack2.dcsrch(alpha1, phi0, derphi0, c1, c2,
- xtol, task, amin, amax,isave,dsave)
- #print 'minpack2.dcsrch', alpha1, phi0, derphi0, c1, c2, xtol, task, amin, amax,isave,dsave
- #print 'returns', stp,fval,derphi,task
-
- if task[:2] == 'FG':
- alpha1 = stp
- fval = f(xk+stp*pk,*args)
- fc += 1
- gval = fprime(xk+stp*pk,*newargs)
- if gradient: gc += 1
- else: fc += len(xk) + 1
- phi0 = fval
- derphi0 = numpy.dot(gval,pk)
- else:
- break
-
- if task[:5] == 'ERROR' or task[:4] == 'WARN':
- stp = None # failed
- return stp, fc, gc, fval, old_fval, gval
diff --git a/gi/posterior-regularisation/log_add.hh b/gi/posterior-regularisation/log_add.hh
deleted file mode 100644
index e0620c5a..00000000
--- a/gi/posterior-regularisation/log_add.hh
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef log_add_hh
-#define log_add_hh
-
-#include <limits>
-#include <iostream>
-#include <cassert>
-#include <cmath>
-
-template <typename T>
-struct Log
-{
- static T zero() { return -std::numeric_limits<T>::infinity(); }
-
- static T add(T l1, T l2)
- {
- if (l1 == zero()) return l2;
- if (l1 > l2)
- return l1 + std::log(1 + exp(l2 - l1));
- else
- return l2 + std::log(1 + exp(l1 - l2));
- }
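-
- // e.g. add(log(0.5), log(0.25)) == log(0.75), computed without ever
- // leaving log space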
-
- static T subtract(T l1, T l2)
- {
- // assert(l1 >= l2); // n.b. assert is a macro from <cassert>, not std::assert
- return l1 + log(1 - exp(l2 - l1));
- }
-};
-
-#endif
diff --git a/gi/posterior-regularisation/prjava.jar b/gi/posterior-regularisation/prjava.jar
deleted file mode 120000
index da8bf761..00000000
--- a/gi/posterior-regularisation/prjava.jar
+++ /dev/null
@@ -1 +0,0 @@
-prjava/prjava-20100708.jar \ No newline at end of file
diff --git a/gi/posterior-regularisation/prjava/Makefile b/gi/posterior-regularisation/prjava/Makefile
deleted file mode 100755
index bd3bfca0..00000000
--- a/gi/posterior-regularisation/prjava/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-all:
- ant dist
-
-check:
- echo no tests
-
-clean:
- ant clean
diff --git a/gi/posterior-regularisation/prjava/build.xml b/gi/posterior-regularisation/prjava/build.xml
deleted file mode 100644
index 7222b3c8..00000000
--- a/gi/posterior-regularisation/prjava/build.xml
+++ /dev/null
@@ -1,38 +0,0 @@
-<project name="prjava" default="dist" basedir=".">
- <!-- set global properties for this build -->
- <property name="src" location="src"/>
- <property name="build" location="build"/>
- <property name="dist" location="lib"/>
- <path id="classpath">
- <pathelement location="lib/trove-2.0.2.jar"/>
- <pathelement location="lib/optimization.jar"/>
- <pathelement location="lib/jopt-simple-3.2.jar"/>
- <pathelement location="lib/commons-math-2.1.jar"/>
- </path>
-
- <target name="init">
- <!-- Create the time stamp -->
- <tstamp/>
- <!-- Create the build directory structure used by compile -->
- <mkdir dir="${build}"/>
- </target>
-
- <target name="compile" depends="init"
- description="compile the source " >
- <!-- Compile the java code from ${src} into ${build} -->
- <javac srcdir="${src}" destdir="${build}" includeantruntime="false">
- <classpath refid="classpath"/>
- </javac>
- </target>
-
- <target name="dist" depends="compile"
- description="generate the distribution" >
- <jar jarfile="${dist}/prjava-${DSTAMP}.jar" basedir="${build}"/>
- <symlink link="./prjava.jar" resource="${dist}/prjava-${DSTAMP}.jar" overwrite="true"/>
- </target>
-
- <target name="clean"
- description="clean up" >
- <delete dir="${build}"/>
- </target>
-</project>
diff --git a/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar b/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar
deleted file mode 100644
index 43b4b369..00000000
--- a/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar
+++ /dev/null
Binary files differ
diff --git a/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar b/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar
deleted file mode 100644
index 56373621..00000000
--- a/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar
+++ /dev/null
Binary files differ
diff --git a/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar b/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar
deleted file mode 100644
index 3e59fbf3..00000000
--- a/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar
+++ /dev/null
Binary files differ
diff --git a/gi/posterior-regularisation/prjava/src/arr/F.java b/gi/posterior-regularisation/prjava/src/arr/F.java
deleted file mode 100644
index be0a6ed6..00000000
--- a/gi/posterior-regularisation/prjava/src/arr/F.java
+++ /dev/null
@@ -1,99 +0,0 @@
-package arr;
-
-import java.util.Arrays;
-import java.util.Random;
-
-public class F {
- public static Random rng = new Random();
-
- public static void randomise(double probs[])
- {
- randomise(probs, true);
- }
-
- public static void randomise(double probs[], boolean normalise)
- {
- double z = 0;
- for (int i = 0; i < probs.length; ++i)
- {
- probs[i] = 10 + rng.nextDouble();
- if (normalise)
- z += probs[i];
- }
-
- if (normalise)
- for (int i = 0; i < probs.length; ++i)
- probs[i] /= z;
- }
-
- public static void uniform(double probs[])
- {
- for (int i = 0; i < probs.length; ++i)
- probs[i] = 1.0 / probs.length;
- }
-
- public static void l1normalize(double [] a){
- double sum=0;
- for(int i=0;i<a.length;i++){
- sum+=a[i];
- }
- if(sum==0)
- Arrays.fill(a, 1.0/a.length);
- else
- {
- for(int i=0;i<a.length;i++){
- a[i]/=sum;
- }
- }
- }
-
- public static void l1normalize(double [][] a){
- double sum=0;
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- sum+=a[i][j];
- }
- }
- if(sum==0){
- return;
- }
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- a[i][j]/=sum;
- }
- }
- }
-
- public static double l1norm(double a[]){
- // FIXME: this isn't the l1 norm for a < 0
- double norm=0;
- for(int i=0;i<a.length;i++){
- norm += a[i];
- }
- return norm;
- }
-
- public static double l2norm(double a[]){
- double norm=0;
- for(int i=0;i<a.length;i++){
- norm += a[i]*a[i];
- }
- return Math.sqrt(norm);
- }
-
- public static int argmax(double probs[])
- {
- double m = Double.NEGATIVE_INFINITY;
- int mi = -1;
- for (int i = 0; i < probs.length; ++i)
- {
- if (probs[i] > m)
- {
- m = probs[i];
- mi = i;
- }
- }
- return mi;
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/data/Corpus.java b/gi/posterior-regularisation/prjava/src/data/Corpus.java
deleted file mode 100644
index 425ede11..00000000
--- a/gi/posterior-regularisation/prjava/src/data/Corpus.java
+++ /dev/null
@@ -1,233 +0,0 @@
-package data;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Scanner;
-
-public class Corpus {
-
- public static final String alphaFilename="../posdata/corpus.alphabet";
- public static final String tagalphaFilename="../posdata/corpus.tag.alphabet";
-
-// public static final String START_SYM="<s>";
- public static final String END_SYM="<e>";
- public static final String NUM_TOK="<NUM>";
-
- public static final String UNK_TOK="<unk>";
-
- private ArrayList<String[]>sent;
- private ArrayList<int[]>data;
-
- public ArrayList<String[]>tag;
- public ArrayList<int[]>tagData;
-
- public static boolean convertNumTok=true;
-
- private HashMap<String,Integer>freq;
- public HashMap<String,Integer>vocab;
-
- public HashMap<String,Integer>tagVocab;
- private int tagV;
-
- private int V;
-
- public static void main(String[] args) {
- Corpus c=new Corpus("../posdata/en_test.conll");
- System.out.println(
- Arrays.toString(c.get(0))
- );
- System.out.println(
- Arrays.toString(c.getInt(0))
- );
-
- System.out.println(
- Arrays.toString(c.get(1))
- );
- System.out.println(
- Arrays.toString(c.getInt(1))
- );
- }
-
- public Corpus(String filename,HashMap<String,Integer>dict){
- V=0;
- tagV=0;
- freq=new HashMap<String,Integer>();
- tagVocab=new HashMap<String,Integer>();
- vocab=dict;
-
- sent=new ArrayList<String[]>();
- tag=new ArrayList<String[]>();
-
- Scanner sc=io.FileUtil.openInFile(filename);
- ArrayList<String>s=new ArrayList<String>();
- // s.add(START_SYM);
- while(sc.hasNextLine()){
- String line=sc.nextLine();
- String toks[]=line.split("\t");
- if(toks.length<2){
- s.add(END_SYM);
- sent.add(s.toArray(new String[0]));
- s=new ArrayList<String>();
- // s.add(START_SYM);
- continue;
- }
- String tok=toks[1].toLowerCase();
- s.add(tok);
- }
- sc.close();
-
- buildData();
- }
-
- public Corpus(String filename){
- V=0;
- freq=new HashMap<String,Integer>();
- vocab=new HashMap<String,Integer>();
- tagVocab=new HashMap<String,Integer>();
-
- sent=new ArrayList<String[]>();
- tag=new ArrayList<String[]>();
-
- System.out.println("Reading:"+filename);
-
- Scanner sc=io.FileUtil.openInFile(filename);
- ArrayList<String>s=new ArrayList<String>();
- ArrayList<String>tags=new ArrayList<String>();
- //s.add(START_SYM);
- while(sc.hasNextLine()){
- String line=sc.nextLine();
- String toks[]=line.split("\t");
- if(toks.length<2){
- s.add(END_SYM);
- tags.add(END_SYM);
- if(s.size()>2){
- sent.add(s.toArray(new String[0]));
- tag.add(tags.toArray(new String [0]));
- }
- s=new ArrayList<String>();
- tags=new ArrayList<String>();
- // s.add(START_SYM);
- continue;
- }
-
- String tok=toks[1].toLowerCase();
- if(convertNumTok && tok.matches(".*\\d.*")){
- tok=NUM_TOK;
- }
- s.add(tok);
-
- if(toks.length>3){
- tok=toks[3].toLowerCase();
- }else{
- tok="_";
- }
- tags.add(tok);
-
- }
- sc.close();
-
- for(int i=0;i<sent.size();i++){
- String[]toks=sent.get(i);
- for(int j=0;j<toks.length;j++){
- addVocab(toks[j]);
- addTag(tag.get(i)[j]);
- }
- }
-
- buildVocab();
- buildData();
- System.out.println(data.size()+"sentences, "+vocab.keySet().size()+" word types");
- }
-
- public String[] get(int idx){
- return sent.get(idx);
- }
-
- private void addVocab(String s){
- Integer integer=freq.get(s);
- if(integer==null){
- integer=0;
- }
- freq.put(s, integer+1);
- }
-
- public int tokIdx(String tok){
- Integer integer=vocab.get(tok);
- if(integer==null){
- return V;
- }
- return integer;
- }
-
- public int tagIdx(String tok){
- Integer integer=tagVocab.get(tok);
- if(integer==null){
- return tagV;
- }
- return integer;
- }
-
- private void buildData(){
- data=new ArrayList<int[]>();
- for(int i=0;i<sent.size();i++){
- String s[]=sent.get(i);
- data.add(new int [s.length]);
- for(int j=0;j<s.length;j++){
- data.get(i)[j]=tokIdx(s[j]);
- }
- }
-
- tagData=new ArrayList<int[]>();
- for(int i=0;i<tag.size();i++){
- String s[]=tag.get(i);
- tagData.add(new int [s.length]);
- for(int j=0;j<s.length;j++){
- tagData.get(i)[j]=tagIdx(s[j]);
- }
- }
- sent=null;
- tag=null;
- System.gc();
- }
-
- public int [] getInt(int idx){
- return data.get(idx);
- }
-
- /**
- *
- * @return size of vocabulary
- */
- public int getVocabSize(){
- return V;
- }
-
- public int [][]getAllData(){
- return data.toArray(new int [0][]);
- }
-
- public int [][]getTagData(){
- return tagData.toArray(new int [0][]);
- }
-
- private void buildVocab(){
- for (String key:freq.keySet()){
- if(freq.get(key)>2){
- vocab.put(key, V);
- V++;
- }
- }
- io.SerializedObjects.writeSerializedObject(vocab, alphaFilename);
- io.SerializedObjects.writeSerializedObject(tagVocab,tagalphaFilename);
- }
-
- private void addTag(String tag){
- Integer i=tagVocab.get(tag);
- if(i==null){
- tagVocab.put(tag, tagV);
- tagV++;
- }
- }
-
-}
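
A note on the out-of-vocabulary convention above, as a minimal sketch (the variable names and the unseen word are mine): buildVocab() assigns ids 0..V-1 only to words seen more than twice, and tokIdx() maps everything else to the shared id V, which is why callers size the emission table with getVocabSize()+1.

    Corpus c = new Corpus("../posdata/en_test.conll"); // same path as main() above
    int known = c.tokIdx("the");       // some id in [0, V) if "the" survived the cutoff
    int unk = c.tokIdx("zzzunseen");   // == c.getVocabSize(): the shared OOV id
    assert unk == c.getVocabSize();
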
diff --git a/gi/posterior-regularisation/prjava/src/hmm/HMM.java b/gi/posterior-regularisation/prjava/src/hmm/HMM.java
deleted file mode 100644
index 17a4679f..00000000
--- a/gi/posterior-regularisation/prjava/src/hmm/HMM.java
+++ /dev/null
@@ -1,579 +0,0 @@
-package hmm;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Scanner;
-
-public class HMM {
-
-
- //trans[i][j]=prob of going FROM i to j
- double [][]trans;
- double [][]emit;
- double []pi;
- int [][]data;
- int [][]tagdata;
-
- double logtrans[][];
-
- public HMMObjective o;
-
- public static void main(String[] args) {
-
- }
-
- public HMM(int n_state,int n_emit,int [][]data){
- trans=new double [n_state][n_state];
- emit=new double[n_state][n_emit];
- pi=new double [n_state];
-		System.out.println("Random initial parameters");
- fillRand(trans);
- fillRand(emit);
- fillRand(pi);
-
- this.data=data;
-
- }
-
- private void fillRand(double [][] a){
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- a[i][j]=Math.random();
- }
- l1normalize(a[i]);
- }
- }
- private void fillRand(double []a){
- for(int i=0;i<a.length;i++){
- a[i]=Math.random();
- }
- l1normalize(a);
- }
-
- private double loglikely=0;
-
- public void EM(){
- double trans_exp_cnt[][]=new double [trans.length][trans.length];
- double emit_exp_cnt[][]=new double[trans.length][emit[0].length];
- double start_exp_cnt[]=new double[trans.length];
- loglikely=0;
-
- //E
- for(int i=0;i<data.length;i++){
-
- double [][][] post=forwardBackward(data[i]);
- incrementExpCnt(post, data[i],
- trans_exp_cnt,
- emit_exp_cnt,
- start_exp_cnt);
-
-
- if(i%100==0){
- System.out.print(".");
- }
- if(i%1000==0){
- System.out.println(i);
- }
-
- }
- System.out.println("Log likelihood: "+loglikely);
-
- //M
- addOneSmooth(emit_exp_cnt);
- for(int i=0;i<trans.length;i++){
-
- //transition probs
- double sum=0;
- for(int j=0;j<trans.length;j++){
- sum+=trans_exp_cnt[i][j];
- }
-			//avoid NaN when a row has zero counts
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<trans[i].length;j++){
- trans[i][j]=trans_exp_cnt[i][j]/sum;
- }
-
- //emission probs
-
- sum=0;
- for(int j=0;j<emit[i].length;j++){
- sum+=emit_exp_cnt[i][j];
- }
-			//avoid NaN when a row has zero counts
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<emit[i].length;j++){
- emit[i][j]=emit_exp_cnt[i][j]/sum;
- }
-
-
- //initial probs
- for(int j=0;j<pi.length;j++){
- pi[j]=start_exp_cnt[j];
- }
- l1normalize(pi);
- }
- }
-
- private double [][][]forwardBackward(int [] seq){
- double a[][]=new double [seq.length][trans.length];
- double b[][]=new double [seq.length][trans.length];
-
- int len=seq.length;
- //initialize the first step
- for(int i=0;i<trans.length;i++){
- a[0][i]=emit[i][seq[0]]*pi[i];
- b[len-1][i]=1;
- }
-
-		//log of the normalizer, accumulated for the log-likelihood
- double c=Math.log(l1norm(a[0]));
-
- l1normalize(a[0]);
- l1normalize(b[len-1]);
-
-
-
- //forward
- for(int n=1;n<len;n++){
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans.length;j++){
- a[n][i]+=trans[j][i]*a[n-1][j];
- }
- a[n][i]*=emit[i][seq[n]];
- }
- c+=Math.log(l1norm(a[n]));
- l1normalize(a[n]);
- }
-
- loglikely+=c;
-
- //backward
- for(int n=len-2;n>=0;n--){
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans.length;j++){
- b[n][i]+=trans[i][j]*b[n+1][j]*emit[j][seq[n+1]];
- }
- }
- l1normalize(b[n]);
- }
-
-
- //expected transition
- double p[][][]=new double [seq.length][trans.length][trans.length];
- for(int n=0;n<len-1;n++){
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans.length;j++){
- p[n][i][j]=a[n][i]*trans[i][j]*emit[j][seq[n+1]]*b[n+1][j];
-
- }
- }
-
- l1normalize(p[n]);
- }
- return p;
- }
-
- private void incrementExpCnt(
- double post[][][],int [] seq,
- double trans_exp_cnt[][],
- double emit_exp_cnt[][],
- double start_exp_cnt[])
- {
-
- for(int n=0;n<post.length;n++){
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[n][i][j];
- trans_exp_cnt[i][j]+=post[n][i][j];
- }
-
- emit_exp_cnt[i][seq[n]]+=py;
-
- }
- }
-
- //the first state
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[0][i][j];
- }
- start_exp_cnt[i]+=py;
- }
-
-
- //the last state
- int len=post.length;
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[len-2][j][i];
- }
- emit_exp_cnt[i][seq[len-1]]+=py;
- }
- }
-
- public void l1normalize(double [] a){
- double sum=0;
- for(int i=0;i<a.length;i++){
- sum+=a[i];
- }
- if(sum==0){
- return ;
- }
- for(int i=0;i<a.length;i++){
- a[i]/=sum;
- }
- }
-
- public void l1normalize(double [][] a){
- double sum=0;
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- sum+=a[i][j];
- }
- }
- if(sum==0){
- return;
- }
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- a[i][j]/=sum;
- }
- }
- }
-
- public void writeModel(String modelFilename) throws FileNotFoundException, IOException{
- PrintStream ps=io.FileUtil.printstream(new File(modelFilename));
- ps.println(trans.length);
- ps.println("Initial Probabilities:");
- for(int i=0;i<pi.length;i++){
- ps.print(pi[i]+"\t");
- }
- ps.println();
- ps.println("Transition Probabilities:");
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans[i].length;j++){
- ps.print(trans[i][j]+"\t");
- }
- ps.println();
- }
- ps.println("Emission Probabilities:");
- ps.println(emit[0].length);
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<emit[i].length;j++){
- ps.println(emit[i][j]);
- }
- ps.println();
- }
- ps.close();
- }
-
- public HMM(){
-
- }
-
- public void readModel(String modelFilename){
- Scanner sc=io.FileUtil.openInFile(modelFilename);
-
- int n_state=sc.nextInt();
- sc.nextLine();
- sc.nextLine();
- pi=new double [n_state];
- for(int i=0;i<n_state;i++){
- pi[i]=sc.nextDouble();
- }
- sc.nextLine();
- sc.nextLine();
- trans=new double[n_state][n_state];
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans[i].length;j++){
- trans[i][j]=sc.nextDouble();
- }
- }
- sc.nextLine();
- sc.nextLine();
-
- int n_obs=sc.nextInt();
- emit=new double[n_state][n_obs];
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<emit[i].length;j++){
- emit[i][j]=sc.nextDouble();
- }
- }
- sc.close();
- }
-
- public int []viterbi(int [] seq){
- double [][]p=new double [seq.length][trans.length];
- int backp[][]=new int [seq.length][trans.length];
-
- for(int i=0;i<trans.length;i++){
- p[0][i]=Math.log(emit[i][seq[0]]*pi[i]);
- }
-
- double a[][]=logtrans;
- if(logtrans==null){
- a=new double [trans.length][trans.length];
- for(int i=0;i<trans.length;i++){
- for(int j=0;j<trans.length;j++){
- a[i][j]=Math.log(trans[i][j]);
- }
- }
- logtrans=a;
- }
-
- double maxprob=0;
- for(int n=1;n<seq.length;n++){
- for(int i=0;i<trans.length;i++){
- maxprob=p[n-1][0]+a[0][i];
- backp[n][i]=0;
- for(int j=1;j<trans.length;j++){
- double prob=p[n-1][j]+a[j][i];
- if(maxprob<prob){
- backp[n][i]=j;
- maxprob=prob;
- }
- }
- p[n][i]=maxprob+Math.log(emit[i][seq[n]]);
- }
- }
-
- maxprob=p[seq.length-1][0];
- int maxIdx=0;
- for(int i=1;i<trans.length;i++){
- if(p[seq.length-1][i]>maxprob){
- maxprob=p[seq.length-1][i];
- maxIdx=i;
- }
- }
- int ans[]=new int [seq.length];
- ans[seq.length-1]=maxIdx;
- for(int i=seq.length-2;i>=0;i--){
- ans[i]=backp[i+1][ans[i+1]];
- }
- return ans;
- }
-
- public double l1norm(double a[]){
- double norm=0;
- for(int i=0;i<a.length;i++){
- norm += a[i];
- }
- return norm;
- }
-
- public double [][]getEmitProb(){
- return emit;
- }
-
- public int [] sample(int terminalSym){
- ArrayList<Integer > s=new ArrayList<Integer>();
- int state=sample(pi);
- int sym=sample(emit[state]);
- while(sym!=terminalSym){
- s.add(sym);
- state=sample(trans[state]);
- sym=sample(emit[state]);
- }
-
- int ans[]=new int [s.size()];
- for(int i=0;i<ans.length;i++){
- ans[i]=s.get(i);
- }
- return ans;
- }
-
- public int sample(double p[]){
- double r=Math.random();
- double sum=0;
- for(int i=0;i<p.length;i++){
- sum+=p[i];
- if(sum>=r){
- return i;
- }
- }
- return p.length-1;
- }
-
- public void train(int tagdata[][]){
- double trans_exp_cnt[][]=new double [trans.length][trans.length];
- double emit_exp_cnt[][]=new double[trans.length][emit[0].length];
- double start_exp_cnt[]=new double[trans.length];
-
- for(int i=0;i<tagdata.length;i++){
- start_exp_cnt[tagdata[i][0]]++;
-
- for(int j=0;j<tagdata[i].length;j++){
- if(j+1<tagdata[i].length){
- trans_exp_cnt[ tagdata[i][j] ] [ tagdata[i][j+1] ]++;
- }
- emit_exp_cnt[tagdata[i][j]][data[i][j]]++;
- }
-
- }
-
- //M
- addOneSmooth(emit_exp_cnt);
- for(int i=0;i<trans.length;i++){
-
- //transition probs
- double sum=0;
- for(int j=0;j<trans.length;j++){
- sum+=trans_exp_cnt[i][j];
- }
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<trans[i].length;j++){
- trans[i][j]=trans_exp_cnt[i][j]/sum;
- }
-
- //emission probs
-
- sum=0;
- for(int j=0;j<emit[i].length;j++){
- sum+=emit_exp_cnt[i][j];
- }
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<emit[i].length;j++){
- emit[i][j]=emit_exp_cnt[i][j]/sum;
- }
-
-
- //initial probs
- for(int j=0;j<pi.length;j++){
- pi[j]=start_exp_cnt[j];
- }
- l1normalize(pi);
- }
- }
-
- private void addOneSmooth(double a[][]){
- for(int i=0;i<a.length;i++){
- for(int j=0;j<a[i].length;j++){
- a[i][j]+=0.01;
- }
- //l1normalize(a[i]);
- }
- }
-
- public void PREM(){
-
- o.optimizeWithProjectedGradientDescent();
-
- double trans_exp_cnt[][]=new double [trans.length][trans.length];
- double emit_exp_cnt[][]=new double[trans.length][emit[0].length];
- double start_exp_cnt[]=new double[trans.length];
-
- o.loglikelihood=0;
- //E
- for(int sentNum=0;sentNum<data.length;sentNum++){
-
- double [][][] post=o.forwardBackward(sentNum);
- incrementExpCnt(post, data[sentNum],
- trans_exp_cnt,
- emit_exp_cnt,
- start_exp_cnt);
-
-
- if(sentNum%100==0){
- System.out.print(".");
- }
- if(sentNum%1000==0){
- System.out.println(sentNum);
- }
-
- }
-
- System.out.println("Log likelihood: "+o.getValue());
-
- //M
- addOneSmooth(emit_exp_cnt);
- for(int i=0;i<trans.length;i++){
-
- //transition probs
- double sum=0;
- for(int j=0;j<trans.length;j++){
- sum+=trans_exp_cnt[i][j];
- }
-			//avoid NaN when a row has zero counts
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<trans[i].length;j++){
- trans[i][j]=trans_exp_cnt[i][j]/sum;
- }
-
- //emission probs
-
- sum=0;
- for(int j=0;j<emit[i].length;j++){
- sum+=emit_exp_cnt[i][j];
- }
-			//avoid NaN when a row has zero counts
- if(sum==0){
- sum=1;
- }
- for(int j=0;j<emit[i].length;j++){
- emit[i][j]=emit_exp_cnt[i][j]/sum;
- }
-
-
- //initial probs
- for(int j=0;j<pi.length;j++){
- pi[j]=start_exp_cnt[j];
- }
- l1normalize(pi);
- }
-
- }
-
- public void computeMaxwt(double[][]maxwt, int[][] d){
-
- for(int sentNum=0;sentNum<d.length;sentNum++){
- double post[][][]=forwardBackward(d[sentNum]);
-
- for(int n=0;n<post.length;n++){
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[n][i][j];
- }
-
- if(py>maxwt[i][d[sentNum][n]]){
- maxwt[i][d[sentNum][n]]=py;
- }
-
- }
- }
-
- //the last state
- int len=post.length;
- for(int i=0;i<trans.length;i++){
- double py=0;
- for(int j=0;j<trans.length;j++){
- py+=post[len-2][j][i];
- }
-
- if(py>maxwt[i][d[sentNum][len-1]]){
- maxwt[i][d[sentNum][len-1]]=py;
- }
-
- }
-
- }
-
- }
-
-}//end of class
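
For reference, forwardBackward() above implements the standard scaled recursions; written out (notation mine, with $T_{ij}$ = trans[i][j] and $e_i(x)$ = emit[i][x]):

    \alpha_1(i) \propto \pi_i\, e_i(x_1), \qquad
    \alpha_n(i) \propto e_i(x_n) \sum_j T_{ji}\, \alpha_{n-1}(j), \qquad
    \beta_n(i) \propto \sum_j T_{ij}\, e_j(x_{n+1})\, \beta_{n+1}(j)

Each $\alpha_n$ and $\beta_n$ is renormalized to sum to one (l1normalize), the log-likelihood is accumulated as $\log P(x) = \sum_n \log c_n$ with $c_n$ the pre-normalization mass l1norm(a[n]), and the returned transition posteriors are $p_n(i,j) \propto \alpha_n(i)\, T_{ij}\, e_j(x_{n+1})\, \beta_{n+1}(j)$.
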
diff --git a/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java b/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java
deleted file mode 100644
index 70b6c966..00000000
--- a/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java
+++ /dev/null
@@ -1,351 +0,0 @@
-package hmm;
-
-import gnu.trove.TIntArrayList;
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-
-public class HMMObjective extends ProjectedObjective{
-
-
- private static final double GRAD_DIFF = 3;
- public static double INIT_STEP_SIZE=10;
- public static double VAL_DIFF=1000;
-
- private HMM hmm;
- double[] newPoint ;
-
- //posterior[sent num][tok num][tag]=index into lambda
- private int posteriorMap[][][];
-	//projection[word][tag].get(occurrence)=index into lambda
- private TIntArrayList projectionMap[][];
-
- //Size of the simplex
- public double scale=10;
- private SimplexProjection projection;
-
- private int wordFreq[];
- private static int MIN_FREQ=10;
- private int numWordsToProject=0;
-
- private int n_param;
-
- public double loglikelihood;
-
- public HMMObjective(HMM h){
- hmm=h;
-
- countWords();
- buildMap();
-
- gradient=new double [n_param];
- projection = new SimplexProjection(scale);
- newPoint = new double[n_param];
- setInitialParameters(new double[n_param]);
-
- }
-
- /**@brief counts word frequency in the corpus
- *
- */
- private void countWords(){
- wordFreq=new int [hmm.emit[0].length];
- for(int i=0;i<hmm.data.length;i++){
- for(int j=0;j<hmm.data[i].length;j++){
- wordFreq[hmm.data[i][j]]++;
- }
- }
- }
-
- /**@brief build posterior and projection indices
- *
- */
- private void buildMap(){
-		//number of hidden states, words, and sentences in the corpus
- int n_states=hmm.trans.length;
- int n_words=hmm.emit[0].length;
- int n_sents=hmm.data.length;
-
- n_param=0;
- posteriorMap=new int[n_sents][][];
- projectionMap=new TIntArrayList[n_words][];
- for(int sentNum=0;sentNum<n_sents;sentNum++){
- int [] data=hmm.data[sentNum];
- posteriorMap[sentNum]=new int[data.length][n_states];
- numWordsToProject=0;
- for(int i=0;i<data.length;i++){
- int word=data[i];
- for(int state=0;state<n_states;state++){
- if(wordFreq[word]>MIN_FREQ){
- if(projectionMap[word]==null){
- projectionMap[word]=new TIntArrayList[n_states];
- }
- // if(posteriorMap[sentNum][i]==null){
- // posteriorMap[sentNum][i]=new int[n_states];
- // }
-
- posteriorMap[sentNum][i][state]=n_param;
- if(projectionMap[word][state]==null){
- projectionMap[word][state]=new TIntArrayList();
- numWordsToProject++;
- }
- projectionMap[word][state].add(n_param);
- n_param++;
- }
- else{
- posteriorMap[sentNum][i][state]=-1;
- }
- }
- }
- }
- }
-
- @Override
- public double[] projectPoint(double[] point) {
- for(int i=0;i<projectionMap.length;i++){
-
- if(projectionMap[i]==null){
- //this word is not constrained
- continue;
- }
-
- for(int j=0;j<projectionMap[i].length;j++){
- TIntArrayList instances=projectionMap[i][j];
- double[] toProject = new double[instances.size()];
-
- for (int k = 0; k < toProject.length; k++) {
- // System.out.print(instances.get(k) + " ");
- toProject[k] = point[instances.get(k)];
- }
-
- projection.project(toProject);
- for (int k = 0; k < toProject.length; k++) {
- newPoint[instances.get(k)]=toProject[k];
- }
- }
- }
- return newPoint;
- }
-
- @Override
- public double[] getGradient() {
- gradientCalls++;
- return gradient;
- }
-
- @Override
- public double getValue() {
- functionCalls++;
- return loglikelihood;
- }
-
-
- @Override
- public String toString() {
- StringBuffer sb = new StringBuffer();
- for (int i = 0; i < parameters.length; i++) {
- sb.append(parameters[i]+" ");
- if(i%100==0){
- sb.append("\n");
- }
- }
- sb.append("\n");
- /*
- for (int i = 0; i < gradient.length; i++) {
- sb.append(gradient[i]+" ");
- if(i%100==0){
- sb.append("\n");
- }
- }
- sb.append("\n");
- */
- return sb.toString();
- }
-
-
- /**
-	 * @param sentNum index of the sentence in hmm.data
- * @return posterior probability of each transition
- */
- public double [][][]forwardBackward(int sentNum){
- int [] seq=hmm.data[sentNum];
- int n_states=hmm.trans.length;
- double a[][]=new double [seq.length][n_states];
- double b[][]=new double [seq.length][n_states];
-
- int len=seq.length;
-
- boolean constrained=
- (projectionMap[seq[0]]!=null);
-
- //initialize the first step
- for(int i=0;i<n_states;i++){
- a[0][i]=hmm.emit[i][seq[0]]*hmm.pi[i];
- if(constrained){
- a[0][i]*=
- Math.exp(- parameters[ posteriorMap[sentNum][0][i] ] );
- }
- b[len-1][i]=1;
- }
-
- loglikelihood+=Math.log(hmm.l1norm(a[0]));
- hmm.l1normalize(a[0]);
- hmm.l1normalize(b[len-1]);
-
- //forward
- for(int n=1;n<len;n++){
-
- constrained=
- (projectionMap[seq[n]]!=null);
-
- for(int i=0;i<n_states;i++){
- for(int j=0;j<n_states;j++){
- a[n][i]+=hmm.trans[j][i]*a[n-1][j];
- }
- a[n][i]*=hmm.emit[i][seq[n]];
-
- if(constrained){
- a[n][i]*=
- Math.exp(- parameters[ posteriorMap[sentNum][n][i] ] );
- }
-
- }
- loglikelihood+=Math.log(hmm.l1norm(a[n]));
- hmm.l1normalize(a[n]);
- }
-
- //temp variable for e^{-\lambda}
- double factor=1;
- //backward
- for(int n=len-2;n>=0;n--){
-
- constrained=
- (projectionMap[seq[n+1]]!=null);
-
- for(int i=0;i<n_states;i++){
- for(int j=0;j<n_states;j++){
-
- if(constrained){
- factor=
- Math.exp(- parameters[ posteriorMap[sentNum][n+1][j] ] );
- }else{
- factor=1;
- }
-
- b[n][i]+=hmm.trans[i][j]*b[n+1][j]*hmm.emit[j][seq[n+1]]*factor;
-
- }
- }
- hmm.l1normalize(b[n]);
- }
-
- //expected transition
- double p[][][]=new double [seq.length][n_states][n_states];
- for(int n=0;n<len-1;n++){
-
- constrained=
- (projectionMap[seq[n+1]]!=null);
-
- for(int i=0;i<n_states;i++){
- for(int j=0;j<n_states;j++){
-
- if(constrained){
- factor=
- Math.exp(- parameters[ posteriorMap[sentNum][n+1][j] ] );
- }else{
- factor=1;
- }
-
- p[n][i][j]=a[n][i]*hmm.trans[i][j]*
- hmm.emit[j][seq[n+1]]*b[n+1][j]*factor;
-
- }
- }
-
- hmm.l1normalize(p[n]);
- }
- return p;
- }
-
- public void optimizeWithProjectedGradientDescent(){
- LineSearchMethod ls =
- new ArmijoLineSearchMinimizationAlongProjectionArc
- (new InterpolationPickFirstStep(INIT_STEP_SIZE));
-
- OptimizerStats stats = new OptimizerStats();
-
-
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(GRAD_DIFF);
- StopingCriteria stopValue = new ValueDifference(VAL_DIFF);
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
-
- optimizer.setMaxIterations(10);
- updateFunction();
- boolean succed = optimizer.optimize(this,stats,compositeStop);
-		System.out.println("Ended optimization Projected Gradient Descent\n" + stats.prettyPrint(1));
- if(succed){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
- @Override
- public void setParameters(double[] params) {
- super.setParameters(params);
- updateFunction();
- }
-
- private void updateFunction(){
-
- updateCalls++;
- loglikelihood=0;
-
- for(int sentNum=0;sentNum<hmm.data.length;sentNum++){
- double [][][]p=forwardBackward(sentNum);
-
- for(int n=0;n<p.length-1;n++){
- for(int i=0;i<p[n].length;i++){
- if(projectionMap[hmm.data[sentNum][n]]!=null){
- double posterior=0;
- for(int j=0;j<p[n][i].length;j++){
- posterior+=p[n][i][j];
- }
- gradient[posteriorMap[sentNum][n][i]]=-posterior;
- }
- }
- }
-
- //the last state
- int n=p.length-2;
- for(int i=0;i<p[n].length;i++){
- if(projectionMap[hmm.data[sentNum][n+1]]!=null){
-
- double posterior=0;
- for(int j=0;j<p[n].length;j++){
- posterior+=p[n][j][i];
- }
- gradient[posteriorMap[sentNum][n+1][i]]=-posterior;
-
- }
- }
- }
-
- }
-
-}
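
The exp(-parameters[...]) factors in the constrained forward-backward above are the standard posterior-regularization dual; as a summary in my notation (not spelled out in the source), the E-step posterior is replaced by

    q_\lambda(\mathbf{z} \mid \mathbf{x}) \;\propto\;
    p_\theta(\mathbf{z} \mid \mathbf{x})\,
    \exp\{-\boldsymbol{\lambda}^{\top} \mathbf{f}(\mathbf{x}, \mathbf{z})\}

with one dual variable per (occurrence, state) of every word above MIN_FREQ. The gradient of the dual objective in $\lambda$ is minus the expected feature count, which is exactly the gradient[...] = -posterior assignment in updateFunction(), and projectPoint() projects each word-state group of dual variables onto a simplex of size scale, giving the L1/Linf-style constraints that POS.java refers to.
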
diff --git a/gi/posterior-regularisation/prjava/src/hmm/POS.java b/gi/posterior-regularisation/prjava/src/hmm/POS.java
deleted file mode 100644
index bdcbc683..00000000
--- a/gi/posterior-regularisation/prjava/src/hmm/POS.java
+++ /dev/null
@@ -1,120 +0,0 @@
-package hmm;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.HashMap;
-
-import data.Corpus;
-
-public class POS {
-
- //public String trainFilename="../posdata/en_train.conll";
- public static String trainFilename="../posdata/small_train.txt";
-// public static String trainFilename="../posdata/en_test.conll";
-// public static String trainFilename="../posdata/trial1.txt";
-
- public static String testFilename="../posdata/en_test.conll";
- //public static String testFilename="../posdata/trial1.txt";
-
- public static String predFilename="../posdata/en_test.predict.conll";
- public static String modelFilename="../posdata/posModel.out";
- public static final int ITER=20;
- public static final int N_STATE=30;
-
- public static void main(String[] args) {
- //POS p=new POS();
- //POS p=new POS(true);
- try {
- PRPOS();
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
-
- public POS() throws FileNotFoundException, IOException{
- Corpus c= new Corpus(trainFilename);
- //size of vocabulary +1 for unknown tokens
- HMM hmm =new HMM(N_STATE, c.getVocabSize()+1,c.getAllData());
- for(int i=0;i<ITER;i++){
-			System.out.println("Iter: "+i);
- hmm.EM();
- if((i+1)%10==0){
- hmm.writeModel(modelFilename+i);
- }
- }
-
- hmm.writeModel(modelFilename);
-
- Corpus test=new Corpus(testFilename,c.vocab);
-
- PrintStream ps= io.FileUtil.printstream(new File(predFilename));
-
- int [][]data=test.getAllData();
- for(int i=0;i<data.length;i++){
- int []tag=hmm.viterbi(data[i]);
- String sent[]=test.get(i);
- for(int j=0;j<data[i].length;j++){
- ps.println(sent[j]+"\t"+tag[j]);
- }
- ps.println();
- }
- ps.close();
- }
-
- //POS induction with L1/Linf constraints
- public static void PRPOS() throws FileNotFoundException, IOException{
- Corpus c= new Corpus(trainFilename);
- //size of vocabulary +1 for unknown tokens
- HMM hmm =new HMM(N_STATE, c.getVocabSize()+1,c.getAllData());
- hmm.o=new HMMObjective(hmm);
- for(int i=0;i<ITER;i++){
- System.out.println("Iter: "+i);
- hmm.PREM();
- if((i+1)%10==0){
- hmm.writeModel(modelFilename+i);
- }
- }
-
- hmm.writeModel(modelFilename);
- }
-
-
- public POS(boolean supervised) throws FileNotFoundException, IOException{
- Corpus c= new Corpus(trainFilename);
- //size of vocabulary +1 for unknown tokens
- HMM hmm =new HMM(c.tagVocab.size() , c.getVocabSize()+1,c.getAllData());
- hmm.train(c.getTagData());
-
- hmm.writeModel(modelFilename);
-
- Corpus test=new Corpus(testFilename,c.vocab);
-
- HashMap<String, Integer>tagVocab=
- (HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(Corpus.tagalphaFilename);
- String [] tagdict=new String [tagVocab.size()+1];
- for(String key:tagVocab.keySet()){
- tagdict[tagVocab.get(key)]=key;
- }
- tagdict[tagdict.length-1]=Corpus.UNK_TOK;
-
- System.out.println(c.vocab.get("<e>"));
-
- PrintStream ps= io.FileUtil.printstream(new File(predFilename));
-
- int [][]data=test.getAllData();
- for(int i=0;i<data.length;i++){
- int []tag=hmm.viterbi(data[i]);
- String sent[]=test.get(i);
- for(int j=0;j<data[i].length;j++){
- ps.println(sent[j]+"\t"+tagdict[tag[j]]);
- }
- ps.println();
- }
- ps.close();
- }
-}
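
A minimal sketch (the wrapper name is mine, not in the source) of how the three training modes above relate:

    public static void runAll() throws Exception { // hypothetical convenience wrapper
        PRPOS();         // EM with L1/Linf posterior constraints (HMMObjective)
        new POS();       // plain EM baseline; tags testFilename into predFilename
        new POS(true);   // supervised skyline trained from the gold tags
    }
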
diff --git a/gi/posterior-regularisation/prjava/src/io/FileUtil.java b/gi/posterior-regularisation/prjava/src/io/FileUtil.java
deleted file mode 100644
index 6720d087..00000000
--- a/gi/posterior-regularisation/prjava/src/io/FileUtil.java
+++ /dev/null
@@ -1,48 +0,0 @@
-package io;
-import java.util.*;
-import java.util.zip.GZIPInputStream;
-import java.util.zip.GZIPOutputStream;
-import java.io.*;
-public class FileUtil
-{
- public static BufferedReader reader(File file) throws FileNotFoundException, IOException
- {
- if (file.getName().endsWith(".gz"))
- return new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(file)), "UTF8"));
- else
- return new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF8"));
- }
-
- public static PrintStream printstream(File file) throws FileNotFoundException, IOException
- {
- if (file.getName().endsWith(".gz"))
- return new PrintStream(new GZIPOutputStream(new FileOutputStream(file)), true, "UTF8");
- else
- return new PrintStream(new FileOutputStream(file), true, "UTF8");
- }
-
- public static Scanner openInFile(String filename)
- {
- Scanner localsc=null;
- try
- {
- localsc=new Scanner(new FileInputStream(filename), "UTF8");
-
- }catch(IOException ioe){
- System.out.println(ioe.getMessage());
- }
- return localsc;
- }
-
- public static FileInputStream openInputStream(String infilename)
- {
- FileInputStream fis=null;
- try {
- fis = new FileInputStream(infilename);
-
- } catch (IOException ioe) {
- System.out.println(ioe.getMessage());
- }
- return fis;
- }
-}
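
Usage sketch for the helpers above (the file names are assumptions; run inside a method that throws IOException). reader() and printstream() choose gzip or plain streams purely from the ".gz" suffix:

    BufferedReader in = FileUtil.reader(new File("corpus.txt.gz"));
    PrintStream out = FileUtil.printstream(new File("corpus.lc.txt.gz"));
    for (String line = in.readLine(); line != null; line = in.readLine())
        out.println(line.toLowerCase()); // gzip in, gzip out
    in.close();
    out.close();
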
diff --git a/gi/posterior-regularisation/prjava/src/io/SerializedObjects.java b/gi/posterior-regularisation/prjava/src/io/SerializedObjects.java
deleted file mode 100644
index d1631b51..00000000
--- a/gi/posterior-regularisation/prjava/src/io/SerializedObjects.java
+++ /dev/null
@@ -1,83 +0,0 @@
-package io;
-
-
-
-import java.io.BufferedInputStream;
-import java.io.BufferedOutputStream;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.ObjectInput;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutput;
-import java.io.ObjectOutputStream;
-import java.io.OutputStream;
-
-public class SerializedObjects
-{
- public static void writeSerializedObject(Object object, String outFile)
- {
- ObjectOutput output = null;
- try{
- //use buffering
- OutputStream file = new FileOutputStream(outFile);
- OutputStream buffer = new BufferedOutputStream( file );
- output = new ObjectOutputStream( buffer );
- output.writeObject(object);
- buffer.close();
- file.close();
- }
- catch(IOException ex){
- ex.printStackTrace();
- }
- finally{
- try {
- if (output != null) {
- //flush and close "output" and its underlying streams
- output.close();
- }
- }
- catch (IOException ex ){
- ex.printStackTrace();
- }
- }
- }
-
- public static Object readSerializedObject(String inputFile)
- {
- ObjectInput input = null;
- Object recoveredObject=null;
- try{
- //use buffering
- InputStream file = new FileInputStream(inputFile);
- InputStream buffer = new BufferedInputStream(file);
- input = new ObjectInputStream(buffer);
- //deserialize the List
- recoveredObject = input.readObject();
- }
- catch(IOException ex){
- ex.printStackTrace();
- }
- catch (ClassNotFoundException ex){
- ex.printStackTrace();
- }
- catch(Exception ex)
- {
- ex.printStackTrace();
- }
- finally{
- try {
- if ( input != null ) {
- //close "input" and its underlying streams
- input.close();
- }
- }
- catch (IOException ex){
- ex.printStackTrace();
- }
- }
- return recoveredObject;
- }
-
-} \ No newline at end of file
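
The close/flush bookkeeping above predates Java 7; a sketch of the same writer using try-with-resources (my variant, not the source's):

    public static void writeSerializedObject7(Object object, String outFile) {
        try (ObjectOutputStream output = new ObjectOutputStream(
                new BufferedOutputStream(new FileOutputStream(outFile)))) {
            output.writeObject(object); // stream is flushed and closed automatically
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
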
diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java b/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java
deleted file mode 100644
index 25fa7f09..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java
+++ /dev/null
@@ -1,110 +0,0 @@
-package optimization.examples;
-
-
-import optimization.gradientBasedMethods.ConjugateGradient;
-import optimization.gradientBasedMethods.GradientDescent;
-import optimization.gradientBasedMethods.LBFGS;
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.Optimizer;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimization;
-import optimization.linesearch.LineSearchMethod;
-import optimization.stopCriteria.GradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.util.MathUtils;
-
-/**
- *
- * @author javg
- * f(x) = \sum_{i=1}^{N-1} \left[ (1-x_i)^2+ 100 (x_{i+1} - x_i^2 )^2 \right] \quad \forall x\in\mathbb{R}^N.
- */
-public class GeneralizedRosenbrock extends Objective{
-
-
-
- public GeneralizedRosenbrock(int dimensions){
- parameters = new double[dimensions];
- java.util.Arrays.fill(parameters, 0);
- gradient = new double[dimensions];
-
- }
-
- public GeneralizedRosenbrock(int dimensions, double[] params){
- parameters = params;
- gradient = new double[dimensions];
- }
-
-
- public double getValue() {
- functionCalls++;
- double value = 0;
- for(int i = 0; i < parameters.length-1; i++){
- value += MathUtils.square(1-parameters[i]) + 100*MathUtils.square(parameters[i+1] - MathUtils.square(parameters[i]));
- }
-
- return value;
- }
-
- /**
-	 * gx = -2(1-x) - 400x(y-x^2)
-	 * gy = 200(y-x^2)
- */
- public double[] getGradient() {
- gradientCalls++;
- java.util.Arrays.fill(gradient,0);
- for(int i = 0; i < parameters.length-1; i++){
- gradient[i]+=-2*(1-parameters[i]) - 400*parameters[i]*(parameters[i+1] - MathUtils.square(parameters[i]));
- gradient[i+1]+=200*(parameters[i+1] - MathUtils.square(parameters[i]));
- }
- return gradient;
- }
-
-
-
-
-
-
-
- public String toString(){
- String res ="";
- for(int i = 0; i < parameters.length; i++){
- res += "P" + i+ " " + parameters[i];
- }
- res += " Value " + getValue();
- return res;
- }
-
- public static void main(String[] args) {
-
- GeneralizedRosenbrock o = new GeneralizedRosenbrock(2);
-		System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
-
- System.out.println("Doing Gradient descent");
- //LineSearchMethod wolfe = new WolfRuleLineSearch(new InterpolationPickFirstStep(1),100,0.001,0.1);
- StopingCriteria stop = new GradientL2Norm(0.001);
- LineSearchMethod ls = new ArmijoLineSearchMinimization();
- Optimizer optimizer = new GradientDescent(ls);
- OptimizerStats stats = new OptimizerStats();
- optimizer.setMaxIterations(1000);
- boolean succed = optimizer.optimize(o,stats, stop);
-		System.out.println("Success " + succed + "\n"+stats.prettyPrint(1));
- System.out.println("Doing Conjugate Gradient descent");
- o = new GeneralizedRosenbrock(2);
- // wolfe = new WolfRuleLineSearch(new InterpolationPickFirstStep(1),100,0.001,0.1);
- optimizer = new ConjugateGradient(ls);
- stats = new OptimizerStats();
- optimizer.setMaxIterations(1000);
- succed = optimizer.optimize(o,stats,stop);
-		System.out.println("Success " + succed + "\n"+stats.prettyPrint(1));
-		System.out.println("Doing quasi-Newton descent");
- o = new GeneralizedRosenbrock(2);
- optimizer = new LBFGS(ls,10);
- stats = new OptimizerStats();
- optimizer.setMaxIterations(1000);
- succed = optimizer.optimize(o,stats,stop);
-		System.out.println("Success " + succed + "\n"+stats.prettyPrint(1));
-
- }
-
-}
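
Summing the two accumulations in getGradient() above, the full partial derivative at an interior coordinate is

    \frac{\partial f}{\partial x_i} =
    -2(1 - x_i) - 400\, x_i (x_{i+1} - x_i^2) + 200\,(x_i - x_{i-1}^2)

with the boundary coordinates dropping the term for the missing neighbor; the global minimum is $f = 0$ at $x = (1, \dots, 1)$.
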
diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java b/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java
deleted file mode 100644
index f087681e..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java
+++ /dev/null
@@ -1,128 +0,0 @@
-package optimization.examples;
-
-
-import optimization.gradientBasedMethods.ConjugateGradient;
-
-import optimization.gradientBasedMethods.GradientDescent;
-import optimization.gradientBasedMethods.LBFGS;
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.GenericPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.linesearch.WolfRuleLineSearch;
-import optimization.stopCriteria.GradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-
-
-/**
- * @author javg
- *
- */
-public class x2y2 extends Objective{
-
-
-	//Implements the function ax^2 + by^2
- double a, b;
- public x2y2(double a, double b){
- this.a = a;
- this.b = b;
- parameters = new double[2];
- parameters[0] = 4;
- parameters[1] = 4;
- gradient = new double[2];
- }
-
- public double getValue() {
- functionCalls++;
- return a*parameters[0]*parameters[0]+b*parameters[1]*parameters[1];
- }
-
- public double[] getGradient() {
- gradientCalls++;
- gradient[0]=2*a*parameters[0];
- gradient[1]=2*b*parameters[1];
- return gradient;
-// if(debugLevel >=2){
-// double[] numericalGradient = DebugHelpers.getNumericalGradient(this, parameters, 0.000001);
-// for(int i = 0; i < parameters.length; i++){
-// double diff = Math.abs(gradient[i]-numericalGradient[i]);
-// if(diff > 0.00001){
-// System.out.println("Numerical Gradient does not match");
-// System.exit(1);
-// }
-// }
-// }
- }
-
-
-
- public void optimizeWithGradientDescent(LineSearchMethod ls, OptimizerStats stats, x2y2 o){
- GradientDescent optimizer = new GradientDescent(ls);
- StopingCriteria stop = new GradientL2Norm(0.001);
-// optimizer.setGradientConvergenceValue(0.001);
- optimizer.setMaxIterations(100);
- boolean succed = optimizer.optimize(o,stats,stop);
-		System.out.println("Ended optimization Gradient Descent\n" + stats.prettyPrint(1));
- System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
- if(succed){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
- public void optimizeWithConjugateGradient(LineSearchMethod ls, OptimizerStats stats, x2y2 o){
- ConjugateGradient optimizer = new ConjugateGradient(ls);
- StopingCriteria stop = new GradientL2Norm(0.001);
-
- optimizer.setMaxIterations(10);
- boolean succed = optimizer.optimize(o,stats,stop);
-		System.out.println("Ended optimization Conjugate Gradient\n" + stats.prettyPrint(1));
- System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
- if(succed){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
- public void optimizeWithLBFGS(LineSearchMethod ls, OptimizerStats stats, x2y2 o){
- LBFGS optimizer = new LBFGS(ls,10);
- StopingCriteria stop = new GradientL2Norm(0.001);
- optimizer.setMaxIterations(10);
- boolean succed = optimizer.optimize(o,stats,stop);
-		System.out.println("Ended optimization LBFGS\n" + stats.prettyPrint(1));
- System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
- if(succed){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
- public static void main(String[] args) {
- x2y2 o = new x2y2(1,10);
- System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
- o.setDebugLevel(4);
-		LineSearchMethod wolfe = new WolfRuleLineSearch(new GenericPickFirstStep(1),0.001,0.9);
-// LineSearchMethod ls = new ArmijoLineSearchMinimization();
- OptimizerStats stats = new OptimizerStats();
- o.optimizeWithGradientDescent(wolfe, stats, o);
- o = new x2y2(1,10);
- System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
-// ls = new ArmijoLineSearchMinimization();
- stats = new OptimizerStats();
- o.optimizeWithConjugateGradient(wolfe, stats, o);
- o = new x2y2(1,10);
- System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
-// ls = new ArmijoLineSearchMinimization();
- stats = new OptimizerStats();
- o.optimizeWithLBFGS(wolfe, stats, o);
- }
-
- public String toString(){
- return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue();
- }
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java b/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java
deleted file mode 100644
index 391775b7..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java
+++ /dev/null
@@ -1,127 +0,0 @@
-package optimization.examples;
-
-
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.projections.BoundsProjection;
-import optimization.projections.Projection;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.GradientL2Norm;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-
-
-/**
- * @author javg
- *
- *
- * a(x-dx)^2 + b(y-dy)^2
- */
-public class x2y2WithConstraints extends ProjectedObjective{
-
-
- double a, b;
- double dx;
- double dy;
- Projection projection;
-
-
- public x2y2WithConstraints(double a, double b, double[] params, double dx, double dy, Projection proj){
- //projection = new BoundsProjection(0.2,Double.MAX_VALUE);
- super();
- projection = proj;
- this.a = a;
- this.b = b;
- this.dx = dx;
- this.dy = dy;
- setInitialParameters(params);
- System.out.println("Function " +a+"(x-"+dx+")^2 + "+b+"(y-"+dy+")^2");
- System.out.println("Gradient " +(2*a)+"(x-"+dx+") ; "+(b*2)+"(y-"+dy+")");
- printParameters();
- projection.project(parameters);
- printParameters();
- gradient = new double[2];
- }
-
- public double getValue() {
- functionCalls++;
- return a*(parameters[0]-dx)*(parameters[0]-dx)+b*((parameters[1]-dy)*(parameters[1]-dy));
- }
-
- public double[] getGradient() {
- if(gradient == null){
- gradient = new double[2];
- }
- gradientCalls++;
- gradient[0]=2*a*(parameters[0]-dx);
- gradient[1]=2*b*(parameters[1]-dy);
- return gradient;
- }
-
-
- public double[] projectPoint(double[] point) {
- double[] newPoint = point.clone();
- projection.project(newPoint);
- return newPoint;
- }
-
- public void optimizeWithProjectedGradientDescent(LineSearchMethod ls, OptimizerStats stats, x2y2WithConstraints o){
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(0.001);
- StopingCriteria stopValue = new ValueDifference(0.001);
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
-
- optimizer.setMaxIterations(5);
- boolean succed = optimizer.optimize(o,stats,compositeStop);
-		System.out.println("Ended optimization Projected Gradient Descent\n" + stats.prettyPrint(1));
- System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
- if(succed){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
-
-
- public String toString(){
-
- return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue() + " grad (" + getGradient()[0] + ":" + getGradient()[1]+")";
- }
-
- public static void main(String[] args) {
- double a = 1;
- double b=1;
- double x0 = 0;
- double y0 =1;
- double dx = 0.5;
- double dy = 0.5 ;
- double [] parameters = new double[2];
- parameters[0] = x0;
- parameters[1] = y0;
- x2y2WithConstraints o = new x2y2WithConstraints(a,b,parameters,dx,dy, new SimplexProjection(0.5));
- System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1] + " a " + a + " b "+b );
- o.setDebugLevel(4);
-
- LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1));
-
- OptimizerStats stats = new OptimizerStats();
- o.optimizeWithProjectedGradientDescent(ls, stats, o);
-
-// o = new x2y2WithConstraints(a,b,x0,y0,dx,dy);
-// stats = new OptimizerStats();
-// o.optimizeWithSpectralProjectedGradientDescent(stats, o);
- }
-
-
-
-
-}
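
SimplexProjection's implementation is not part of this diff, but the projection it presumably computes is the standard Euclidean projection onto the scaled simplex:

    \min_{\mathbf{w}} \tfrac{1}{2}\|\mathbf{w} - \mathbf{v}\|_2^2
    \quad \text{s.t.} \quad \sum_i w_i = s,\; w_i \ge 0
    \qquad \Rightarrow \qquad w_i = \max(v_i - \theta, 0)

where the threshold $\theta$ is chosen (for instance after sorting $\mathbf{v}$) so that the clipped coordinates sum to the simplex size $s$ (0.5 in this example).
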
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java
deleted file mode 100644
index 2fcb7990..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java
+++ /dev/null
@@ -1,120 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.DifferentiableLineSearchObjective;
-import optimization.linesearch.LineSearchMethod;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.util.MathUtils;
-
-/**
- *
- * @author javg
- *
- */
-public abstract class AbstractGradientBaseMethod implements Optimizer{
-
- protected int maxNumberOfIterations=10000;
-
-
-
- protected int currentProjectionIteration;
- protected double currValue;
-	protected double previousValue = Double.MAX_VALUE;
- protected double step;
- protected double[] gradient;
- public double[] direction;
-
- //Original values
- protected double originalGradientL2Norm;
-
- protected LineSearchMethod lineSearch;
- DifferentiableLineSearchObjective lso;
-
-
- public void reset(){
- direction = null;
- gradient = null;
- previousValue = Double.MAX_VALUE;
- currentProjectionIteration = 0;
- originalGradientL2Norm = 0;
- step = 0;
- currValue = 0;
- }
-
- public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){
- lso = new DifferentiableLineSearchObjective(o);
- }
- public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){
- }
-
- public void updateStructuresAfterStep(Objective o,OptimizerStats stats, StopingCriteria stop){
- }
-
- public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){
- //Initialize structures
-
- stats.collectInitStats(this, o);
- direction = new double[o.getNumParameters()];
- initializeStructures(o, stats, stop);
- for (currentProjectionIteration = 1; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){
- //System.out.println("\tgradient descent iteration " + currentProjectionIteration);
- //System.out.print("\tparameters:" );
- //o.printParameters();
- previousValue = currValue;
- currValue = o.getValue();
- gradient = o.getGradient();
- if(stop.stopOptimization(o)){
- stats.collectFinalStats(this, o);
- return true;
- }
-
- getDirection();
- if(MathUtils.dotProduct(gradient, direction) > 0){
- System.out.println("Not a descent direction");
- System.out.println(" current stats " + stats.prettyPrint(1));
- System.exit(-1);
- }
- updateStructuresBeforeStep(o, stats, stop);
- lso.reset(direction);
- step = lineSearch.getStepSize(lso);
- //System.out.println("\t\tLeave with step: " + step);
- if(step==-1){
- System.out.println("Failed to find step");
- stats.collectFinalStats(this, o);
- return false;
- }
- updateStructuresAfterStep( o, stats, stop);
-// previousValue = currValue;
-// currValue = o.getValue();
-// gradient = o.getGradient();
- stats.collectIterationStats(this, o);
- }
- stats.collectFinalStats(this, o);
- return false;
- }
-
-
- public int getCurrentIteration() {
- return currentProjectionIteration;
- }
-
-
- /**
- * Method specific
- */
- public abstract double[] getDirection();
-
- public double getCurrentStep() {
- return step;
- }
-
-
-
- public void setMaxIterations(int max) {
- maxNumberOfIterations = max;
- }
-
- public double getCurrentValue() {
- return currValue;
- }
-}
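
Usage sketch of the template above, with types taken from the example classes elsewhere in this diff (the Objective instance is assumed given): a subclass supplies only getDirection(), and optimize() handles evaluation, the descent check, the line search, and the stats/stopping hooks.

    Optimizer opt = new GradientDescent(new ArmijoLineSearchMinimization());
    opt.setMaxIterations(1000);
    boolean converged = opt.optimize(objective, new OptimizerStats(),
                                     new GradientL2Norm(0.001));
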
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java
deleted file mode 100644
index 28295729..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java
+++ /dev/null
@@ -1,92 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.DifferentiableLineSearchObjective;
-import optimization.linesearch.LineSearchMethod;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.util.MathUtils;
-
-
-
-public class ConjugateGradient extends AbstractGradientBaseMethod{
-
-
- double[] previousGradient;
- double[] previousDirection;
-
- public ConjugateGradient(LineSearchMethod lineSearch) {
- this.lineSearch = lineSearch;
- }
-
- public void reset(){
- super.reset();
- java.util.Arrays.fill(previousDirection, 0);
- java.util.Arrays.fill(previousGradient, 0);
- }
-
- public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){
- super.initializeStructures(o, stats, stop);
- previousGradient = new double[o.getNumParameters()];
- previousDirection = new double[o.getNumParameters()];
- }
- public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){
- System.arraycopy(gradient, 0, previousGradient, 0, gradient.length);
- System.arraycopy(direction, 0, previousDirection, 0, direction.length);
- }
-
-// public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){
-// DifferentiableLineSearchObjective lso = new DifferentiableLineSearchObjective(o);
-// stats.collectInitStats(this, o);
-// direction = new double[o.getNumParameters()];
-// initializeStructures(o, stats, stop);
-// for (currentProjectionIteration = 0; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){
-// previousValue = currValue;
-// currValue = o.getValue();
-// gradient =o.getGradient();
-// if(stop.stopOptimization(gradient)){
-// stats.collectFinalStats(this, o);
-// return true;
-// }
-// getDirection();
-// updateStructures(o, stats, stop);
-// lso.reset(direction);
-// step = lineSearch.getStepSize(lso);
-// if(step==-1){
-// System.out.println("Failed to find a step size");
-// System.out.println("Failed to find step");
-// stats.collectFinalStats(this, o);
-// return false;
-// }
-//
-// stats.collectIterationStats(this, o);
-// }
-// stats.collectFinalStats(this, o);
-// return false;
-// }
-
- public double[] getDirection(){
- direction = MathUtils.negation(gradient);
- if(currentProjectionIteration != 1){
- //Using Polak-Ribiere method (book equation 5.45)
- double b = MathUtils.dotProduct(gradient, MathUtils.arrayMinus(gradient, previousGradient))
- /MathUtils.dotProduct(previousGradient, previousGradient);
- if(b<0){
- System.out.println("Defaulting to gradient descent");
- b = Math.max(b, 0);
- }
- MathUtils.plusEquals(direction, previousDirection, b);
- //Debug code
- if(MathUtils.dotProduct(direction, gradient) > 0){
-				System.out.println("Not a descent direction; resetting to gradient");
- direction = MathUtils.negation(gradient);
- }
- }
- return direction;
- }
-
-
-
-
-
-
-}
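
The update in getDirection() above is Polak-Ribiere conjugate gradient; spelled out in my notation:

    \beta_k = \max\Big(0,\;
        \frac{\mathbf{g}_k^{\top}(\mathbf{g}_k - \mathbf{g}_{k-1})}
             {\mathbf{g}_{k-1}^{\top}\mathbf{g}_{k-1}}\Big),
    \qquad
    \mathbf{d}_k = -\mathbf{g}_k + \beta_k\, \mathbf{d}_{k-1}

and, as the debug branch shows, the code additionally falls back to steepest descent ($\mathbf{d}_k = -\mathbf{g}_k$) whenever the combined direction fails the descent test.
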
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java
deleted file mode 100644
index 6dc4ef6c..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java
+++ /dev/null
@@ -1,65 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import java.util.ArrayList;
-
-import optimization.util.MathUtils;
-
-
-
-public class DebugHelpers {
- public static void getLineSearchGraph(Objective o, double[] direction,
- double[] parameters, double originalObj,
- double originalDot, double c1, double c2){
- ArrayList<Double> stepS = new ArrayList<Double>();
- ArrayList<Double> obj = new ArrayList<Double>();
- ArrayList<Double> norm = new ArrayList<Double>();
- double[] gradient = new double[o.getNumParameters()];
- double[] newParameters = parameters.clone();
- MathUtils.plusEquals(newParameters,direction,0);
- o.setParameters(newParameters);
- double minValue = o.getValue();
- int valuesBiggerThanMax = 0;
- for(double step = 0; step < 2; step +=0.01 ){
- newParameters = parameters.clone();
- MathUtils.plusEquals(newParameters,direction,step);
- o.setParameters(newParameters);
- double newValue = o.getValue();
- gradient = o.getGradient();
- double newgradDirectionDot = MathUtils.dotProduct(gradient,direction);
- stepS.add(step);
- obj.add(newValue);
- norm.add(newgradDirectionDot);
- if(newValue <= minValue){
- minValue = newValue;
- }else{
- valuesBiggerThanMax++;
- }
-
- if(valuesBiggerThanMax > 10){
- break;
- }
-
- }
- System.out.println("step\torigObj\tobj\tsuffdec\tnorm\tcurvature1");
- for(int i = 0; i < stepS.size(); i++){
- double cnorm= norm.get(i);
- System.out.println(stepS.get(i)+"\t"+originalObj +"\t"+obj.get(i) + "\t" +
- (originalObj + originalDot*((Double)stepS.get(i))*c1) +"\t"+Math.abs(cnorm) +"\t"+c2*Math.abs(originalDot));
- }
- }
-
- public static double[] getNumericalGradient(Objective o, double[] parameters, double epsilon){
- int nrParameters = o.getNumParameters();
- double[] gradient = new double[nrParameters];
- double[] newParameters;
- double originalValue = o.getValue();
- for(int parameter = 0; parameter < nrParameters; parameter++){
- newParameters = parameters.clone();
- newParameters[parameter]+=epsilon;
- o.setParameters(newParameters);
- double newValue = o.getValue();
- gradient[parameter]=(newValue-originalValue)/epsilon;
- }
- return gradient;
- }
-}
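
getNumericalGradient() above uses a one-sided difference; as an aside (not in the source), the central variant is more accurate at twice the cost in function evaluations:

    \frac{\partial f}{\partial x_i} \approx
    \frac{f(\mathbf{x} + \epsilon \mathbf{e}_i) - f(\mathbf{x})}{\epsilon} + O(\epsilon),
    \qquad
    \frac{\partial f}{\partial x_i} \approx
    \frac{f(\mathbf{x} + \epsilon \mathbf{e}_i) - f(\mathbf{x} - \epsilon \mathbf{e}_i)}{2\epsilon} + O(\epsilon^2)
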
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java
deleted file mode 100644
index 9a53cef4..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java
+++ /dev/null
@@ -1,19 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import optimization.linesearch.LineSearchMethod;
-
-
-
-public class GradientDescent extends AbstractGradientBaseMethod{
-
- public GradientDescent(LineSearchMethod lineSearch) {
- this.lineSearch = lineSearch;
- }
-
- public double[] getDirection(){
- for(int i = 0; i< gradient.length; i++){
- direction[i] = -gradient[i];
- }
- return direction;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java
deleted file mode 100644
index dedbc942..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java
+++ /dev/null
@@ -1,234 +0,0 @@
-package optimization.gradientBasedMethods;
-
-
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.DifferentiableLineSearchObjective;
-import optimization.linesearch.LineSearchMethod;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.util.MathUtils;
-
-public class LBFGS extends AbstractGradientBaseMethod{
-
- //How many previous values are being saved
- int history;
- double[][] skList;
- double[][] ykList;
- double initialHessianParameters;
- double[] previousGradient;
- double[] previousParameters;
-
-	//auxiliary structures
- double q[];
- double[] roi;
- double[] alphai;
-
- public LBFGS(LineSearchMethod ls, int history) {
- lineSearch = ls;
- this.history = history;
- skList = new double[history][];
- ykList = new double[history][];
-
- }
-
- public void reset(){
- super.reset();
- initialHessianParameters = 0;
- previousParameters = null;
- previousGradient = null;
- skList = new double[history][];
- ykList = new double[history][];
- q = null;
- roi = null;
- alphai = null;
- }
-
- public double[] LBFGSTwoLoopRecursion(double hessianConst){
- //Only create array once
- if(q == null){
- q = new double[gradient.length];
- }
- System.arraycopy(gradient, 0, q, 0, gradient.length);
- //Only create array once
- if(roi == null){
- roi = new double[history];
- }
- //Only create array once
- if(alphai == null){
- alphai = new double[history];
- }
-
- for(int i = history-1; i >=0 && skList[i]!= null && ykList[i]!=null; i-- ){
- // System.out.println("New to Old proj " + currentProjectionIteration + " history "+history + " index " + i);
- double[] si = skList[i];
- double[] yi = ykList[i];
- roi[i]= 1.0/MathUtils.dotProduct(yi,si);
- alphai[i] = MathUtils.dotProduct(si, q)*roi[i];
- MathUtils.plusEquals(q, yi, -alphai[i]);
- }
- //Initial Hessian is just a constant
- MathUtils.scalarMultiplication(q, hessianConst);
- for(int i = 0; i <history && skList[i]!= null && ykList[i]!=null; i++ ){
- // System.out.println("Old to New proj " + currentProjectionIteration + " history "+history + " index " + i);
- double beta = MathUtils.dotProduct(ykList[i], q)*roi[i];
- MathUtils.plusEquals(q, skList[i], (alphai[i]-beta));
- }
- return q;
- }
-
-
-
-
- @Override
- public double[] getDirection() {
-
- calculateInitialHessianParameter();
-// System.out.println("Initial hessian " + initialHessianParameters);
- return direction = MathUtils.negation(LBFGSTwoLoopRecursion(initialHessianParameters));
- }
-
- public void calculateInitialHessianParameter(){
- if(currentProjectionIteration == 1){
- //Use gradient
- initialHessianParameters = 1;
- }else if(currentProjectionIteration <= history){
- double[] sk = skList[currentProjectionIteration-2];
- double[] yk = ykList[currentProjectionIteration-2];
- initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk);
- }else{
- //get the last one
- double[] sk = skList[history-1];
- double[] yk = ykList[history-1];
- initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk);
- }
- }
-
-	//TODO if structures exist, just reset them to zero
- public void initializeStructures(Objective o,OptimizerStats stats, StopingCriteria stop){
- super.initializeStructures(o, stats, stop);
- previousParameters = new double[o.getNumParameters()];
- previousGradient = new double[o.getNumParameters()];
- }
- public void updateStructuresBeforeStep(Objective o,OptimizerStats stats, StopingCriteria stop){
-		super.updateStructuresBeforeStep(o, stats, stop);
- System.arraycopy(o.getParameters(), 0, previousParameters, 0, previousParameters.length);
- System.arraycopy(gradient, 0, previousGradient, 0, gradient.length);
- }
-
- public void updateStructuresAfterStep( Objective o,OptimizerStats stats, StopingCriteria stop){
- double[] diffX = MathUtils.arrayMinus(o.getParameters(), previousParameters);
- double[] diffGrad = MathUtils.arrayMinus(gradient, previousGradient);
-		//Save the new values and discard the oldest ones
- if(currentProjectionIteration > history){
- for(int i = 0; i < history-1;i++){
- skList[i]=skList[i+1];
- ykList[i]=ykList[i+1];
- }
- skList[history-1]=diffX;
- ykList[history-1]=diffGrad;
- }else{
- skList[currentProjectionIteration-1]=diffX;
- ykList[currentProjectionIteration-1]=diffGrad;
- }
- }
-
-// public boolean optimize(Objective o, OptimizerStats stats, StopingCriteria stop) {
-// DifferentiableLineSearchObjective lso = new DifferentiableLineSearchObjective(o);
-// gradient = o.getGradient();
-// direction = new double[o.getNumParameters()];
-// previousGradient = new double[o.getNumParameters()];
-//
-// previousParameters = new double[o.getNumParameters()];
-//
-// stats.collectInitStats(this, o);
-// previousValue = Double.MAX_VALUE;
-// currValue= o.getValue();
-// //Used for stopping criteria
-// double[] originalGradient = o.getGradient();
-//
-// originalGradientL2Norm = MathUtils.L2Norm(originalGradient);
-// if(stop.stopOptimization(originalGradient)){
-// stats.collectFinalStats(this, o);
-// return true;
-// }
-// for (currentProjectionIteration = 1; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){
-//
-//
-// currValue = o.getValue();
-// gradient = o.getGradient();
-// currParameters = o.getParameters();
-//
-//
-// if(currentProjectionIteration == 1){
-// //Use gradient
-// initialHessianParameters = 1;
-// }else if(currentProjectionIteration <= history){
-// double[] sk = skList[currentProjectionIteration-2];
-// double[] yk = ykList[currentProjectionIteration-2];
-// initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk);
-// }else{
-// //get the last one
-// double[] sk = skList[history-1];
-// double[] yk = ykList[history-1];
-// initialHessianParameters = MathUtils.dotProduct(sk, yk)/MathUtils.dotProduct(yk, yk);
-// }
-//
-// getDirection();
-//
-// //MatrixOutput.printDoubleArray(direction, "direction");
-// double dot = MathUtils.dotProduct(direction, gradient);
-// if(dot > 0){
-// throw new RuntimeException("Not a descent direction");
-// } if (Double.isNaN(dot)){
-// throw new RuntimeException("dot is not a number!!");
-// }
-// System.arraycopy(currParameters, 0, previousParameters, 0, currParameters.length);
-// System.arraycopy(gradient, 0, previousGradient, 0, gradient.length);
-// lso.reset(direction);
-// step = lineSearch.getStepSize(lso);
-// if(step==-1){
-// System.out.println("Failed to find a step size");
-//// lso.printLineSearchSteps();
-//// System.out.println(stats.prettyPrint(1));
-// stats.collectFinalStats(this, o);
-// return false;
-// }
-// stats.collectIterationStats(this, o);
-//
-// //We are not updating the alpha since it is done in line search already
-// currParameters = o.getParameters();
-// gradient = o.getGradient();
-//
-// if(stop.stopOptimization(gradient)){
-// stats.collectFinalStats(this, o);
-// return true;
-// }
-// double[] diffX = MathUtils.arrayMinus(currParameters, previousParameters);
-// double[] diffGrad = MathUtils.arrayMinus(gradient, previousGradient);
-// //Save new values and discard new ones
-// if(currentProjectionIteration > history){
-// for(int i = 0; i < history-1;i++){
-// skList[i]=skList[i+1];
-// ykList[i]=ykList[i+1];
-// }
-// skList[history-1]=diffX;
-// ykList[history-1]=diffGrad;
-// }else{
-// skList[currentProjectionIteration-1]=diffX;
-// ykList[currentProjectionIteration-1]=diffGrad;
-// }
-// previousValue = currValue;
-// }
-// stats.collectFinalStats(this, o);
-// return false;
-// }
-
-}
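For reference: the deleted optimizer above keeps a ring buffer of the last `history` curvature pairs (s_k, y_k) and scales the initial Hessian by gamma_k = (s_k . y_k)/(y_k . y_k). A minimal free-standing sketch of that bookkeeping (class and helper names are illustrative, not part of the deleted API):

    import java.util.ArrayDeque;
    import java.util.Deque;

    class LbfgsMemorySketch {
        final int history;
        final Deque<double[]> sList = new ArrayDeque<double[]>(); // x_{k+1} - x_k
        final Deque<double[]> yList = new ArrayDeque<double[]>(); // g_{k+1} - g_k

        LbfgsMemorySketch(int history) { this.history = history; }

        static double dot(double[] a, double[] b) {
            double d = 0;
            for (int i = 0; i < a.length; i++) d += a[i] * b[i];
            return d;
        }

        // Called after each step; drops the oldest pair once full,
        // which is what the array-shifting loop above implements.
        void update(double[] diffX, double[] diffGrad) {
            if (sList.size() == history) {
                sList.removeFirst();
                yList.removeFirst();
            }
            sList.addLast(diffX);
            yList.addLast(diffGrad);
        }

        // gamma_k used to initialize H_0 = gamma_k * I.
        double initialHessianScale() {
            if (sList.isEmpty()) return 1.0; // first iteration: plain gradient
            return dot(sList.peekLast(), yList.peekLast())
                 / dot(yList.peekLast(), yList.peekLast());
        }
    }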
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java
deleted file mode 100644
index 6be01bf9..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java
+++ /dev/null
@@ -1,87 +0,0 @@
-package optimization.gradientBasedMethods;
-
-
-/**
- * Defines an optimization objective: holds the current parameter
- * vector, exposes value and gradient computations, and counts how
- * often each is requested.
- *
- * @author javg
- *
- */
-public abstract class Objective {
-
- protected int functionCalls = 0;
- protected int gradientCalls = 0;
- protected int updateCalls = 0;
-
- protected double[] parameters;
-
- //Cached copy of the gradient
- public double[] gradient;
- int debugLevel = 0;
-
- public void setDebugLevel(int level){
- debugLevel = level;
- }
-
- public int getNumParameters() {
- return parameters.length;
- }
-
- public double getParameter(int index) {
- return parameters[index];
- }
-
- public double[] getParameters() {
- return parameters;
- }
-
- public abstract double[] getGradient( );
-
- public void setParameter(int index, double value) {
- parameters[index]=value;
- }
-
- public void setParameters(double[] params) {
- if(parameters == null){
- parameters = new double[params.length];
- }
- updateCalls++;
- System.arraycopy(params, 0, parameters, 0, params.length);
- }
-
-
- public int getNumberFunctionCalls() {
- return functionCalls;
- }
-
- public int getNumberGradientCalls() {
- return gradientCalls;
- }
-
- public int getNumberUpdateCalls() {
- return updateCalls;
- }
-
- public String finalInfoString() {
- return "FE: " + functionCalls + " GE: " + gradientCalls + " Param updates: " +
- updateCalls;
- }
- public void printParameters() {
- System.out.println(toString());
- }
-
- public abstract String toString();
- public abstract double getValue ();
-
- /**
- * Sets the initial objective parameters.
- * For unconstrained models this just aliases the argument as the
- * objective parameters (no copying); a constrained objective should
- * project the parameters first and then set them.
- * @param params
- */
- public void setInitialParameters(double[] params){
- parameters = params;
- }
-
-}
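To make the contract concrete, here is a minimal Objective subclass for the quadratic f(x) = 0.5*||x||^2, whose gradient is x. A sketch only; it assumes it is compiled against the deleted abstract class above:

    class QuadraticObjective extends Objective {
        QuadraticObjective(double[] start) {
            parameters = start.clone();
            gradient = new double[start.length];
        }
        public double getValue() {
            functionCalls++;
            double v = 0;
            for (double p : parameters) v += 0.5 * p * p;
            return v;
        }
        public double[] getGradient() {
            gradientCalls++;
            System.arraycopy(parameters, 0, gradient, 0, parameters.length);
            return gradient;
        }
        public String toString() {
            return java.util.Arrays.toString(parameters);
        }
    }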
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java
deleted file mode 100644
index 96fce5b0..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java
+++ /dev/null
@@ -1,19 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.stopCriteria.StopingCriteria;
-
-public interface Optimizer {
- public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stoping);
-
-
- public double[] getDirection();
- public double getCurrentStep();
- public double getCurrentValue();
- public int getCurrentIteration();
- public void reset();
-
- public void setMaxIterations(int max);
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java
deleted file mode 100644
index afb29d04..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java
+++ /dev/null
@@ -1,11 +0,0 @@
-package optimization.gradientBasedMethods;
-
-
-/**
- *
- * @author javg
- *
- */
-public abstract class ProjectedAbstractGradientBaseMethod extends AbstractGradientBaseMethod implements ProjectedOptimizer{
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java
deleted file mode 100644
index 0186e945..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java
+++ /dev/null
@@ -1,154 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import java.io.IOException;
-
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.DifferentiableLineSearchObjective;
-import optimization.linesearch.LineSearchMethod;
-import optimization.linesearch.ProjectedDifferentiableLineSearchObjective;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.util.MathUtils;
-
-
-/**
- * This class implements projected gradient descent
- * as described in Bertsekas, "Nonlinear Programming",
- * section 2.3.
- *
- * The update is given by:
- * x_{k+1} = x_k + alpha_k*(xbar_k - x_k)
- * where xbar_k is:
- * xbar_k = [x_k - s_k*grad(f(x_k))]+
- * and []+ denotes projection onto the feasible set.
- *
- * alpha is the step size.
- * s_k is a positive scalar that can also be viewed as a step size: by
- * setting alpha to 1 we get x_{k+1} = [x_k - s_k*grad(f(x_k))]+.
- * This is called taking a step along the projection arc (Bertsekas),
- * which is what we use by default.
- *
- * Note that the only place where we actually pick a step size is for
- * s_k, so this behaves just like ordinary gradient descent, except that
- * it uses a different Armijo line search in which we project after
- * taking each step.
- *
- * @author javg
- *
- */
-public class ProjectedGradientDescent extends ProjectedAbstractGradientBaseMethod{
-
-
-
-
- public ProjectedGradientDescent(LineSearchMethod lineSearch) {
- this.lineSearch = lineSearch;
- }
-
- //Use projected differential objective instead
- public void initializeStructures(Objective o, OptimizerStats stats, StopingCriteria stop) {
- lso = new ProjectedDifferentiableLineSearchObjective(o);
- };
-
-
- ProjectedObjective obj;
- public boolean optimize(ProjectedObjective o,OptimizerStats stats, StopingCriteria stop){
- obj = o;
- return super.optimize(o, stats, stop);
- }
-
- public double[] getDirection(){
- for(int i = 0; i< gradient.length; i++){
- direction[i] = -gradient[i];
- }
- return direction;
- }
-
-
-
-
-}
-
-
-
-
-
-
-
-///OLD CODE
-
-//Use projected gradient norm
-//public boolean stopCriteria(double[] gradient){
-// if(originalDirenctionL2Norm == 0){
-// System.out.println("Leaving original direction norm is zero");
-// return true;
-// }
-// if(MathUtils.L2Norm(direction)/originalDirenctionL2Norm < gradientConvergenceValue){
-// System.out.println("Leaving projected gradient Norm smaller than epsilon");
-// return true;
-// }
-// if((previousValue - currValue)/Math.abs(previousValue) < valueConvergenceValue) {
-// System.out.println("Leaving value change below treshold " + previousValue + " - " + currValue);
-// System.out.println(previousValue/currValue + " - " + currValue/currValue
-// + " = " + (previousValue - currValue)/Math.abs(previousValue));
-// return true;
-// }
-// return false;
-//}
-//
-
-//public boolean optimize(ProjectedObjective o,OptimizerStats stats, StopingCriteria stop){
-// stats.collectInitStats(this, o);
-// obj = o;
-// step = 0;
-// currValue = o.getValue();
-// previousValue = Double.MAX_VALUE;
-// gradient = o.getGradient();
-// originalGradientL2Norm = MathUtils.L2Norm(gradient);
-// parameterChange = new double[gradient.length];
-// getDirection();
-// ProjectedDifferentiableLineSearchObjective lso = new ProjectedDifferentiableLineSearchObjective(o,direction);
-//
-// originalDirenctionL2Norm = MathUtils.L2Norm(direction);
-// //MatrixOutput.printDoubleArray(currParameters, "parameters");
-// for (currentProjectionIteration = 0; currentProjectionIteration < maxNumberOfIterations; currentProjectionIteration++){
-// // System.out.println("Iter " + currentProjectionIteration);
-// //o.printParameters();
-//
-//
-//
-// if(stop.stopOptimization(gradient)){
-// stats.collectFinalStats(this, o);
-// lastStepUsed = step;
-// return true;
-// }
-// lso.reset(direction);
-// step = lineSearch.getStepSize(lso);
-// if(step==-1){
-// System.out.println("Failed to find step");
-// stats.collectFinalStats(this, o);
-// return false;
-//
-// }
-//
-// //Update the direction for stopping criteria
-// previousValue = currValue;
-// currValue = o.getValue();
-// gradient = o.getGradient();
-// direction = getDirection();
-// if(MathUtils.dotProduct(gradient, direction) > 0){
-// System.out.println("Not a descent direction");
-// System.out.println(" current stats " + stats.prettyPrint(1));
-// System.exit(-1);
-// }
-// stats.collectIterationStats(this, o);
-// }
-// lastStepUsed = step;
-// stats.collectFinalStats(this, o);
-// return false;
-// }
-
-//public boolean optimize(Objective o,OptimizerStats stats, StopingCriteria stop){
-// System.out.println("Objective is not a projected objective");
-// throw new RuntimeException();
-//}
-
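The whole file boils down to one step of the update from the class comment. A self-contained sketch of a single step along the projection arc, with a box projection standing in for the feasible set (names are illustrative, not the deleted API):

    class ProjectionArcStepSketch {
        // x_{k+1} = [x_k - s*grad(f(x_k))]+ with alpha = 1.
        static double[] step(double[] x, double[] grad, double s,
                             double lo, double hi) {
            double[] next = new double[x.length];
            for (int i = 0; i < x.length; i++) {
                double moved = x[i] - s * grad[i];           // gradient step
                next[i] = Math.min(hi, Math.max(lo, moved)); // []+ projection
            }
            return next;
        }
    }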
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java
deleted file mode 100644
index c3d21393..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java
+++ /dev/null
@@ -1,29 +0,0 @@
-package optimization.gradientBasedMethods;
-
-import optimization.util.MathUtils;
-
-
-/**
- * An objective over a constrained set.
- * When we tell it to set some parameters, it automatically projects the
- * parameters back into the feasible set (e.g. the simplex).
- * When we ask it for the gradient, it automatically returns the
- * projected gradient.
- * @author javg
- *
- */
-public abstract class ProjectedObjective extends Objective{
-
- public abstract double[] projectPoint (double[] point);
-
- public double[] auxParameters;
-
-
- public void setInitialParameters(double[] params){
- setParameters(projectPoint(params));
- }
-
-
-
-
-}
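A minimal concrete ProjectedObjective, for illustration only: the quadratic from the earlier sketch constrained to the non-negative orthant, where projectPoint simply clips negatives to zero (assumes the deleted classes above):

    class NonNegativeQuadratic extends ProjectedObjective {
        NonNegativeQuadratic(double[] start) {
            setInitialParameters(start.clone()); // projects, then sets
        }
        public double[] projectPoint(double[] point) {
            double[] p = point.clone();
            for (int i = 0; i < p.length; i++) p[i] = Math.max(0, p[i]);
            return p;
        }
        public double getValue() {
            functionCalls++;
            double v = 0;
            for (double x : parameters) v += 0.5 * x * x;
            return v;
        }
        public double[] getGradient() {
            gradientCalls++;
            gradient = parameters.clone();
            return gradient;
        }
        public String toString() {
            return java.util.Arrays.toString(parameters);
        }
    }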
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java
deleted file mode 100644
index 81d8403e..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java
+++ /dev/null
@@ -1,10 +0,0 @@
-package optimization.gradientBasedMethods;
-
-
-
-public interface ProjectedOptimizer extends Optimizer{
-
-
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java
deleted file mode 100644
index 6340ef73..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java
+++ /dev/null
@@ -1,86 +0,0 @@
-package optimization.gradientBasedMethods.stats;
-
-import java.util.ArrayList;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.Optimizer;
-import optimization.util.MathUtils;
-import optimization.util.StaticTools;
-
-
-public class OptimizerStats {
-
- double start = 0;
- double totalTime = 0;
-
- String objectiveFinalStats;
-
- ArrayList<Double> gradientNorms = new ArrayList<Double>();
- ArrayList<Double> steps = new ArrayList<Double>();
- ArrayList<Double> value = new ArrayList<Double>();
- ArrayList<Integer> iterations = new ArrayList<Integer>();
- double prevValue =0;
-
- public void reset(){
- start = 0;
- totalTime = 0;
-
- objectiveFinalStats="";
-
- gradientNorms.clear();
- steps.clear();
- value.clear();
- iterations.clear();
- prevValue =0;
- }
-
- public void startTime() {
- start = System.currentTimeMillis();
- }
- public void stopTime() {
- totalTime += System.currentTimeMillis() - start;
- }
-
- public String prettyPrint(int level){
- StringBuffer res = new StringBuffer();
- res.append("Total time " + totalTime/1000 + " seconds \n" + "Iterations " + iterations.size() + "\n");
- res.append(objectiveFinalStats+"\n");
- if(level > 0){
- if(iterations.size() > 0){
- res.append("\tIteration"+iterations.get(0)+"\tstep: "+StaticTools.prettyPrint(steps.get(0), "0.00E00", 6)+ "\tgradientNorm "+
- StaticTools.prettyPrint(gradientNorms.get(0), "0.00000E00", 10)+ "\tvalue "+ StaticTools.prettyPrint(value.get(0), "0.000000E00",11)+"\n");
- }
- for(int i = 1; i < iterations.size(); i++){
- res.append("\tIteration:\t"+iterations.get(i)+"\tstep:"+StaticTools.prettyPrint(steps.get(i), "0.00E00", 6)+ "\tgradientNorm "+
- StaticTools.prettyPrint(gradientNorms.get(i), "0.00000E00", 10)+
- "\tvalue:\t"+ StaticTools.prettyPrint(value.get(i), "0.000000E00",11)+
- "\tvalueDiff:\t"+ StaticTools.prettyPrint((value.get(i-1)-value.get(i)), "0.000000E00",11)+
- "\n");
- }
- }
- return res.toString();
- }
-
-
- public void collectInitStats(Optimizer optimizer, Objective objective){
- startTime();
- iterations.add(-1);
- gradientNorms.add(MathUtils.L2Norm(objective.getGradient()));
- steps.add(0.0);
- value.add(objective.getValue());
- }
-
- public void collectIterationStats(Optimizer optimizer, Objective objective){
- iterations.add(optimizer.getCurrentIteration());
- gradientNorms.add(MathUtils.L2Norm(objective.getGradient()));
- steps.add(optimizer.getCurrentStep());
- value.add(optimizer.getCurrentValue());
- }
-
-
- public void collectFinalStats(Optimizer optimizer, Objective objective){
- stopTime();
- objectiveFinalStats = objective.finalInfoString();
- }
-
-}
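A hypothetical driver showing how the stats object is threaded through a run. Everything here uses the deleted classes above except the GradientL2Norm tolerance constructor, which is an assumption of this sketch:

    import optimization.gradientBasedMethods.ProjectedGradientDescent;
    import optimization.gradientBasedMethods.ProjectedObjective;
    import optimization.gradientBasedMethods.stats.OptimizerStats;
    import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
    import optimization.linesearch.GenericPickFirstStep;
    import optimization.stopCriteria.GradientL2Norm;
    import optimization.stopCriteria.StopingCriteria;

    class DriverSketch {
        static boolean run(ProjectedObjective objective) {
            OptimizerStats stats = new OptimizerStats();
            StopingCriteria stop = new GradientL2Norm(1e-4); // assumed ctor
            ProjectedGradientDescent opt = new ProjectedGradientDescent(
                    new ArmijoLineSearchMinimizationAlongProjectionArc(
                            new GenericPickFirstStep(1)));
            opt.setMaxIterations(100);
            boolean converged = opt.optimize(objective, stats, stop);
            System.out.println(stats.prettyPrint(1)); // per-iteration trace
            return converged;
        }
    }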
diff --git a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java b/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java
deleted file mode 100644
index d65a1267..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java
+++ /dev/null
@@ -1,70 +0,0 @@
-package optimization.gradientBasedMethods.stats;
-
-import java.util.ArrayList;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.Optimizer;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.ProjectedOptimizer;
-import optimization.util.MathUtils;
-import optimization.util.StaticTools;
-
-
-public class ProjectedOptimizerStats extends OptimizerStats{
-
-
-
- public void reset(){
- super.reset();
- projectedGradientNorms.clear();
- }
-
- ArrayList<Double> projectedGradientNorms = new ArrayList<Double>();
-
- public String prettyPrint(int level){
- StringBuffer res = new StringBuffer();
- res.append("Total time " + totalTime/1000 + " seconds \n" + "Iterations " + iterations.size() + "\n");
- res.append(objectiveFinalStats+"\n");
- if(level > 0){
- if(iterations.size() > 0){
- res.append("\tIteration"+iterations.get(0)+"\tstep: "+
- StaticTools.prettyPrint(steps.get(0), "0.00E00", 6)+ "\tgradientNorm "+
- StaticTools.prettyPrint(gradientNorms.get(0), "0.00000E00", 10)
- + "\tdirection"+
- StaticTools.prettyPrint(projectedGradientNorms.get(0), "0.00000E00", 10)+
- "\tvalue "+ StaticTools.prettyPrint(value.get(0), "0.000000E00",11)+"\n");
- }
- for(int i = 1; i < iterations.size(); i++){
- res.append("\tIteration"+iterations.get(i)+"\tstep: "+StaticTools.prettyPrint(steps.get(i), "0.00E00", 6)+ "\tgradientNorm "+
- StaticTools.prettyPrint(gradientNorms.get(i), "0.00000E00", 10)+
- "\t direction "+
- StaticTools.prettyPrint(projectedGradientNorms.get(i), "0.00000E00", 10)+
- "\tvalue "+ StaticTools.prettyPrint(value.get(i), "0.000000E00",11)+
- "\tvalueDiff "+ StaticTools.prettyPrint((value.get(i-1)-value.get(i)), "0.000000E00",11)+
- "\n");
- }
- }
- return res.toString();
- }
-
-
- public void collectInitStats(Optimizer optimizer, Objective objective){
- startTime();
- }
-
- public void collectIterationStats(Optimizer optimizer, Objective objective){
- iterations.add(optimizer.getCurrentIteration());
- gradientNorms.add(MathUtils.L2Norm(objective.getGradient()));
- projectedGradientNorms.add(MathUtils.L2Norm(optimizer.getDirection()));
- steps.add(optimizer.getCurrentStep());
- value.add(optimizer.getCurrentValue());
- }
-
-
-
- public void collectFinalStats(Optimizer optimizer, Objective objective){
- stopTime();
- objectiveFinalStats = objective.finalInfoString();
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java
deleted file mode 100644
index c9f9b8df..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java
+++ /dev/null
@@ -1,102 +0,0 @@
-package optimization.linesearch;
-
-import optimization.util.Interpolation;
-
-
-/**
- * Implements backtracking line search as described on page 37 of
- * "Numerical Optimization". Also known as the Armijo rule.
- * @author javg
- *
- */
-public class ArmijoLineSearchMinimization implements LineSearchMethod{
-
- /**
- * How much should the step size decrease at each iteration.
- */
- double contractionFactor = 0.5;
- double c1 = 0.0001;
-
- double sigma1 = 0.1;
- double sigma2 = 0.9;
-
-
-
- double initialStep;
- int maxIterations = 10;
-
-
- public ArmijoLineSearchMinimization(){
- this.initialStep = 1;
- }
-
- //Experiment
- double previousStepPicked = -1;
- double previousInitGradientDot = -1;
- double currentInitGradientDot = -1;
-
-
- public void reset(){
- previousStepPicked = -1;
- previousInitGradientDot = -1;
- currentInitGradientDot = -1;
- }
-
- public void setInitialStep(double initial){
- initialStep = initial;
- }
-
- /**
- * Backtracking: shrink alpha until the sufficient decrease condition holds.
- */
-
- public double getStepSize(DifferentiableLineSearchObjective o) {
- currentInitGradientDot = o.getInitialGradient();
- //Should update all in the objective
- o.updateAlpha(initialStep);
- int nrIterations = 0;
- //System.out.println("tried alpha" + initialStep + " value " + o.getCurrentValue());
- while(!WolfeConditions.suficientDecrease(o,c1)){
- if(nrIterations >= maxIterations){
- o.printLineSearchSteps();
- return -1;
- }
- double alpha=o.getAlpha();
- double alphaTemp =
- Interpolation.quadraticInterpolation(o.getOriginalValue(), o.getInitialGradient(), alpha, o.getCurrentValue());
- if(alphaTemp >= sigma1*alpha && alphaTemp <= sigma2*alpha){
-// System.out.println("using alpha temp " + alphaTemp);
- alpha = alphaTemp;
- }else{
-// System.out.println("Discarding alpha temp " + alphaTemp);
- alpha = alpha*contractionFactor;
- }
-// double alpha =o.getAlpha()*contractionFactor;
-
- o.updateAlpha(alpha);
- //System.out.println("tried alpha" + alpha+ " value " + o.getCurrentValue());
- nrIterations++;
- }
-
- //System.out.println("Leaving line search; used:");
- //o.printLineSearchSteps();
-
- previousInitGradientDot = currentInitGradientDot;
- previousStepPicked = o.getAlpha();
- return o.getAlpha();
- }
-
- public double getInitialGradient() {
- return currentInitGradientDot;
-
- }
-
- public double getPreviousInitialGradient() {
- return previousInitGradientDot;
- }
-
- public double getPreviousStepUsed() {
- return previousStepPicked;
- }
-
-}
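Stripped of the caching and interpolation, the core loop above is plain backtracking. A self-contained sketch (the objective is passed in directly; names are illustrative):

    import java.util.function.Function;

    class BacktrackingSketch {
        // Shrink alpha until f(x + alpha*d) <= f(x) + c1*alpha*(grad f . d).
        static double armijoStep(Function<double[], Double> f, double[] x,
                                 double[] grad, double[] d, double c1,
                                 double shrink, int maxIter) {
            double fx = f.apply(x);
            double slope = 0;
            for (int i = 0; i < x.length; i++) slope += grad[i] * d[i];
            double alpha = 1.0;
            for (int it = 0; it < maxIter; it++) {
                double[] trial = new double[x.length];
                for (int i = 0; i < x.length; i++) trial[i] = x[i] + alpha * d[i];
                if (f.apply(trial) <= fx + c1 * alpha * slope) return alpha;
                alpha *= shrink; // contractionFactor in the deleted class
            }
            return -1; // same failure convention as above
        }
    }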
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java
deleted file mode 100644
index e153f2da..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java
+++ /dev/null
@@ -1,141 +0,0 @@
-package optimization.linesearch;
-
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.util.Interpolation;
-import optimization.util.MathUtils;
-
-
-
-
-
-/**
- * Implements the Armijo rule line search along the projection arc
- * ("Nonlinear Programming", page 230).
- * To be used with projected gradient methods.
- *
- * Recall that Armijo tries successive step sizes alpha until the
- * sufficient decrease condition is satisfied:
- * f(x+alpha*direction) < f(x) + alpha*c1*grad(f)*direction
- *
- * In this case we are optimizing over a convex set X, so we must
- * guarantee that the new point stays inside the constraints.
- * First, the direction has to be feasible (inside the constraints); it
- * is defined as:
- * d = (x_k_f - x_k), where x_k_f is a feasible point.
- * So the Armijo condition can be rewritten as:
- * f(x+alpha*(x_k_f - x_k)) < f(x) + c1*grad(f)*(x_k_f - x_k)
- * with x_k_f defined as:
- * [x_k - alpha*grad(f)]+
- * where []+ denotes projection onto the feasible set.
- * In other words, we take a step along the negative gradient (gradient
- * descent) and then project that point back onto the feasible set.
- * Note that if the point is already feasible, we are back to the normal
- * Armijo rule.
- *
- * @author javg
- *
- */
-public class ArmijoLineSearchMinimizationAlongProjectionArc implements LineSearchMethod{
-
- /**
- * How much should the step size decrease at each iteration.
- */
- double contractionFactor = 0.5;
- double c1 = 0.0001;
-
-
- double initialStep;
- int maxIterations = 100;
-
-
- double sigma1 = 0.1;
- double sigma2 = 0.9;
-
- //Experiment
- double previousStepPicked = -1;
- double previousInitGradientDot = -1;
- double currentInitGradientDot = -1;
-
- GenericPickFirstStep strategy;
-
-
- public void reset(){
- previousStepPicked = -1;
- previousInitGradientDot = -1;
- currentInitGradientDot = -1;
- }
-
-
- public ArmijoLineSearchMinimizationAlongProjectionArc(){
- this.initialStep = 1;
- }
-
- public ArmijoLineSearchMinimizationAlongProjectionArc(GenericPickFirstStep strategy){
- this.strategy = strategy;
- this.initialStep = strategy.getFirstStep(this);
- }
-
-
- public void setInitialStep(double initial){
- this.initialStep = initial;
- }
-
- /**
- * Shrink alpha until sufficient decrease holds along the projection arc.
- */
-
- public double getStepSize(DifferentiableLineSearchObjective o) {
-
-
- //Should update all in the objective
- initialStep = strategy.getFirstStep(this);
- o.updateAlpha(initialStep);
- previousInitGradientDot=currentInitGradientDot;
- currentInitGradientDot=o.getCurrentGradient();
- int nrIterations = 0;
-
- //Armijo rule: the current value has to be smaller than the original value plus a small multiple of the gradient term
- while(o.getCurrentValue() >
- o.getOriginalValue() + c1*(o.getCurrentGradient())){
-// System.out.println("curr value "+o.getCurrentValue());
-// System.out.println("original value "+o.getOriginalValue());
-// System.out.println("GRADIENT decrease" +(MathUtils.dotProduct(o.o.gradient,
-// MathUtils.arrayMinus(o.originalParameters,((ProjectedObjective)o.o).auxParameters))));
-// System.out.println("GRADIENT SAVED" + o.getCurrentGradient());
- if(nrIterations >= maxIterations){
- System.out.println("Could not find a step; leaving line search with -1");
- o.printLineSearchSteps();
- return -1;
- }
- double alpha=o.getAlpha();
- double alphaTemp =
- Interpolation.quadraticInterpolation(o.getOriginalValue(), o.getInitialGradient(), alpha, o.getCurrentValue());
- if(alphaTemp >= sigma1*alpha && alphaTemp <= sigma2*alpha){
- alpha = alphaTemp;
- }else{
- alpha = alpha*contractionFactor;
- }
-// double alpha =obj.getAlpha()*contractionFactor;
- o.updateAlpha(alpha);
- nrIterations++;
- }
-// System.out.println("curr value "+o.getCurrentValue());
-// System.out.println("original value "+o.getOriginalValue());
-// System.out.println("sufficient decrease" +c1*o.getCurrentGradient());
-// System.out.println("Leaving line search; used:");
-// o.printSmallLineSearchSteps();
-
- previousStepPicked = o.getAlpha();
- return o.getAlpha();
- }
-
- public double getInitialGradient() {
- return currentInitGradientDot;
-
- }
-
- public double getPreviousInitialGradient() {
- return previousInitGradientDot;
- }
-
- public double getPreviousStepUsed() {
- return previousStepPicked;
- }
-
-}
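The rewritten Armijo condition from the class comment, as a free-standing test with a non-negativity projection standing in for the feasible set (a sketch, not the deleted API):

    class ProjectedArmijoSketch {
        // Accept alpha when
        // f([x - alpha*g]+) <= f(x) + c1 * g . ([x - alpha*g]+ - x).
        static boolean accepts(java.util.function.Function<double[], Double> f,
                               double[] x, double[] g, double alpha, double c1) {
            double[] trial = new double[x.length];
            for (int i = 0; i < x.length; i++)
                trial[i] = Math.max(0, x[i] - alpha * g[i]); // [x - alpha*g]+
            double rhs = f.apply(x);
            for (int i = 0; i < x.length; i++)
                rhs += c1 * g[i] * (trial[i] - x[i]);
            return f.apply(trial) <= rhs;
        }
    }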
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java
deleted file mode 100644
index a5bc958e..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java
+++ /dev/null
@@ -1,185 +0,0 @@
-package optimization.linesearch;
-
-import gnu.trove.TDoubleArrayList;
-import gnu.trove.TIntArrayList;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.util.MathUtils;
-import optimization.util.StaticTools;
-
-
-
-import util.MathUtil;
-import util.Printing;
-
-
-/**
- * A wrapper class around the actual objective, used to perform
- * line search. This class does a lot of caching in order to keep the
- * optimization code legible. For the applications we use it for,
- * caching the entire history of evaluations should be a win.
- *
- * Note: the lastEvaluatedAt value is very important, since we will use
- * it to avoid re-evaluating the gradient after the line search.
- *
- * The differentiable line search objective defines a search along the
- * ray given by a direction of the main objective.
- * It defines the following functions,
- * where f is the original objective function:
- * g(alpha) = f(x_0 + alpha*direction)
- * g'(alpha) = f'(x_0 + alpha*direction)*direction
- *
- * @author joao
- *
- */
-public class DifferentiableLineSearchObjective {
-
-
-
- Objective o;
- int nrIterations;
- TDoubleArrayList steps;
- TDoubleArrayList values;
- TDoubleArrayList gradients;
-
- //These variables must not change
- public double[] originalParameters;
- public double[] searchDirection;
-
-
- /**
- * Defines a line search objective over the given objective, which is
- * used to calculate values and gradients. The search direction (the
- * ray to search along) is supplied later through reset(); note that
- * the direction does not depend on the objective but on the method.
- * @param o
- */
- public DifferentiableLineSearchObjective(Objective o) {
- this.o = o;
- originalParameters = new double[o.getNumParameters()];
- searchDirection = new double[o.getNumParameters()];
- steps = new TDoubleArrayList();
- values = new TDoubleArrayList();
- gradients = new TDoubleArrayList();
- }
- /**
- * Called whenever we start a new iteration.
- * Receives the ray along which we will search and resets all cached
- * values.
- */
- public void reset(double[] direction){
- //Copy initial values
- System.arraycopy(o.getParameters(), 0, originalParameters, 0, o.getNumParameters());
- System.arraycopy(direction, 0, searchDirection, 0, o.getNumParameters());
-
- //Initialize variables
- nrIterations = 0;
- steps.clear();
- values.clear();
- gradients.clear();
-
- values.add(o.getValue());
- gradients.add(MathUtils.dotProduct(o.getGradient(),direction));
- steps.add(0);
- }
-
-
- /**
- * Updates the current value of alpha:
- * takes a step of size alpha in the search direction, then gets the
- * real objective value and gradient and caches all required information.
- */
- public void updateAlpha(double alpha){
- if(alpha < 0){
- System.out.println("alpha may not be smaller than zero");
- throw new RuntimeException();
- }
- nrIterations++;
- steps.add(alpha);
- //x_t+1 = x_t + alpha*direction
- System.arraycopy(originalParameters,0, o.getParameters(), 0, originalParameters.length);
- MathUtils.plusEquals(o.getParameters(), searchDirection, alpha);
- o.setParameters(o.getParameters());
-// System.out.println("Took a step of " + alpha + " new value " + o.getValue());
- values.add(o.getValue());
- gradients.add(MathUtils.dotProduct(o.getGradient(),searchDirection));
- }
-
-
-
- public int getNrIterations(){
- return nrIterations;
- }
-
- /**
- * return g(alpha) at line-search iteration iter
- * @param iter
- * @return
- */
- public double getValue(int iter){
- return values.get(iter);
- }
-
- public double getCurrentValue(){
- return values.get(nrIterations);
- }
-
- public double getOriginalValue(){
- return values.get(0);
- }
-
- /**
- * return g'(alpha) at line-search iteration iter
- * @param iter
- * @return
- */
- public double getGradient(int iter){
- return gradients.get(iter);
- }
-
- public double getCurrentGradient(){
- return gradients.get(nrIterations);
- }
-
- public double getInitialGradient(){
- return gradients.get(0);
- }
-
-
-
-
- public double getAlpha(){
- return steps.get(nrIterations);
- }
-
- public void printLineSearchSteps(){
- System.out.println(
- " Steps size "+steps.size() +
- " Values size "+values.size() +
- " Gradients size "+gradients.size());
- for(int i =0; i < steps.size();i++){
- System.out.println("Iter " + i + " step " + steps.get(i) +
- " value " + values.get(i) + " grad " + gradients.get(i));
- }
- }
-
- public void printSmallLineSearchSteps(){
- for(int i =0; i < steps.size();i++){
- System.out.print(StaticTools.prettyPrint(steps.get(i), "0.0000E00",8) + " ");
- }
- System.out.println();
- }
-
- public static void main(String[] args) {
-
- }
-
-}
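The one-dimensional restriction this class caches can be written directly. A minimal sketch of g'(alpha) = grad f(x + alpha*d) . d (the Grad interface is an assumption of the sketch):

    class RaySketch {
        interface Grad { double[] at(double[] x); }

        // g'(alpha): directional derivative of f along d at x + alpha*d.
        static double gPrime(Grad gradF, double[] x, double[] d, double alpha) {
            double[] p = new double[x.length];
            for (int i = 0; i < x.length; i++) p[i] = x[i] + alpha * d[i];
            double[] g = gradF.at(p);
            double dot = 0;
            for (int i = 0; i < d.length; i++) dot += g[i] * d[i];
            return dot; // negative while we are still descending along d
        }
    }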
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java
deleted file mode 100644
index a33eb311..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java
+++ /dev/null
@@ -1,20 +0,0 @@
-package optimization.linesearch;
-
-
-public class GenericPickFirstStep{
- double _initValue;
- public GenericPickFirstStep(double initValue) {
- _initValue = initValue;
- }
-
- public double getFirstStep(LineSearchMethod ls){
- return _initValue;
- }
- public void collectInitValues(LineSearchMethod ls){
-
- }
-
- public void collectFinalValues(LineSearchMethod ls){
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java
deleted file mode 100644
index 0deebcdb..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java
+++ /dev/null
@@ -1,25 +0,0 @@
-package optimization.linesearch;
-
-
-public class InterpolationPickFirstStep extends GenericPickFirstStep{
- public InterpolationPickFirstStep(double initValue) {
- super(initValue);
- }
-
- public double getFirstStep(LineSearchMethod ls){
- if(ls.getPreviousStepUsed() != -1 && ls.getPreviousInitialGradient()!=0){
- double newStep = Math.min(300, 1.02*ls.getPreviousInitialGradient()*ls.getPreviousStepUsed()/ls.getInitialGradient());
- // System.out.println("proposing " + newStep);
- return newStep;
-
- }
- return _initValue;
- }
- public void collectInitValues(WolfRuleLineSearch ls){
-
- }
-
- public void collectFinalValues(WolfRuleLineSearch ls){
-
- }
-}
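A worked instance of the heuristic above, with made-up numbers: a previous step of 0.5 with directional derivative -4 and a current directional derivative of -2 gives min(300, 1.02 * (-4) * 0.5 / (-2)) = 1.02.

    class FirstStepSketch {
        public static void main(String[] args) {
            double prevStep = 0.5, prevGrad = -4.0, currGrad = -2.0;
            double newStep = Math.min(300, 1.02 * prevGrad * prevStep / currGrad);
            System.out.println(newStep); // prints 1.02
        }
    }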
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java
deleted file mode 100644
index 80cd7f39..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java
+++ /dev/null
@@ -1,14 +0,0 @@
-package optimization.linesearch;
-
-
-public interface LineSearchMethod {
-
- double getStepSize(DifferentiableLineSearchObjective o);
-
- public double getInitialGradient();
- public double getPreviousInitialGradient();
- public double getPreviousStepUsed();
-
- public void setInitialStep(double initial);
- public void reset();
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java
deleted file mode 100644
index 4b354fd9..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package optimization.linesearch;
-
-/**
- * "Non-Newton" since we don't always try 1 first...
- * Not sure if that is even useful for Newton methods.
- * @author javg
- *
- */
-public class NonNewtonInterpolationPickFirstStep extends GenericPickFirstStep{
- public NonNewtonInterpolationPickFirstStep(double initValue) {
- super(initValue);
- }
-
- public double getFirstStep(LineSearchMethod ls){
-// System.out.println("Previous step used " + ls.getPreviousStepUsed());
-// System.out.println("Previous gradient " + ls.getPreviousInitialGradient());
-// System.out.println("Current gradient " + ls.getInitialGradient());
- if(ls.getPreviousStepUsed() != -1 && ls.getPreviousInitialGradient()!=0){
- double newStep = 1.01*ls.getPreviousInitialGradient()*ls.getPreviousStepUsed()/ls.getInitialGradient();
- //System.out.println("Suggesting " + newStep);
- return newStep;
-
- }
- return _initValue;
- }
- public void collectInitValues(WolfRuleLineSearch ls){
-
- }
-
- public void collectFinalValues(WolfRuleLineSearch ls){
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java
deleted file mode 100644
index 29ccbc32..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java
+++ /dev/null
@@ -1,137 +0,0 @@
-package optimization.linesearch;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.util.MathUtils;
-import optimization.util.MatrixOutput;
-
-
-/**
- * See ArmijoLineSearchMinimizationAlongProjectionArc for description
- * @author javg
- *
- */
-public class ProjectedDifferentiableLineSearchObjective extends DifferentiableLineSearchObjective{
-
-
-
- ProjectedObjective obj;
- public ProjectedDifferentiableLineSearchObjective(Objective o) {
- super(o);
- if(!(o instanceof ProjectedObjective)){
- System.out.println("Must receive a projected objective");
- throw new RuntimeException();
- }
- obj = (ProjectedObjective) o;
- }
-
-
-
- public double[] projectPoint (double[] point){
- return ((ProjectedObjective)o).projectPoint(point);
- }
- public void updateAlpha(double alpha){
- if(alpha < 0){
- System.out.println("alpha may not be smaller than zero");
- throw new RuntimeException();
- }
-
- if(obj.auxParameters == null){
- obj.auxParameters = new double[obj.getParameters().length];
- }
-
- nrIterations++;
-
- steps.add(alpha);
- System.arraycopy(originalParameters, 0, obj.auxParameters, 0, obj.auxParameters.length);
-
- //Take a step into the search direction
-
-// MatrixOutput.printDoubleArray(obj.getGradient(), "gradient");
-
-// alpha=gradients.get(0)*alpha/(gradients.get(gradients.size()-1));
-
- //x_t+1 = x_t - alpha*gradient = x_t + alpha*direction
- MathUtils.plusEquals(obj.auxParameters, searchDirection, alpha);
-// MatrixOutput.printDoubleArray(obj.auxParameters, "before projection");
- obj.auxParameters = projectPoint(obj.auxParameters);
-// MatrixOutput.printDoubleArray(obj.auxParameters, "after projection");
- o.setParameters(obj.auxParameters);
-// System.out.println("new parameters");
-// o.printParameters();
- values.add(o.getValue());
- //Computes the new gradient x_k-[x_k-alpha*Gradient(x_k)]+
- MathUtils.minusEqualsInverse(originalParameters,obj.auxParameters,1);
-// MatrixOutput.printDoubleArray(obj.auxParameters, "new gradient");
- //Dot product between the new direction and the new gradient
- double gradient = MathUtils.dotProduct(obj.auxParameters,searchDirection);
- gradients.add(gradient);
- if(gradient > 0){
- System.out.println("Gradient on line search has to be smaller than zero");
- System.out.println("Iter: " + nrIterations);
- MatrixOutput.printDoubleArray(obj.auxParameters, "new direction");
- MatrixOutput.printDoubleArray(searchDirection, "search direction");
- throw new RuntimeException();
-
- }
-
- }
-
- /**
- *
- */
-// public void updateAlpha(double alpha){
-//
-// if(alpha < 0){
-// System.out.println("alpha may not be smaller that zero");
-// throw new RuntimeException();
-// }
-//
-// nrIterations++;
-// steps.add(alpha);
-// //x_t+1 = x_t - alpha*direction
-// System.arraycopy(originalParameters, 0, parametersChange, 0, parametersChange.length);
-//// MatrixOutput.printDoubleArray(parametersChange, "parameters before step");
-//// System.out.println("Step" + alpha);
-// MatrixOutput.printDoubleArray(originalGradient, "gradient + " + alpha);
-//
-// MathUtils.minusEquals(parametersChange, originalGradient, alpha);
-//
-// //Project the points into the feasibility set
-//// MatrixOutput.printDoubleArray(parametersChange, "before projection");
-// //x_k(alpha) = [x_k - alpha*grad f(x_k)]+
-// parametersChange = projectPoint(parametersChange);
-//// MatrixOutput.printDoubleArray(parametersChange, "after projection");
-// o.setParameters(parametersChange);
-// values.add(o.getValue());
-// //Computes the new direction x_k-[x_k-alpha*Gradient(x_k)]+
-//
-// direction=MathUtils.arrayMinus(parametersChange,originalParameters);
-//// MatrixOutput.printDoubleArray(direction, "new direction");
-//
-// double gradient = MathUtils.dotProduct(originalGradient,direction);
-// gradients.add(gradient);
-// if(gradient > 1E-10){
-// System.out.println("cosine " + gradient/(MathUtils.L2Norm(originalGradient)*MathUtils.L2Norm(direction)));
-//
-//
-// System.out.println("not a descent direction for alpha " + alpha);
-// System.arraycopy(originalParameters, 0, parametersChange, 0, parametersChange.length);
-// MathUtils.minusEquals(parametersChange, originalGradient, 1E-20);
-//
-// parametersChange = projectPoint(parametersChange);
-// direction=MathUtils.arrayMinus(parametersChange,originalParameters);
-// gradient = MathUtils.dotProduct(originalGradient,direction);
-// if(gradient > 0){
-// System.out.println("Direction is really non-descent evern for small alphas:" + gradient);
-// }
-// System.out.println("ProjecteLineSearchObjective: Should be a descent direction at " + nrIterations + ": "+ gradient);
-//// System.out.println(Printing.doubleArrayToString(originalGradient, null,"Original gradient"));
-//// System.out.println(Printing.doubleArrayToString(originalParameters, null,"Original parameters"));
-//// System.out.println(Printing.doubleArrayToString(parametersChange, null,"Projected parameters"));
-//// System.out.println(Printing.doubleArrayToString(direction, null,"Direction"));
-// throw new RuntimeException();
-// }
-// }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java
deleted file mode 100644
index 5489f2d0..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java
+++ /dev/null
@@ -1,300 +0,0 @@
-package optimization.linesearch;
-
-import java.io.PrintStream;
-import java.util.ArrayList;
-
-import optimization.util.Interpolation;
-
-
-
-
-/**
- *
- * @author javg
- *
- */
-public class WolfRuleLineSearch implements LineSearchMethod{
-
- GenericPickFirstStep pickFirstStep;
-
- double c1 = 1.0E-4;
- double c2 = 0.9;
-
- //Application dependent
- double maxStep=100;
-
- int extrapolationIteration;
- int maxExtrapolationIteration = 1000;
-
-
- double minZoomDiffTresh = 10E-10;
-
-
- ArrayList<Double> steps;
- ArrayList<Double> gradientDots;
- ArrayList<Double> functionVals;
-
- int debugLevel = 0;
- boolean foudStep = false;
-
- public WolfRuleLineSearch(GenericPickFirstStep pickFirstStep){
- this.pickFirstStep = pickFirstStep;
-
- }
-
-
-
-
- public WolfRuleLineSearch(GenericPickFirstStep pickFirstStep, double c1, double c2){
- this.pickFirstStep = pickFirstStep;
- initialStep = pickFirstStep.getFirstStep(this);
- this.c1 = c1;
- this.c2 = c2;
- }
-
- public void setDebugLevel(int level){
- debugLevel = level;
- }
-
- //Experiment
- double previousStepPicked = -1;
- double previousInitGradientDot = -1;
- double currentInitGradientDot = -1;
-
- double initialStep;
-
-
- public void reset(){
- previousStepPicked = -1;
- previousInitGradientDot = -1;
- currentInitGradientDot = -1;
- if(steps != null)
- steps.clear();
- if(gradientDots != null)
- gradientDots.clear();
- if(functionVals != null)
- functionVals.clear();
- }
-
- public void setInitialStep(double initial){
- initialStep = pickFirstStep.getFirstStep(this);
- }
-
-
-
- /**
- * Implements Wolfe line search as described in Nocedal.
- * The process consists of two stages. In the first stage we look for
- * the biggest step size that still satisfies the sufficient decrease
- * condition: we keep increasing the initial step size until either we
- * find a step satisfying the curvature condition (success), or we fail
- * the sufficient decrease condition and so cannot increase any further,
- * in which case we call zoom with that maximum step, or we pass the
- * minimum, in which case we call zoom the same way.
- *
- */
- public double getStepSize(DifferentiableLineSearchObjective objective){
- //System.out.println("entering line search");
-
- foudStep = false;
- if(debugLevel >= 1){
- steps = new ArrayList<Double>();
- gradientDots = new ArrayList<Double>();
- functionVals =new ArrayList<Double>();
- }
-
- //test
- currentInitGradientDot = objective.getInitialGradient();
-
-
- double previousValue = objective.getCurrentValue();
- double previousStep = 0;
- double currentStep =pickFirstStep.getFirstStep(this);
- for(extrapolationIteration = 0;
- extrapolationIteration < maxExtrapolationIteration; extrapolationIteration++){
-
- objective.updateAlpha(currentStep);
- double currentValue = objective.getCurrentValue();
- if(debugLevel >= 1){
- steps.add(currentStep);
- functionVals.add(currentValue);
- gradientDots.add(objective.getCurrentGradient());
- }
-
-
- //The current step no longer satisfies the sufficient decrease
- // condition, so we cannot go any bigger than that: call zoom.
- if(!WolfeConditions.suficientDecrease(objective,c1)||
- (extrapolationIteration > 0 && currentValue >= previousValue)){
- currentStep = zoom(objective,previousStep,currentStep,objective.nrIterations-1,objective.nrIterations);
- break;
- }
-
- //Satisfying both conditions ready to leave
- if(WolfeConditions.sufficientCurvature(objective,c1,c2)){
- //Found step
- foudStep = true;
- break;
- }
-
- /**
- * This means that we already passed the minimum, since the dot product
- * that should be negative (descent direction) is now positive, so we
- * cannot increase any more. On the other hand, since we know the
- * direction is a descent direction, the value of the objective at the
- * current step is surely smaller than at the previous step, so we swap
- * the order.
- */
- if(objective.getCurrentGradient() >= 0){
- currentStep = zoom(objective,currentStep,previousStep,objective.nrIterations,objective.nrIterations-1);
- break;
- }
-
-
- //Ok, so we can still get a bigger step,
- double aux = currentStep;
- //currentStep = currentStep*2;
- if(Math.abs(currentStep-maxStep)>1.1e-2){
- currentStep = (currentStep+maxStep)/2;
- }else{
- currentStep = currentStep*2;
- }
- previousStep = aux;
- previousValue = currentValue;
- //Could be done better
- if(currentStep >= maxStep){
- System.out.println("Exceeded max step... calling zoom with maxStepSize");
- currentStep = zoom(objective,previousStep,currentStep,objective.nrIterations-1,objective.nrIterations);
- }
- }
- if(!foudStep){
- System.out.println("Wolfe rule exceeded the maximum number of iterations");
- if(debugLevel >= 1){
- printSmallWolfeStats(System.out);
-// System.out.println("Line search values");
-// DebugHelpers.getLineSearchGraph(o, direction, originalParameters,origValue, origGradDirectionDot,c1,c2);
- }
- return -1;
- }
- if(debugLevel >= 1){
- printSmallWolfeStats(System.out);
- }
-
- previousStepPicked = currentStep;
- previousInitGradientDot = currentInitGradientDot;
-// objective.printLineSearchSteps();
- return currentStep;
- }
-
-
-
-
-
- public void printWolfeStats(PrintStream out){
- for(int i = 0; i < steps.size(); i++){
- out.println("Step " + steps.get(i) + " value " + functionVals.get(i) + " dot " + gradientDots.get(i));
- }
- }
-
- public void printSmallWolfeStats(PrintStream out){
- for(int i = 0; i < steps.size(); i++){
- out.print(steps.get(i) + ":"+functionVals.get(i)+":"+gradientDots.get(i)+" ");
- }
- System.out.println();
- }
-
-
-
- /**
- * Picks a step satisfying the strong Wolfe conditions from the interval
- * between lowerStep and higherStep chosen by the routine above.
- *
- * Both lowerStep and higherStep have already been evaluated, so we only
- * need to pass the iterations at which they were evaluated and save the
- * extra evaluations.
- *
- * We know that lowerStepValue has to be smaller than higherStepValue,
- * and that a point satisfying both conditions exists in that interval.
- *
- * lowerStep always satisfies at least the sufficient decrease condition.
- * @return
- */
- public double zoom(DifferentiableLineSearchObjective o, double lowerStep, double higherStep,
- int lowerStepIter, int higherStepIter){
-
- if(debugLevel >=2){
- System.out.println("Entering zoom with " + lowerStep+"-"+higherStep);
- }
-
- double currentStep=-1;
-
- int zoomIter = 0;
- while(zoomIter < 1000){
- if(Math.abs(lowerStep-higherStep) < minZoomDiffTresh){
- o.updateAlpha(lowerStep);
- if(debugLevel >= 1){
- steps.add(lowerStep);
- functionVals.add(o.getCurrentValue());
- gradientDots.add(o.getCurrentGradient());
- }
- foudStep = true;
- return lowerStep;
- }
-
- //Cubic interpolation
- currentStep =
- Interpolation.cubicInterpolation(lowerStep, o.getValue(lowerStepIter), o.getGradient(lowerStepIter),
- higherStep, o.getValue(higherStepIter), o.getGradient(higherStepIter));
-
- //Safeguard... should not be required; check under what conditions it is needed
- if(currentStep < 0 ){
- currentStep = (lowerStep+higherStep)/2;
- }
- if(Double.isNaN(currentStep) || Double.isInfinite(currentStep)){
- currentStep = (lowerStep+higherStep)/2;
- }
-// currentStep = (lowerStep+higherStep)/2;
-// System.out.println("Trying "+currentStep);
- o.updateAlpha(currentStep);
- if(debugLevel >=1){
- steps.add(currentStep);
- functionVals.add(o.getCurrentValue());
- gradientDots.add(o.getCurrentGradient());
- }
- if(!WolfeConditions.suficientDecrease(o,c1)
- || o.getCurrentValue() >= o.getValue(lowerStepIter)){
- higherStepIter = o.nrIterations;
- higherStep = currentStep;
- }
- //Note: when entering here the new step satisfies the sufficient
- // decrease condition and has a function value better than the previous
- // best (the value at lowerStepIter), so we either leave or update the
- // lower end of the interval (alpha low).
- else{
- if(WolfeConditions.sufficientCurvature(o,c1,c2)){
- //Satisfies the both wolf conditions
- foudStep = true;
- break;
- }
- //If does not satisfy curvature
- if(o.getCurrentGradient()*(higherStep-lowerStep) >= 0){
- higherStep = lowerStep;
- higherStepIter = lowerStepIter;
- }
- lowerStep = currentStep;
- lowerStepIter = o.nrIterations;
- }
- zoomIter++;
- }
- return currentStep;
- }
-
- public double getInitialGradient() {
- return currentInitGradientDot;
-
- }
-
- public double getPreviousInitialGradient() {
- return previousInitGradientDot;
- }
-
- public double getPreviousStepUsed() {
- return previousStepPicked;
- }
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java b/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java
deleted file mode 100644
index dcc704eb..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java
+++ /dev/null
@@ -1,45 +0,0 @@
-package optimization.linesearch;
-
-
-public class WolfeConditions {
-
- /**
- * Sufficient decrease constant (c1). Default constant.
- */
-
-
- /**
- * Value for sufficient curvature (c2):
- * 0.9 - for Newton and quasi-Newton methods
- * 0.1 - for nonlinear conjugate gradient
- */
-
- int debugLevel = 0;
- public void setDebugLevel(int level){
- debugLevel = level;
- }
-
- public static boolean suficientDecrease(DifferentiableLineSearchObjective o, double c1){
- double value = o.getOriginalValue()+c1*o.getAlpha()*o.getInitialGradient();
-// System.out.println("Sufficient Decrease original "+value+" new "+ o.getCurrentValue());
- return o.getCurrentValue() <= value;
- }
-
-
-
-
- public static boolean sufficientCurvature(DifferentiableLineSearchObjective o, double c1, double c2){
-// if(debugLevel >= 2){
-// double current = Math.abs(o.getCurrentGradient());
-// double orig = -c2*o.getInitialGradient();
-// if(current <= orig){
-// return true;
-// }else{
-// System.out.println("Not satisfying curvature condition: curvature " + current + " wants " + orig);
-// return false;
-// }
-// }
- return Math.abs(o.getCurrentGradient()) <= -c2*o.getInitialGradient();
- }
-
-}
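Both tests combined into one free-standing strong-Wolfe check on g(alpha) = f(x + alpha*d), with a small numeric example (a sketch; parameter names are illustrative):

    class WolfeSketch {
        // Sufficient decrease: g(a) <= g(0) + c1*a*g'(0).
        // Curvature (strong form): |g'(a)| <= -c2*g'(0).
        static boolean strongWolfe(double g0, double gPrime0, double gA,
                                   double gPrimeA, double a,
                                   double c1, double c2) {
            boolean decrease = gA <= g0 + c1 * a * gPrime0;
            boolean curvature = Math.abs(gPrimeA) <= -c2 * gPrime0;
            return decrease && curvature;
        }
        public static void main(String[] args) {
            // g(a) = (a-1)^2: g(0)=1, g'(0)=-2; at a=0.9: g=0.01, g'=-0.2.
            System.out.println(strongWolfe(1, -2, 0.01, -0.2, 0.9, 1e-4, 0.9));
            // prints true: both conditions hold at a = 0.9
        }
    }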
diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java
deleted file mode 100644
index 0429d531..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java
+++ /dev/null
@@ -1,104 +0,0 @@
-package optimization.projections;
-
-
-import java.util.Random;
-
-import optimization.util.MathUtils;
-import optimization.util.MatrixOutput;
-
-/**
- * Implements a projection into a box set defined by a and b.
- * If either a or b are infinity then that bound is ignored.
- * @author javg
- *
- */
-public class BoundsProjection extends Projection{
-
- double a,b;
- boolean ignoreA = false;
- boolean ignoreB = false;
- public BoundsProjection(double lowerBound, double upperBound) {
- if(Double.isInfinite(lowerBound)){
- this.ignoreA = true;
- }else{
- this.a =lowerBound;
- }
- if(Double.isInfinite(upperBound)){
- this.ignoreB = true;
- }else{
- this.b =upperBound;
- }
- }
-
-
-
- /**
- * Projects into the bounds
- * a <= x_i <=b
- */
- public void project(double[] original){
- for (int i = 0; i < original.length; i++) {
- if(!ignoreA && original[i] < a){
- original[i] = a;
- }else if(!ignoreB && original[i]>b){
- original[i]=b;
- }
- }
- }
-
- /**
- * Generates a random number between a and b.
- */
-
- Random r = new Random();
-
- public double[] samplePoint(int numParams) {
- double[] point = new double[numParams];
- for (int i = 0; i < point.length; i++) {
- double rand = r.nextDouble();
- if(ignoreA && ignoreB){
- //Use a large constant to avoid numbers near overflow
- point[i] = rand*(1.E100+1.E100)-1.E100;
- }else if(ignoreA){
- point[i] = rand*(b+1.E100)-1.E100;
- }else if(ignoreB){
- point[i] = rand*(1.E100-a)+a;
- }else{
- point[i] = rand*(b-a)+a;
- }
- }
- return point;
- }
-
- public static void main(String[] args) {
- BoundsProjection sp = new BoundsProjection(0,Double.POSITIVE_INFINITY);
-
-
- MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 1");
- MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 2");
- MatrixOutput.printDoubleArray(sp.samplePoint(3), "random 3");
-
- double[] d = {-1.1,1.2,1.4};
- double[] original = d.clone();
- MatrixOutput.printDoubleArray(d, "before");
-
- sp.project(d);
- MatrixOutput.printDoubleArray(d, "after");
- System.out.println("Test projection: " + sp.testProjection(original, d));
- }
-
- double epsilon = 1.E-10;
- public double[] perturbePoint(double[] point, int parameter){
- double[] newPoint = point.clone();
- if(!ignoreA && MathUtils.almost(point[parameter], a)){
- newPoint[parameter]+=epsilon;
- }else if(!ignoreB && MathUtils.almost(point[parameter], b)){
- newPoint[parameter]-=epsilon;
- }else{
- newPoint[parameter]-=epsilon;
- }
- return newPoint;
- }
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java
deleted file mode 100644
index b5a9f92f..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java
+++ /dev/null
@@ -1,72 +0,0 @@
-package optimization.projections;
-
-import optimization.util.MathUtils;
-import optimization.util.MatrixOutput;
-import util.ArrayMath;
-import util.Printing;
-
-
-
-public abstract class Projection {
-
-
- public abstract void project(double[] original);
-
-
- /**
- * From the projection theorem, "Nonlinear Programming" page
- * 201, fact 2:
- *
- * Given some z in R^n and a vector x* in X,
- * x* = [z]+ iff for all x in X:
- * (z-x*)'(x-x*) <= 0, with equality when x = x*.
- * See figure 2.16 in the book.
- *
- * @param original
- * @param projected
- * @return
- */
- public boolean testProjection(double[] original, double[] projected){
- double[] original1 = original.clone();
- //System.out.println(Printing.doubleArrayToString(original1, null, "original"));
- //System.out.println(Printing.doubleArrayToString(projected, null, "projected"));
- MathUtils.minusEquals(original1, projected, 1);
- //System.out.println(Printing.doubleArrayToString(original1, null, "minus1"));
- for(int i = 0; i < 10; i++){
- double[] x = samplePoint(original.length);
- // System.out.println(Printing.doubleArrayToString(x, null, "sample"));
- //If x equals the projected point this returns zero, so we are fine.
- MathUtils.minusEquals(x, projected, 1);
- // System.out.println(Printing.doubleArrayToString(x, null, "minus2"));
- double dotProd = MathUtils.dotProduct(original1, x);
-
- // System.out.println("dot " + dotProd);
- if(dotProd > 0) return false;
- }
-
- //Perturbs the point a bit in all possible directions
- for(int i = 0; i < original.length; i++){
- double[] x = perturbePoint(projected,i);
- // System.out.println(Printing.doubleArrayToString(x, null, "perturbed"));
- //If x equals the projected point this returns zero, so we are fine.
- MathUtils.minusEquals(x, projected, 1);
- // System.out.println(Printing.doubleArrayToString(x, null, "minus2"));
- double dotProd = MathUtils.dotProduct(original1, x);
-
- // System.out.println("dot " + dotProd);
- if(dotProd > 0) return false;
- }
-
-
-
- return true;
- }
-
- //Samples a point from the constrained set
- public abstract double[] samplePoint(int dimensions);
-
- //Perturbs a point a bit while keeping it inside the constraint set
- public abstract double[] perturbePoint(double[] point, int parameter);
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java b/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java
deleted file mode 100644
index f22afcaf..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java
+++ /dev/null
@@ -1,127 +0,0 @@
-package optimization.projections;
-
-
-
-import java.util.Random;
-
-import optimization.util.MathUtils;
-import optimization.util.MatrixOutput;
-
-public class SimplexProjection extends Projection{
-
- double scale;
- public SimplexProjection(double scale) {
- this.scale = scale;
- }
-
- /**
- * Projects the numbers of the array
- * onto a simplex of the given scale.
- * We follow the description in the paper
- * "Efficient Projections onto the l1-Ball
- * for Learning in High Dimensions".
- */
- public void project(double[] original){
- double[] ds = new double[original.length];
- System.arraycopy(original, 0, ds, 0, ds.length);
- //Clip negatives to zero; if the resulting sum is already within the scale we are done
- for (int i = 0; i < ds.length; i++) ds[i] = ds[i]>0? ds[i]:0;
- double sum = MathUtils.sum(ds);
- if (scale - sum >= -1.E-10 ){
- System.arraycopy(ds, 0, original, 0, ds.length);
- //System.out.println("Not projecting");
- return;
- }
-		//System.out.println("projecting " + sum + " constraint " + scale);
- util.Array.sortDescending(ds);
- double currentSum = 0;
- double previousTheta = 0;
- double theta = 0;
- for (int i = 0; i < ds.length; i++) {
- currentSum+=ds[i];
- theta = (currentSum-scale)/(i+1);
- if(ds[i]-theta < -1e-10){
- break;
- }
- previousTheta = theta;
- }
- //DEBUG
- if(previousTheta < 0){
-			System.out.println("Simplex projection: theta is smaller than zero: " + previousTheta);
- System.exit(-1);
- }
- for (int i = 0; i < original.length; i++) {
- original[i] = Math.max(original[i]-previousTheta, 0);
- }
- }
-
-
-
-
-
-
-	/**
-	 * Samples a point from the simplex of the given scale:
-	 * draw each coordinate uniformly from [0, scale] and,
-	 * if the resulting sum exceeds the scale, normalize.
-	 * This is probably not uniform over the simplex,
-	 * but it is sufficient for our purposes here.
-	 */
- Random r = new Random();
- public double[] samplePoint(int dimensions) {
- double[] newPoint = new double[dimensions];
- double sum =0;
- for (int i = 0; i < newPoint.length; i++) {
- double rand = r.nextDouble()*scale;
- sum+=rand;
- newPoint[i]=rand;
- }
- //Normalize
- if(sum > scale){
- for (int i = 0; i < newPoint.length; i++) {
- newPoint[i]=scale*newPoint[i]/sum;
- }
- }
- return newPoint;
- }
-
- public static void main(String[] args) {
- SimplexProjection sp = new SimplexProjection(1);
-
-
- double[] point = sp.samplePoint(3);
- MatrixOutput.printDoubleArray(point , "random 1 sum:" + MathUtils.sum(point));
- point = sp.samplePoint(3);
- MatrixOutput.printDoubleArray(point , "random 2 sum:" + MathUtils.sum(point));
- point = sp.samplePoint(3);
- MatrixOutput.printDoubleArray(point , "random 3 sum:" + MathUtils.sum(point));
-
- double[] d = {0,1.1,-10};
- double[] original = d.clone();
- MatrixOutput.printDoubleArray(d, "before");
-
- sp.project(d);
- MatrixOutput.printDoubleArray(d, "after");
- System.out.println("Test projection: " + sp.testProjection(original, d));
-
- }
-
-
- double epsilon = 1.E-10;
- public double[] perturbePoint(double[] point, int parameter){
- double[] newPoint = point.clone();
- if(MathUtils.almost(MathUtils.sum(point), scale)){
- newPoint[parameter]-=epsilon;
- }
- else if(point[parameter]==0){
- newPoint[parameter]+=epsilon;
- }else if(MathUtils.almost(point[parameter], scale)){
- newPoint[parameter]-=epsilon;
- }
- else{
- newPoint[parameter]-=epsilon;
- }
- return newPoint;
- }
-
-}
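
For reference, the sort-and-threshold step that project() implements can be written as a standalone routine. A sketch under the same Duchi et al. formulation, with illustrative names; unlike the class above it always projects onto the face sum(x) = scale instead of returning early when the clipped sum is already feasible:

import java.util.Arrays;

// Standalone sketch of the sort-and-threshold projection onto
// { x : x_i >= 0, sum_i x_i = scale } (Duchi et al., 2008).
public class SimplexProjectionSketch {
    static double[] project(double[] v, double scale) {
        double[] u = v.clone();
        Arrays.sort(u); // ascending; traverse from the largest entry down
        double cumSum = 0, theta = 0;
        for (int i = u.length - 1, k = 1; i >= 0; i--, k++) {
            cumSum += u[i];
            double t = (cumSum - scale) / k; // candidate threshold for rank k
            if (u[i] - t > 0) theta = t;     // keep the largest feasible rank
        }
        double[] x = new double[v.length];
        for (int i = 0; i < v.length; i++)
            x[i] = Math.max(v[i] - theta, 0);
        return x;
    }

    public static void main(String[] args) {
        // Same test vector as the removed main(): projects to {0, 1, 0}.
        System.out.println(Arrays.toString(project(new double[]{0, 1.1, -10}, 1.0)));
    }
}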
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java
deleted file mode 100644
index 15760f18..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package optimization.stopCriteria;
-
-import java.util.ArrayList;
-
-import optimization.gradientBasedMethods.Objective;
-
-public class CompositeStopingCriteria implements StopingCriteria {
-
- ArrayList<StopingCriteria> criterias;
-
- public CompositeStopingCriteria() {
- criterias = new ArrayList<StopingCriteria>();
- }
-
- public void add(StopingCriteria criteria){
- criterias.add(criteria);
- }
-
- public boolean stopOptimization(Objective obj){
- for(StopingCriteria criteria: criterias){
- if(criteria.stopOptimization(obj)){
- return true;
- }
- }
- return false;
- }
-
- public void reset(){
- for(StopingCriteria criteria: criterias){
- criteria.reset();
- }
- }
-}
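
CompositeStopingCriteria above is a logical OR over its members: optimization halts as soon as any one criterion fires, and reset() propagates to all of them. A self-contained sketch of the same pattern; the Criterion interface is illustrative, standing in for StopingCriteria and Objective:

import java.util.ArrayList;
import java.util.List;

// Self-contained sketch of the composite pattern: stop when ANY member fires.
public class CompositeStopSketch {
    interface Criterion { boolean stop(double value, double gradNorm); }

    static class Composite implements Criterion {
        private final List<Criterion> members = new ArrayList<Criterion>();
        void add(Criterion c) { members.add(c); }
        public boolean stop(double value, double gradNorm) {
            for (Criterion c : members)
                if (c.stop(value, gradNorm)) return true; // OR semantics
            return false;
        }
    }

    public static void main(String[] args) {
        Composite stop = new Composite();
        stop.add((v, g) -> g < 1e-6);           // gradient-norm criterion
        stop.add((v, g) -> Math.abs(v) < 1e-4); // value criterion
        System.out.println(stop.stop(0.5, 1e-7)); // true: the first one fired
    }
}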
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java
deleted file mode 100644
index 534ff833..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java
+++ /dev/null
@@ -1,30 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.util.MathUtils;
-
-public class GradientL2Norm implements StopingCriteria{
-
-	/**
-	 * Stop if the gradient norm falls below
-	 * gradientConvergenceValue
-	 */
- protected double gradientConvergenceValue;
-
-
- public GradientL2Norm(double gradientConvergenceValue){
- this.gradientConvergenceValue = gradientConvergenceValue;
- }
-
- public void reset(){}
-
- public boolean stopOptimization(Objective obj){
- double norm = MathUtils.L2Norm(obj.gradient);
- if(norm < gradientConvergenceValue){
-			System.out.println("Gradient norm below threshold");
- return true;
- }
- return false;
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java
deleted file mode 100644
index 4a489641..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java
+++ /dev/null
@@ -1,48 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.util.MathUtils;
-
-/**
- * Divides the gradient norm by the norm at the beginning of the optimization
- * @author javg
- */
-public class NormalizedGradientL2Norm extends GradientL2Norm{
-
- /**
- * Stop if gradientNorm/(originalGradientNorm) smaller
- * than gradientConvergenceValue
- */
- protected double originalGradientNorm = -1;
-
- public void reset(){
- originalGradientNorm = -1;
- }
- public NormalizedGradientL2Norm(double gradientConvergenceValue){
- super(gradientConvergenceValue);
- }
-
-
-
-
- public boolean stopOptimization(Objective obj){
- double norm = MathUtils.L2Norm(obj.gradient);
- if(originalGradientNorm == -1){
- originalGradientNorm = norm;
- }
- if(originalGradientNorm < 1E-10){
-			System.out.println("Initial gradient norm is (near) zero: " + originalGradientNorm);
- return true;
- }
- double normalizedNorm = 1.0*norm/originalGradientNorm;
- if( normalizedNorm < gradientConvergenceValue){
-			System.out.println("Gradient norm below normalized norm threshold: " + norm + " original: " + originalGradientNorm + " normalized norm: " + normalizedNorm);
- return true;
- }else{
-// System.out.println("projected gradient norm: " + norm);
- return false;
- }
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java
deleted file mode 100644
index 5ae554c2..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java
+++ /dev/null
@@ -1,60 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.util.MathUtils;
-
-/**
- * Divides the gradient norm by the norm at the beginning of the optimization
- * @author javg
- */
-public class NormalizedProjectedGradientL2Norm extends ProjectedGradientL2Norm{
-
- /**
- * Stop if gradientNorm/(originalGradientNorm) smaller
- * than gradientConvergenceValue
- */
- double originalProjectedNorm = -1;
-
- public NormalizedProjectedGradientL2Norm(double gradientConvergenceValue){
- super(gradientConvergenceValue);
- }
-
- public void reset(){
- originalProjectedNorm = -1;
- }
-
-
- double[] projectGradient(ProjectedObjective obj){
-
- if(obj.auxParameters == null){
- obj.auxParameters = new double[obj.getNumParameters()];
- }
- System.arraycopy(obj.getParameters(), 0, obj.auxParameters, 0, obj.getNumParameters());
- MathUtils.minusEquals(obj.auxParameters, obj.gradient, 1);
- obj.auxParameters = obj.projectPoint(obj.auxParameters);
- MathUtils.minusEquals(obj.auxParameters,obj.getParameters(),1);
- return obj.auxParameters;
- }
-
- public boolean stopOptimization(Objective obj){
- if(obj instanceof ProjectedObjective) {
- ProjectedObjective o = (ProjectedObjective) obj;
- double norm = MathUtils.L2Norm(projectGradient(o));
- if(originalProjectedNorm == -1){
- originalProjectedNorm = norm;
- }
- double normalizedNorm = 1.0*norm/originalProjectedNorm;
- if( normalizedNorm < gradientConvergenceValue){
-			System.out.println("Gradient norm below normalized norm threshold: " + norm + " original: " + originalProjectedNorm + " normalized norm: " + normalizedNorm);
- return true;
- }else{
-// System.out.println("projected gradient norm: " + norm);
- return false;
- }
- }
-		System.out.println("Not a projected objective");
-		throw new RuntimeException("Not a projected objective");
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java
deleted file mode 100644
index 6dbbc50d..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java
+++ /dev/null
@@ -1,54 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.util.MathUtils;
-
-public class NormalizedValueDifference implements StopingCriteria{
-
-	/**
-	 * Stop if the relative difference between successive values is smaller than a threshold
-	 */
- protected double valueConvergenceValue=0.01;
- protected double previousValue = Double.NaN;
- protected double currentValue = Double.NaN;
-
- public NormalizedValueDifference(double valueConvergenceValue){
- this.valueConvergenceValue = valueConvergenceValue;
- }
-
- public void reset(){
- previousValue = Double.NaN;
- currentValue = Double.NaN;
- }
-
-
- public boolean stopOptimization(Objective obj){
- if(Double.isNaN(currentValue)){
- currentValue = obj.getValue();
- return false;
- }else {
- previousValue = currentValue;
- currentValue = obj.getValue();
- if(previousValue != 0){
- double valueDiff = Math.abs(previousValue - currentValue)/Math.abs(previousValue);
- if( valueDiff < valueConvergenceValue){
-				System.out.println("Stopping: relative difference in values is too small: Prev "
- + (previousValue/previousValue) + " Curr: " + (currentValue/previousValue)
- + " diff: " + valueDiff);
- return true;
- }
- }else{
- double valueDiff = Math.abs(previousValue - currentValue);
- if( valueDiff < valueConvergenceValue){
-				System.out.println("Stopping: difference in values is too small: Prev "
- + (previousValue) + " Curr: " + (currentValue)
- + " diff: " + valueDiff);
- return true;
- }
- }
-
- return false;
- }
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java
deleted file mode 100644
index aadf1fd5..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java
+++ /dev/null
@@ -1,51 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.util.MathUtils;
-
-public class ProjectedGradientL2Norm implements StopingCriteria{
-
-	/**
-	 * Stop if the projected gradient norm falls below
-	 * gradientConvergenceValue
-	 */
- protected double gradientConvergenceValue;
-
-
- public ProjectedGradientL2Norm(double gradientConvergenceValue){
- this.gradientConvergenceValue = gradientConvergenceValue;
- }
-
- public void reset(){
-
- }
-
- double[] projectGradient(ProjectedObjective obj){
-
- if(obj.auxParameters == null){
- obj.auxParameters = new double[obj.getNumParameters()];
- }
- System.arraycopy(obj.getParameters(), 0, obj.auxParameters, 0, obj.getNumParameters());
- MathUtils.minusEquals(obj.auxParameters, obj.gradient, 1);
- obj.auxParameters = obj.projectPoint(obj.auxParameters);
- MathUtils.minusEquals(obj.auxParameters,obj.getParameters(),1);
- return obj.auxParameters;
- }
-
- public boolean stopOptimization(Objective obj){
- if(obj instanceof ProjectedObjective) {
- ProjectedObjective o = (ProjectedObjective) obj;
- double norm = MathUtils.L2Norm(projectGradient(o));
- if(norm < gradientConvergenceValue){
-			//	System.out.println("Gradient norm below threshold: " + norm);
- return true;
- }else{
-// System.out.println("projected gradient norm: " + norm);
- return false;
- }
- }
-		System.out.println("Not a projected objective");
-		throw new RuntimeException("Not a projected objective");
- }
-}
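
projectGradient above computes the projected-gradient mapping P(x - g) - x: for constrained problems this, not the raw gradient norm, is the right stationarity measure, since at a constrained optimum the gradient can point out of the feasible set. A small sketch with a box projection standing in for projectPoint; illustrative only, not the removed API:

public class ProjectedGradientMapping {
    // Project onto the box [0,1]^n by clamping each coordinate.
    static double[] clampToBox(double[] x) {
        double[] p = x.clone();
        for (int i = 0; i < p.length; i++)
            p[i] = Math.min(1.0, Math.max(0.0, p[i]));
        return p;
    }

    static double projectedGradNorm(double[] x, double[] g) {
        double[] step = new double[x.length];
        for (int i = 0; i < x.length; i++) step[i] = x[i] - g[i]; // x - g
        double[] proj = clampToBox(step);                          // P(x - g)
        double norm = 0;
        for (int i = 0; i < x.length; i++) {
            double d = proj[i] - x[i];                             // P(x - g) - x
            norm += d * d;
        }
        return Math.sqrt(norm);
    }

    public static void main(String[] args) {
        // At x = 0 with the gradient pointing out of the box, the projected
        // gradient is zero even though ||g|| is not: x is stationary.
        System.out.println(projectedGradNorm(new double[]{0, 0}, new double[]{1, 1}));
    }
}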
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java
deleted file mode 100644
index 10cf0522..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java
+++ /dev/null
@@ -1,8 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-
-public interface StopingCriteria {
- public boolean stopOptimization(Objective obj);
- public void reset();
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java b/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java
deleted file mode 100644
index e5d07229..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package optimization.stopCriteria;
-
-import optimization.gradientBasedMethods.Objective;
-import optimization.util.MathUtils;
-
-public class ValueDifference implements StopingCriteria{
-
-	/**
-	 * Stop if the difference between successive values is smaller than a threshold
-	 */
- protected double valueConvergenceValue=0.01;
- protected double previousValue = Double.NaN;
- protected double currentValue = Double.NaN;
-
- public ValueDifference(double valueConvergenceValue){
- this.valueConvergenceValue = valueConvergenceValue;
- }
-
- public void reset(){
- previousValue = Double.NaN;
- currentValue = Double.NaN;
- }
-
- public boolean stopOptimization(Objective obj){
- if(Double.isNaN(currentValue)){
- currentValue = obj.getValue();
- return false;
- }else {
- previousValue = currentValue;
- currentValue = obj.getValue();
- if(previousValue - currentValue < valueConvergenceValue){
-//			System.out.println("Stopping: difference in values is too small: Prev "
-// + previousValue + " Curr: " + currentValue
-// + " diff: " + (previousValue - currentValue));
- return true;
- }
- return false;
- }
-
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java b/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java
deleted file mode 100644
index cdbdefc6..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package optimization.util;
-
-public class Interpolation {
-
-	/**
-	 * Fits a cubic polynomial to a function given two points,
-	 * requiring that either gradB is greater than zero or funcB >= funcA.
-	 *
-	 * See "Non-Linear Programming", appendix C.
-	 * @param funcA
-	 * @param gradA
-	 * @param funcB
-	 * @param gradB
-	 */
- public final static double cubicInterpolation(double a,
- double funcA, double gradA, double b,double funcB, double gradB ){
- if(gradB < 0 && funcA > funcB){
- System.out.println("Cannot call cubic interpolation");
- return -1;
- }
-
- double z = 3*(funcA-funcB)/(b-a) + gradA + gradB;
- double w = Math.sqrt(z*z - gradA*gradB);
- double min = b -(gradB+w-z)*(b-a)/(gradB-gradA+2*w);
- return min;
- }
-
- public final static double quadraticInterpolation(double initFValue,
- double initGrad, double point,double pointFValue){
- double min = -1*initGrad*point*point/(2*(pointFValue-initGrad*point-initFValue));
- return min;
- }
-
- public static void main(String[] args) {
-
- }
-}
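
As a quick sanity check of quadraticInterpolation: for f(t) = (t-1)^2 we have f(0) = 1, f'(0) = -2, and a trial point t = 2 with f(2) = 1, so the formula gives -(-2)*4 / (2*(1 - (-2)*2 - 1)) = 8/8 = 1, the true minimizer. The same check as code (the class name is illustrative):

// Checks the quadratic interpolation formula on f(t) = (t-1)^2.
public class InterpolationCheck {
    static double quadratic(double f0, double g0, double t, double ft) {
        return -g0 * t * t / (2 * (ft - g0 * t - f0)); // same formula as above
    }

    public static void main(String[] args) {
        System.out.println(quadratic(1.0, -2.0, 2.0, 1.0)); // prints 1.0
    }
}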
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java b/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java
deleted file mode 100644
index 5343a39b..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/util/Logger.java
+++ /dev/null
@@ -1,7 +0,0 @@
-package optimization.util;
-
-public class Logger {
-
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java b/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java
deleted file mode 100644
index af66f82c..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java
+++ /dev/null
@@ -1,339 +0,0 @@
-package optimization.util;
-
-import java.util.Arrays;
-
-
-
-public class MathUtils {
-
-	/**
-	 * Returns the Euclidean (L2) norm of the vector.
-	 * @param vector
-	 * @return sqrt(sum_i vector[i]^2)
-	 */
- public static double L2Norm(double[] vector){
- double value = 0;
- for(int i = 0; i < vector.length; i++){
- double v = vector[i];
- value+=v*v;
- }
- return Math.sqrt(value);
- }
-
- public static double sum(double[] v){
- double sum = 0;
- for (int i = 0; i < v.length; i++) {
- sum+=v[i];
- }
- return sum;
- }
-
-
-
-
- /**
- * w = w + v
- * @param w
- * @param v
- */
- public static void plusEquals(double[] w, double[] v) {
- for(int i=0; i<w.length;i++){
-			w[i] += v[i];
- }
- }
-
- /**
- * w[i] = w[i] + v
- * @param w
- * @param v
- */
- public static void plusEquals(double[] w, double v) {
- for(int i=0; i<w.length;i++){
-			w[i] += v;
- }
- }
-
- /**
- * w[i] = w[i] - v
- * @param w
- * @param v
- */
- public static void minusEquals(double[] w, double v) {
- for(int i=0; i<w.length;i++){
-			w[i] -= v;
- }
- }
-
- /**
- * w = w + a*v
- * @param w
- * @param v
- * @param a
- */
- public static void plusEquals(double[] w, double[] v, double a) {
- for(int i=0; i<w.length;i++){
- w[i] += a*v[i];
- }
- }
-
- /**
- * w = w - a*v
- * @param w
- * @param v
- * @param a
- */
- public static void minusEquals(double[] w, double[] v, double a) {
- for(int i=0; i<w.length;i++){
- w[i] -= a*v[i];
- }
- }
- /**
- * v = w - a*v
- * @param w
- * @param v
- * @param a
- */
- public static void minusEqualsInverse(double[] w, double[] v, double a) {
- for(int i=0; i<w.length;i++){
- v[i] = w[i] - a*v[i];
- }
- }
-
- public static double dotProduct(double[] w, double[] v){
- double accum = 0;
- for(int i=0; i<w.length;i++){
- accum += w[i]*v[i];
- }
- return accum;
- }
-
- public static double[] arrayMinus(double[]w, double[]v){
- double result[] = w.clone();
- for(int i=0; i<w.length;i++){
- result[i] -= v[i];
- }
- return result;
- }
-
- public static double[] arrayMinus(double[] result , double[]w, double[]v){
- for(int i=0; i<w.length;i++){
- result[i] = w[i]-v[i];
- }
- return result;
- }
-
- public static double[] negation(double[]w){
- double result[] = new double[w.length];
- for(int i=0; i<w.length;i++){
- result[i] = -w[i];
- }
- return result;
- }
-
- public static double square(double value){
- return value*value;
- }
- public static double[][] outerProduct(double[] w, double[] v){
- double[][] result = new double[w.length][v.length];
- for(int i = 0; i < w.length; i++){
- for(int j = 0; j < v.length; j++){
- result[i][j] = w[i]*v[j];
- }
- }
- return result;
- }
-	/**
-	 * result[i][j] = a*w[i]*v[j] (scaled outer product)
-	 * @param w
-	 * @param v
-	 * @param a
-	 * @return
-	 */
- public static double[][] weightedouterProduct(double[] w, double[] v, double a){
- double[][] result = new double[w.length][v.length];
- for(int i = 0; i < w.length; i++){
- for(int j = 0; j < v.length; j++){
- result[i][j] = a*w[i]*v[j];
- }
- }
- return result;
- }
-
- public static double[][] identity(int size){
- double[][] result = new double[size][size];
- for(int i = 0; i < size; i++){
- result[i][i] = 1;
- }
- return result;
- }
-
-	/**
-	 * w -= v
-	 * @param w
-	 * @param v
-	 */
- public static void minusEquals(double[][] w, double[][] v){
- for(int i = 0; i < w.length; i++){
- for(int j = 0; j < w[0].length; j++){
- w[i][j] -= v[i][j];
- }
- }
- }
-
-	/**
-	 * w[i][j] -= a*v[i][j]
-	 * @param w
-	 * @param v
-	 */
- public static void minusEquals(double[][] w, double[][] v, double a){
- for(int i = 0; i < w.length; i++){
- for(int j = 0; j < w[0].length; j++){
- w[i][j] -= a*v[i][j];
- }
- }
- }
-
-	/**
-	 * w += v
-	 * @param w
-	 * @param v
-	 */
- public static void plusEquals(double[][] w, double[][] v){
- for(int i = 0; i < w.length; i++){
- for(int j = 0; j < w[0].length; j++){
- w[i][j] += v[i][j];
- }
- }
- }
-
-	/**
-	 * w[i][j] += a*v[i][j]
-	 * @param w
-	 * @param v
-	 */
- public static void plusEquals(double[][] w, double[][] v, double a){
- for(int i = 0; i < w.length; i++){
- for(int j = 0; j < w[0].length; j++){
- w[i][j] += a*v[i][j];
- }
- }
- }
-
-
- /**
- * results = w*v
- * @param w
- * @param v
- * @return
- */
- public static double[][] matrixMultiplication(double[][] w,double[][] v){
- int w1 = w.length;
- int w2 = w[0].length;
- int v1 = v.length;
- int v2 = v[0].length;
-
- if(w2 != v1){
- System.out.println("Matrix dimensions do not agree...");
- System.exit(-1);
- }
-
- double[][] result = new double[w1][v2];
- for(int w_i1 = 0; w_i1 < w1; w_i1++){
- for(int v_i2 = 0; v_i2 < v2; v_i2++){
- double sum = 0;
- for(int w_i2 = 0; w_i2 < w2; w_i2++){
- sum += w[w_i1 ][w_i2]*v[w_i2][v_i2];
- }
- result[w_i1][v_i2] = sum;
- }
- }
- return result;
- }
-
-	/**
-	 * w = w*v in place, with v a scalar
-	 * @param w
-	 * @param v
-	 */
- public static void matrixScalarMultiplication(double[][] w,double v){
- int w1 = w.length;
- int w2 = w[0].length;
- for(int w_i1 = 0; w_i1 < w1; w_i1++){
- for(int w_i2 = 0; w_i2 < w2; w_i2++){
- w[w_i1 ][w_i2] *= v;
- }
- }
- }
-
- public static void scalarMultiplication(double[] w,double v){
- int w1 = w.length;
- for(int w_i1 = 0; w_i1 < w1; w_i1++){
- w[w_i1 ] *= v;
- }
-
- }
-
- public static double[] matrixVector(double[][] w,double[] v){
- int w1 = w.length;
- int w2 = w[0].length;
- int v1 = v.length;
-
- if(w2 != v1){
- System.out.println("Matrix dimensions do not agree...");
- System.exit(-1);
- }
-
- double[] result = new double[w1];
- for(int w_i1 = 0; w_i1 < w1; w_i1++){
- double sum = 0;
- for(int w_i2 = 0; w_i2 < w2; w_i2++){
- sum += w[w_i1 ][w_i2]*v[w_i2];
- }
- result[w_i1] = sum;
- }
- return result;
- }
-
- public static boolean allPositive(double[] array){
- for (int i = 0; i < array.length; i++) {
- if(array[i] < 0) return false;
- }
- return true;
- }
-
-
-
-
-
- public static void main(String[] args) {
- double[][] m1 = new double[2][2];
- m1[0][0]=2;
- m1[1][0]=2;
- m1[0][1]=2;
- m1[1][1]=2;
- MatrixOutput.printDoubleArray(m1, "m1");
- double[][] m2 = new double[2][2];
- m2[0][0]=3;
- m2[1][0]=3;
- m2[0][1]=3;
- m2[1][1]=3;
- MatrixOutput.printDoubleArray(m2, "m2");
- double[][] result = matrixMultiplication(m1, m2);
- MatrixOutput.printDoubleArray(result, "result");
- matrixScalarMultiplication(result, 3);
- MatrixOutput.printDoubleArray(result, "result after multiply by 3");
- }
-
- public static boolean almost(double a, double b, double prec){
- return Math.abs(a-b)/Math.abs(a+b) <= prec || (almostZero(a) && almostZero(b));
- }
-
- public static boolean almost(double a, double b){
- return Math.abs(a-b)/Math.abs(a+b) <= 1e-10 || (almostZero(a) && almostZero(b));
- }
-
- public static boolean almostZero(double a) {
- return Math.abs(a) <= 1e-30;
- }
-
-}
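
A quick standalone check of the in-place update semantics documented above (w = w + a*v) together with dotProduct; illustrative, not part of the removed class:

import java.util.Arrays;

// Tiny check: w = w + a*v, then the dot product w'v.
public class MathUtilsCheck {
    public static void main(String[] args) {
        double[] w = {1, 2};
        double[] v = {3, 4};
        double a = 2.0;
        for (int i = 0; i < w.length; i++) w[i] += a * v[i];   // w = w + a*v
        double dot = 0;
        for (int i = 0; i < w.length; i++) dot += w[i] * v[i]; // dot product
        System.out.println(Arrays.toString(w) + " dot=" + dot); // [7.0, 10.0] dot=61.0
    }
}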
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java b/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java
deleted file mode 100644
index 9fbdf955..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java
+++ /dev/null
@@ -1,28 +0,0 @@
-package optimization.util;
-
-
-public class MatrixOutput {
- public static void printDoubleArray(double[][] array, String arrayName) {
- int size1 = array.length;
- int size2 = array[0].length;
- System.out.println(arrayName);
- for (int i = 0; i < size1; i++) {
- for (int j = 0; j < size2; j++) {
- System.out.print(" " + StaticTools.prettyPrint(array[i][j],
- "00.00E00", 4) + " ");
-
- }
- System.out.println();
- }
- System.out.println();
- }
-
- public static void printDoubleArray(double[] array, String arrayName) {
- System.out.println(arrayName);
- for (int i = 0; i < array.length; i++) {
- System.out.print(" " + StaticTools.prettyPrint(array[i],
- "00.00E00", 4) + " ");
- }
- System.out.println();
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java b/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java
deleted file mode 100644
index bcabee06..00000000
--- a/gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java
+++ /dev/null
@@ -1,180 +0,0 @@
-package optimization.util;
-
-
-import java.io.File;
-import java.io.PrintStream;
-
-public class StaticTools {
-
- static java.text.DecimalFormat fmt = new java.text.DecimalFormat();
-
- public static void createDir(String directory) {
-
- File dir = new File(directory);
- if (!dir.isDirectory()) {
- boolean success = dir.mkdirs();
- if (!success) {
- System.out.println("Unable to create directory " + directory);
- System.exit(0);
- }
- System.out.println("Created directory " + directory);
- } else {
- System.out.println("Reusing directory " + directory);
- }
- }
-
-	/*
-	 * q and p are indexed by [source][foreign]; Sum_S(q) = 1, and likewise for p.
-	 * KL(q,p) = E_q[log(q/p)]
-	 */
- public static double KLDistance(double[][] p, double[][] q, int sourceSize,
- int foreignSize) {
- double totalKL = 0;
- // common.StaticTools.printMatrix(q, sourceSize, foreignSize, "q",
- // System.out);
- // common.StaticTools.printMatrix(p, sourceSize, foreignSize, "p",
- // System.out);
- for (int i = 0; i < sourceSize; i++) {
- double kl = 0;
- for (int j = 0; j < foreignSize; j++) {
- assert !Double.isNaN(q[i][j]) : "KLDistance q: prob is NaN";
- assert !Double.isNaN(p[i][j]) : "KLDistance p: prob is NaN";
- if (p[i][j] == 0 || q[i][j] == 0) {
- continue;
- } else {
- kl += q[i][j] * Math.log(q[i][j] / p[i][j]);
- }
-
- }
- totalKL += kl;
- }
- assert !Double.isNaN(totalKL) : "KLDistance: prob is NaN";
- if (totalKL < -1.0E-10) {
-			System.out.println("KL smaller than zero " + totalKL);
-			System.out.println("Source size: " + sourceSize);
-			System.out.println("Foreign size: " + foreignSize);
- StaticTools.printMatrix(q, sourceSize, foreignSize, "q",
- System.out);
- StaticTools.printMatrix(p, sourceSize, foreignSize, "p",
- System.out);
- System.exit(-1);
- }
- return totalKL / sourceSize;
- }
-
- /*
- * indexed the by [fi][si]
- */
- public static double KLDistancePrime(double[][] p, double[][] q,
- int sourceSize, int foreignSize) {
- double totalKL = 0;
- for (int i = 0; i < sourceSize; i++) {
- double kl = 0;
- for (int j = 0; j < foreignSize; j++) {
- assert !Double.isNaN(q[j][i]) : "KLDistance q: prob is NaN";
- assert !Double.isNaN(p[j][i]) : "KLDistance p: prob is NaN";
- if (p[j][i] == 0 || q[j][i] == 0) {
- continue;
- } else {
- kl += q[j][i] * Math.log(q[j][i] / p[j][i]);
- }
-
- }
- totalKL += kl;
- }
- assert !Double.isNaN(totalKL) : "KLDistance: prob is NaN";
- return totalKL / sourceSize;
- }
-
- public static double Entropy(double[][] p, int sourceSize, int foreignSize) {
- double totalE = 0;
- for (int i = 0; i < foreignSize; i++) {
- double e = 0;
- for (int j = 0; j < sourceSize; j++) {
- e += p[i][j] * Math.log(p[i][j]);
- }
- totalE += e;
- }
- return totalE / sourceSize;
- }
-
- public static double[][] copyMatrix(double[][] original, int sourceSize,
- int foreignSize) {
- double[][] result = new double[sourceSize][foreignSize];
- for (int i = 0; i < sourceSize; i++) {
- for (int j = 0; j < foreignSize; j++) {
- result[i][j] = original[i][j];
- }
- }
- return result;
- }
-
- public static void printMatrix(double[][] matrix, int sourceSize,
- int foreignSize, String info, PrintStream out) {
-
- java.text.DecimalFormat fmt = new java.text.DecimalFormat();
- fmt.setMaximumFractionDigits(3);
- fmt.setMaximumIntegerDigits(3);
- fmt.setMinimumFractionDigits(3);
- fmt.setMinimumIntegerDigits(3);
-
- out.println(info);
-
- for (int i = 0; i < foreignSize; i++) {
- for (int j = 0; j < sourceSize; j++) {
- out.print(prettyPrint(matrix[j][i], ".00E00", 6) + " ");
- }
- out.println();
- }
- out.println();
- out.println();
- }
-
- public static void printMatrix(int[][] matrix, int sourceSize,
- int foreignSize, String info, PrintStream out) {
-
- out.println(info);
- for (int i = 0; i < foreignSize; i++) {
- for (int j = 0; j < sourceSize; j++) {
- out.print(matrix[j][i] + " ");
- }
- out.println();
- }
- out.println();
- out.println();
- }
-
- public static String formatTime(long duration) {
- StringBuilder sb = new StringBuilder();
-		double d = duration / 1000.0; // avoid integer division so fractional seconds survive
- fmt.applyPattern("00");
- sb.append(fmt.format((int) (d / (60 * 60))) + ":");
- d -= ((int) d / (60 * 60)) * 60 * 60;
- sb.append(fmt.format((int) (d / 60)) + ":");
- d -= ((int) d / 60) * 60;
- fmt.applyPattern("00.0");
- sb.append(fmt.format(d));
- return sb.toString();
- }
-
- public static String prettyPrint(double d, String patt, int len) {
- fmt.applyPattern(patt);
- String s = fmt.format(d);
- while (s.length() < len) {
- s = " " + s;
- }
- return s;
- }
-
-
- public static long getUsedMemory(){
- System.gc();
- return (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/ (1024 * 1024);
- }
-
- public final static boolean compareDoubles(double d1, double d2){
- return Math.abs(d1-d2) <= 1.E-10;
- }
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Agree.java b/gi/posterior-regularisation/prjava/src/phrase/Agree.java
deleted file mode 100644
index 8f7b499e..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Agree.java
+++ /dev/null
@@ -1,204 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.List;
-
-import phrase.Corpus.Edge;
-
-public class Agree {
- PhraseCluster model1;
- C2F model2;
- Corpus c;
- private int K,n_phrases, n_words, n_contexts, n_positions1,n_positions2;
-
-	/**@brief sum of the log-likelihoods of the two
-	 * individual models
-	 */
-	public double llh;
-	/**@brief agreement term: count-weighted sum of log
-	 * Bhattacharyya coefficients between the two models' posteriors
-	 */
-	public double bdist;
-	/**
-	 * @param numCluster number of latent clusters
-	 * @param corpus training corpus
-	 */
- public Agree(int numCluster, Corpus corpus){
-
- model1=new PhraseCluster(numCluster, corpus);
- model2=new C2F(numCluster,corpus);
- c=corpus;
- n_words=c.getNumWords();
- n_phrases=c.getNumPhrases();
- n_contexts=c.getNumContexts();
- n_positions1=c.getNumContextPositions();
- n_positions2=2;
- K=numCluster;
-
- }
-
- /**@brief test
- *
- */
- public static void main(String args[]){
- //String in="../pdata/canned.con";
- String in="../pdata/btec.con";
- String out="../pdata/posterior.out";
- int numCluster=25;
- Corpus corpus = null;
- File infile = new File(in);
- try {
- System.out.println("Reading concordance from " + infile);
- corpus = Corpus.readFromFile(FileUtil.reader(infile));
- corpus.printStats(System.out);
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + infile);
- e.printStackTrace();
- System.exit(1);
- }
-
- Agree agree=new Agree(numCluster, corpus);
- int iter=20;
- for(int i=0;i<iter;i++){
- agree.EM();
- System.out.println("Iter"+i+", llh: "+agree.llh+
- ", divergence:"+agree.bdist+
- " sum: "+(agree.llh+agree.bdist));
- }
-
- File outfile = new File (out);
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- agree.displayPosterior(ps);
- // ps.println();
- // c2f.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
-
- }
-
- public double EM(){
-
- double [][][]exp_emit1=new double [K][n_positions1][n_words];
- double [][]exp_pi1=new double[n_phrases][K];
-
- double [][][]exp_emit2=new double [K][n_positions2][n_words];
- double [][]exp_pi2=new double[n_contexts][K];
-
- llh=0;
- bdist=0;
- //E
- for(int context=0; context< n_contexts; context++){
-
- List<Edge> contexts = c.getEdgesForContext(context);
-
- for (int ctx=0; ctx<contexts.size(); ctx++){
- Edge edge = contexts.get(ctx);
- int phrase=edge.getPhraseId();
- double p[]=posterior(edge);
- double z = arr.F.l1norm(p);
- assert z > 0;
- bdist += edge.getCount() * Math.log(z);
- arr.F.l1normalize(p);
-
- double count = edge.getCount();
- //increment expected count
- TIntArrayList phraseToks = edge.getPhrase();
- TIntArrayList contextToks = edge.getContext();
- for(int tag=0;tag<K;tag++){
-
- for(int position=0;position<n_positions1;position++){
- exp_emit1[tag][position][contextToks.get(position)]+=p[tag]*count;
- }
-
- exp_emit2[tag][0][phraseToks.get(0)]+=p[tag]*count;
- exp_emit2[tag][1][phraseToks.get(phraseToks.size()-1)]+=p[tag]*count;
-
- exp_pi1[phrase][tag]+=p[tag]*count;
- exp_pi2[context][tag]+=p[tag]*count;
- }
- }
- }
-
- //System.out.println("Log likelihood: "+loglikelihood);
-
- //M
- for(double [][]i:exp_emit1){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- for(double []j:exp_pi1){
- arr.F.l1normalize(j);
- }
-
- for(double [][]i:exp_emit2){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- for(double []j:exp_pi2){
- arr.F.l1normalize(j);
- }
-
- model1.emit=exp_emit1;
- model1.pi=exp_pi1;
- model2.emit=exp_emit2;
- model2.pi=exp_pi2;
-
- return llh;
- }
-
- public double[] posterior(Corpus.Edge edge)
- {
- double[] prob1=model1.posterior(edge);
- double[] prob2=model2.posterior(edge);
-
- llh+=edge.getCount()*Math.log(arr.F.l1norm(prob1));
- llh+=edge.getCount()*Math.log(arr.F.l1norm(prob2));
- arr.F.l1normalize(prob1);
- arr.F.l1normalize(prob2);
-
- for(int i=0;i<prob1.length;i++){
- prob1[i]*=prob2[i];
- prob1[i]=Math.sqrt(prob1[i]);
- }
-
- return prob1;
- }
-
- public void displayPosterior(PrintStream ps)
- {
- displayPosterior(ps, c.getEdges());
- }
-
- public void displayPosterior(PrintStream ps, List<Edge> test)
- {
- for (Edge edge : test)
- {
- double probs[] = posterior(edge);
- arr.F.l1normalize(probs);
-
- // emit phrase
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
- int t=arr.F.argmax(probs);
- ps.println(" ||| C=" + t);
- }
- }
-
-}
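
posterior() above combines the two models by an elementwise geometric mean, prob1[i] = sqrt(prob1[i]*prob2[i]). The l1 norm of that combined vector is the Bhattacharyya coefficient BC = sum_k sqrt(p1_k * p2_k), whose log EM() accumulates into bdist. A self-contained sketch with illustrative names:

// Sketch of the agreement combination: elementwise geometric mean of two
// normalized posteriors; the normalizer is the Bhattacharyya coefficient.
public class AgreementSketch {
    public static void main(String[] args) {
        double[] p1 = {0.7, 0.2, 0.1};
        double[] p2 = {0.6, 0.3, 0.1};
        double[] combined = new double[p1.length];
        double bc = 0;
        for (int k = 0; k < p1.length; k++) {
            combined[k] = Math.sqrt(p1[k] * p2[k]);
            bc += combined[k];
        }
        for (int k = 0; k < combined.length; k++) combined[k] /= bc; // normalize
        System.out.println("BC = " + bc); // equals 1.0 only if p1 == p2
        System.out.println(java.util.Arrays.toString(combined));
    }
}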
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java b/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java
deleted file mode 100644
index 031f887f..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java
+++ /dev/null
@@ -1,197 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.List;
-
-import phrase.Corpus.Edge;
-
-public class Agree2Sides {
- PhraseCluster model1,model2;
- Corpus c1,c2;
- private int K;
-
-	/**@brief sum of the log-likelihoods of the two
-	 * individual models
-	 */
-	public double llh;
-	/**@brief agreement term: count-weighted sum of log
-	 * Bhattacharyya coefficients between the two models' posteriors
-	 */
-	public double bdist;
-	/**
-	 * @param numCluster number of latent clusters
-	 * @param corpus1 source-side corpus
-	 * @param corpus2 target-side corpus
-	 */
- public Agree2Sides(int numCluster, Corpus corpus1 , Corpus corpus2 ){
-
- model1=new PhraseCluster(numCluster, corpus1);
- model2=new PhraseCluster(numCluster,corpus2);
- c1=corpus1;
- c2=corpus2;
- K=numCluster;
-
- }
-
- /**@brief test
- *
- */
- public static void main(String args[]){
- //String in="../pdata/canned.con";
- // String in="../pdata/btec.con";
- String in1="../pdata/source.txt";
- String in2="../pdata/target.txt";
- String out="../pdata/posterior.out";
- int numCluster=25;
- Corpus corpus1 = null,corpus2=null;
- File infile1 = new File(in1),infile2=new File(in2);
- try {
- System.out.println("Reading concordance from " + infile1);
- corpus1 = Corpus.readFromFile(FileUtil.reader(infile1));
- System.out.println("Reading concordance from " + infile2);
- corpus2 = Corpus.readFromFile(FileUtil.reader(infile2));
- corpus1.printStats(System.out);
- } catch (IOException e) {
-			System.err.println("Failed to open input files: " + infile1 + ", " + infile2);
- e.printStackTrace();
- System.exit(1);
- }
-
- Agree2Sides agree=new Agree2Sides(numCluster, corpus1,corpus2);
- int iter=20;
- for(int i=0;i<iter;i++){
- agree.EM();
- System.out.println("Iter"+i+", llh: "+agree.llh+
- ", divergence:"+agree.bdist+
- " sum: "+(agree.llh+agree.bdist));
- }
-
- File outfile = new File (out);
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- agree.displayPosterior(ps);
- // ps.println();
- // c2f.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
-
- }
-
- public double EM(){
-
- double [][][]exp_emit1=new double [K][c1.getNumContextPositions()][c1.getNumWords()];
- double [][]exp_pi1=new double[c1.getNumPhrases()][K];
-
- double [][][]exp_emit2=new double [K][c2.getNumContextPositions()][c2.getNumWords()];
- double [][]exp_pi2=new double[c2.getNumPhrases()][K];
-
- llh=0;
- bdist=0;
- //E
- for(int i=0;i<c1.getEdges().size();i++){
- Edge edge1=c1.getEdges().get(i);
- Edge edge2=c2.getEdges().get(i);
- double p[]=posterior(i);
- double z = arr.F.l1norm(p);
- assert z > 0;
- bdist += edge1.getCount() * Math.log(z);
- arr.F.l1normalize(p);
- double count = edge1.getCount();
- //increment expected count
- TIntArrayList contextToks1 = edge1.getContext();
- TIntArrayList contextToks2 = edge2.getContext();
- int phrase1=edge1.getPhraseId();
- int phrase2=edge2.getPhraseId();
- for(int tag=0;tag<K;tag++){
- for(int position=0;position<c1.getNumContextPositions();position++){
- exp_emit1[tag][position][contextToks1.get(position)]+=p[tag]*count;
- }
- for(int position=0;position<c2.getNumContextPositions();position++){
- exp_emit2[tag][position][contextToks2.get(position)]+=p[tag]*count;
- }
- exp_pi1[phrase1][tag]+=p[tag]*count;
- exp_pi2[phrase2][tag]+=p[tag]*count;
- }
- }
-
- //System.out.println("Log likelihood: "+loglikelihood);
-
- //M
- for(double [][]i:exp_emit1){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- for(double []j:exp_pi1){
- arr.F.l1normalize(j);
- }
-
- for(double [][]i:exp_emit2){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- for(double []j:exp_pi2){
- arr.F.l1normalize(j);
- }
-
- model1.emit=exp_emit1;
- model1.pi=exp_pi1;
- model2.emit=exp_emit2;
- model2.pi=exp_pi2;
-
- return llh;
- }
-
- public double[] posterior(int edgeIdx)
- {
- return posterior(c1.getEdges().get(edgeIdx), c2.getEdges().get(edgeIdx));
- }
-
- public double[] posterior(Edge e1, Edge e2)
- {
- double[] prob1=model1.posterior(e1);
- double[] prob2=model2.posterior(e2);
-
- llh+=e1.getCount()*Math.log(arr.F.l1norm(prob1));
- llh+=e2.getCount()*Math.log(arr.F.l1norm(prob2));
- arr.F.l1normalize(prob1);
- arr.F.l1normalize(prob2);
-
- for(int i=0;i<prob1.length;i++){
- prob1[i]*=prob2[i];
- prob1[i]=Math.sqrt(prob1[i]);
- }
-
- return prob1;
- }
-
- public void displayPosterior(PrintStream ps)
- {
- for (int i=0;i<c1.getEdges().size();i++)
- {
- Edge edge=c1.getEdges().get(i);
- double probs[] = posterior(i);
- arr.F.l1normalize(probs);
-
- // emit phrase
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
- int t=arr.F.argmax(probs);
- ps.println(" ||| C=" + t);
- }
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/C2F.java b/gi/posterior-regularisation/prjava/src/phrase/C2F.java
deleted file mode 100644
index e8783950..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/C2F.java
+++ /dev/null
@@ -1,216 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.Arrays;
-import java.util.List;
-
-import phrase.Corpus.Edge;
-
-/**
- * @brief context-to-phrase model: the context generates the phrase
- * @author desaic
- */
-public class C2F {
- public int K;
- private int n_words, n_contexts, n_positions;
- public Corpus c;
-
- /**@brief
- * emit[tag][position][word] = p(word | tag, position in phrase)
- */
- public double emit[][][];
- /**@brief
- * pi[context][tag] = p(tag | context)
- */
- public double pi[][];
-
- public C2F(int numCluster, Corpus corpus){
- K=numCluster;
- c=corpus;
- n_words=c.getNumWords();
- n_contexts=c.getNumContexts();
-
-		//number of phrase positions considered:
-		//currently the first and last word in source and target;
-		//if the phrase has length 1 in either dimension,
-		//the same word is used for both positions
- n_positions=c.phraseEdges(c.getEdges().get(0).getPhrase()).size();
-
- emit=new double [K][n_positions][n_words];
- pi=new double[n_contexts][K];
-
- for(double [][]i:emit){
- for(double []j:i){
- arr.F.randomise(j);
- }
- }
-
- for(double []j:pi){
- arr.F.randomise(j);
- }
- }
-
- /**@brief test
- *
- */
- public static void main(String args[]){
- String in="../pdata/canned.con";
- String out="../pdata/posterior.out";
- int numCluster=25;
- Corpus corpus = null;
- File infile = new File(in);
- try {
- System.out.println("Reading concordance from " + infile);
- corpus = Corpus.readFromFile(FileUtil.reader(infile));
- corpus.printStats(System.out);
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + infile);
- e.printStackTrace();
- System.exit(1);
- }
-
- C2F c2f=new C2F(numCluster,corpus);
- int iter=20;
- double llh=0;
- for(int i=0;i<iter;i++){
- llh=c2f.EM();
- System.out.println("Iter"+i+", llh: "+llh);
- }
-
- File outfile = new File (out);
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- c2f.displayPosterior(ps);
- // ps.println();
- // c2f.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
-
- }
-
- public double EM(){
- double [][][]exp_emit=new double [K][n_positions][n_words];
- double [][]exp_pi=new double[n_contexts][K];
-
- double loglikelihood=0;
-
- //E
- for(int context=0; context< n_contexts; context++){
-
- List<Edge> contexts = c.getEdgesForContext(context);
-
- for (int ctx=0; ctx<contexts.size(); ctx++){
- Edge edge = contexts.get(ctx);
- double p[]=posterior(edge);
- double z = arr.F.l1norm(p);
- assert z > 0;
- loglikelihood += edge.getCount() * Math.log(z);
- arr.F.l1normalize(p);
-
- double count = edge.getCount();
- //increment expected count
- TIntArrayList phrase= edge.getPhrase();
- for(int tag=0;tag<K;tag++){
-
- exp_emit[tag][0][phrase.get(0)]+=p[tag]*count;
- exp_emit[tag][1][phrase.get(phrase.size()-1)]+=p[tag]*count;
-
- exp_pi[context][tag]+=p[tag]*count;
- }
- }
- }
-
- //System.out.println("Log likelihood: "+loglikelihood);
-
- //M
- for(double [][]i:exp_emit){
- for(double []j:i){
- arr.F.l1normalize(j);
- }
- }
-
- emit=exp_emit;
-
- for(double []j:exp_pi){
- arr.F.l1normalize(j);
- }
-
- pi=exp_pi;
-
- return loglikelihood;
- }
-
- public double[] posterior(Corpus.Edge edge)
- {
- double[] prob=Arrays.copyOf(pi[edge.getContextId()], K);
-
- TIntArrayList phrase = edge.getPhrase();
- TIntArrayList offsets = c.phraseEdges(phrase);
- for(int tag=0;tag<K;tag++)
- {
- for (int i=0; i < offsets.size(); ++i)
- prob[tag]*=emit[tag][i][phrase.get(offsets.get(i))];
- }
-
- return prob;
- }
-
- public void displayPosterior(PrintStream ps)
- {
- for (Edge edge : c.getEdges())
- {
- double probs[] = posterior(edge);
- arr.F.l1normalize(probs);
-
- // emit phrase
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
- int t=arr.F.argmax(probs);
- ps.println(" ||| C=" + t);
- }
- }
-
- public void displayModelParam(PrintStream ps)
- {
- final double EPS = 1e-6;
-
- ps.println("P(tag|context)");
- for (int i = 0; i < n_contexts; ++i)
- {
- ps.print(c.getContext(i));
- for(int j=0;j<pi[i].length;j++){
- if (pi[i][j] > EPS)
- ps.print("\t" + j + ": " + pi[i][j]);
- }
- ps.println();
- }
-
- ps.println("P(word|tag,position)");
- for (int i = 0; i < K; ++i)
- {
- for(int position=0;position<n_positions;position++){
- ps.println("tag " + i + " position " + position);
- for(int word=0;word<emit[i][position].length;word++){
- if (emit[i][position][word] > EPS)
- ps.print(c.getWord(word)+"="+emit[i][position][word]+"\t");
- }
- ps.println();
- }
- ps.println();
- }
-
- }
-
-}
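
EM() above follows the standard multinomial EM recipe: the E-step turns each edge into a normalized posterior over tags and accumulates posterior-weighted counts; the M-step l1-normalizes those counts into the new emit and pi tables. A minimal self-contained sketch of the same recipe for a one-position emission model; all names and data are illustrative:

// Minimal multinomial-mixture EM: E-step computes posteriors, M-step
// renormalizes posterior-weighted counts. Log-likelihood is non-decreasing.
public class MixtureEMSketch {
    public static void main(String[] args) {
        int K = 2, V = 3;
        int[] data = {0, 0, 1, 2, 2, 2};
        double[] prior = {0.5, 0.5};
        double[][] emit = {{0.5, 0.3, 0.2}, {0.2, 0.3, 0.5}};
        for (int iter = 0; iter < 20; iter++) {
            double[] expPrior = new double[K];
            double[][] expEmit = new double[K][V];
            double llh = 0;
            for (int w : data) { // E-step: posterior over tags for this token
                double[] p = new double[K];
                double z = 0;
                for (int k = 0; k < K; k++) { p[k] = prior[k] * emit[k][w]; z += p[k]; }
                llh += Math.log(z);
                for (int k = 0; k < K; k++) {
                    p[k] /= z;             // normalized posterior
                    expPrior[k] += p[k];   // accumulate expected counts
                    expEmit[k][w] += p[k];
                }
            }
            normalize(expPrior); // M-step: counts -> probabilities
            for (double[] row : expEmit) normalize(row);
            prior = expPrior;
            emit = expEmit;
            System.out.println("iter " + iter + " llh " + llh);
        }
    }

    static void normalize(double[] a) {
        double z = 0;
        for (double x : a) z += x;
        for (int i = 0; i < a.length; i++) a[i] /= z;
    }
}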
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Corpus.java b/gi/posterior-regularisation/prjava/src/phrase/Corpus.java
deleted file mode 100644
index 4b1939cd..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Corpus.java
+++ /dev/null
@@ -1,288 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import java.io.*;
-import java.util.*;
-import java.util.regex.Pattern;
-
-
-public class Corpus
-{
- private Lexicon<String> wordLexicon = new Lexicon<String>();
- private Lexicon<TIntArrayList> phraseLexicon = new Lexicon<TIntArrayList>();
- private Lexicon<TIntArrayList> contextLexicon = new Lexicon<TIntArrayList>();
- private List<Edge> edges = new ArrayList<Edge>();
- private List<List<Edge>> phraseToContext = new ArrayList<List<Edge>>();
- private List<List<Edge>> contextToPhrase = new ArrayList<List<Edge>>();
- public int splitSentinel;
- public int phraseSentinel;
- public int rareSentinel;
-
- public Corpus()
- {
- splitSentinel = wordLexicon.insert("<SPLIT>");
- phraseSentinel = wordLexicon.insert("<PHRASE>");
- rareSentinel = wordLexicon.insert("<RARE>");
- }
-
- public class Edge
- {
-
- Edge(int phraseId, int contextId, double count,int tag)
- {
- this.phraseId = phraseId;
- this.contextId = contextId;
- this.count = count;
- fixTag=tag;
- }
-
- Edge(int phraseId, int contextId, double count)
- {
- this.phraseId = phraseId;
- this.contextId = contextId;
- this.count = count;
- fixTag=-1;
- }
- public int getTag(){
- return fixTag;
- }
-
- public int getPhraseId()
- {
- return phraseId;
- }
- public TIntArrayList getPhrase()
- {
- return Corpus.this.getPhrase(phraseId);
- }
- public String getPhraseString()
- {
- return Corpus.this.getPhraseString(phraseId);
- }
- public int getContextId()
- {
- return contextId;
- }
- public TIntArrayList getContext()
- {
- return Corpus.this.getContext(contextId);
- }
- public String getContextString(boolean insertPhraseSentinel)
- {
- return Corpus.this.getContextString(contextId, insertPhraseSentinel);
- }
- public double getCount()
- {
- return count;
- }
- public boolean equals(Object other)
- {
- if (other instanceof Edge)
- {
- Edge oe = (Edge) other;
- return oe.phraseId == phraseId && oe.contextId == contextId;
- }
- else return false;
- }
- public int hashCode()
- { // this is how boost's hash_combine does it
- int seed = phraseId;
- seed ^= contextId + 0x9e3779b9 + (seed << 6) + (seed >> 2);
- return seed;
- }
- public String toString()
- {
- return getPhraseString() + "\t" + getContextString(true);
- }
-
- private int phraseId;
- private int contextId;
- private double count;
- private int fixTag;
- }
-
- List<Edge> getEdges()
- {
- return edges;
- }
-
- int getNumEdges()
- {
- return edges.size();
- }
-
- int getNumPhrases()
- {
- return phraseLexicon.size();
- }
-
- int getNumContextPositions()
- {
- return contextLexicon.lookup(0).size();
- }
-
- List<Edge> getEdgesForPhrase(int phraseId)
- {
- return phraseToContext.get(phraseId);
- }
-
- int getNumContexts()
- {
- return contextLexicon.size();
- }
-
- List<Edge> getEdgesForContext(int contextId)
- {
- return contextToPhrase.get(contextId);
- }
-
- int getNumWords()
- {
- return wordLexicon.size();
- }
-
- String getWord(int wordId)
- {
- return wordLexicon.lookup(wordId);
- }
-
- public TIntArrayList getPhrase(int phraseId)
- {
- return phraseLexicon.lookup(phraseId);
- }
-
- public String getPhraseString(int phraseId)
- {
- StringBuffer b = new StringBuffer();
- for (int tid: getPhrase(phraseId).toNativeArray())
- {
- if (b.length() > 0)
- b.append(" ");
- b.append(wordLexicon.lookup(tid));
- }
- return b.toString();
- }
-
- public TIntArrayList getContext(int contextId)
- {
- return contextLexicon.lookup(contextId);
- }
-
- public String getContextString(int contextId, boolean insertPhraseSentinel)
- {
- StringBuffer b = new StringBuffer();
- TIntArrayList c = getContext(contextId);
- for (int i = 0; i < c.size(); ++i)
- {
- if (i > 0) b.append(" ");
- //if (i == c.size() / 2) b.append("<PHRASE> ");
- b.append(wordLexicon.lookup(c.get(i)));
- }
- return b.toString();
- }
-
- public boolean isSentinel(int wordId)
- {
- return wordId == splitSentinel || wordId == phraseSentinel;
- }
-
- List<Edge> readEdges(Reader in) throws IOException
- {
- // read in line-by-line
- BufferedReader bin = new BufferedReader(in);
- String line;
- Pattern separator = Pattern.compile(" \\|\\|\\| ");
-
- List<Edge> edges = new ArrayList<Edge>();
- while ((line = bin.readLine()) != null)
- {
- // split into phrase and contexts
- StringTokenizer st = new StringTokenizer(line, "\t");
- assert (st.hasMoreTokens());
- String phraseToks = st.nextToken();
- assert (st.hasMoreTokens());
- String rest = st.nextToken();
- assert (!st.hasMoreTokens());
-
- // process phrase
- st = new StringTokenizer(phraseToks, " ");
- TIntArrayList ptoks = new TIntArrayList();
- while (st.hasMoreTokens())
- ptoks.add(wordLexicon.insert(st.nextToken()));
- int phraseId = phraseLexicon.insert(ptoks);
-
- // process contexts
- String[] parts = separator.split(rest);
- assert (parts.length % 2 == 0);
- for (int i = 0; i < parts.length; i += 2)
- {
- // process pairs of strings - context and count
- String ctxString = parts[i];
- String countString = parts[i + 1];
-
- assert (countString.startsWith("C="));
-
- String []countToks=countString.split(" ");
-
- double count = Double.parseDouble(countToks[0].substring(2).trim());
-
- TIntArrayList ctx = new TIntArrayList();
- StringTokenizer ctxStrtok = new StringTokenizer(ctxString, " ");
- while (ctxStrtok.hasMoreTokens())
- {
- String token = ctxStrtok.nextToken();
- ctx.add(wordLexicon.insert(token));
- }
- int contextId = contextLexicon.insert(ctx);
-
-
- if(countToks.length<2){
- edges.add(new Edge(phraseId, contextId, count));
- }
- else{
- int tag=Integer.parseInt(countToks[1].substring(2));
- edges.add(new Edge(phraseId, contextId, count,tag));
- }
- }
- }
- return edges;
- }
-
- static Corpus readFromFile(Reader in) throws IOException
- {
- Corpus c = new Corpus();
- c.edges = c.readEdges(in);
- for (Edge edge: c.edges)
- {
- while (edge.getPhraseId() >= c.phraseToContext.size())
- c.phraseToContext.add(new ArrayList<Edge>());
- while (edge.getContextId() >= c.contextToPhrase.size())
- c.contextToPhrase.add(new ArrayList<Edge>());
-
- // index the edge for fast phrase, context lookup
- c.phraseToContext.get(edge.getPhraseId()).add(edge);
- c.contextToPhrase.get(edge.getContextId()).add(edge);
- }
- return c;
- }
-
- TIntArrayList phraseEdges(TIntArrayList phrase)
- {
- TIntArrayList r = new TIntArrayList(4);
- for (int p = 0; p < phrase.size(); ++p)
- {
- if (p == 0 || phrase.get(p-1) == splitSentinel)
- r.add(p);
- if (p == phrase.size() - 1 || phrase.get(p+1) == splitSentinel)
- r.add(p);
- }
- return r;
- }
-
- public void printStats(PrintStream out)
- {
- out.println("Corpus has " + edges.size() + " edges " + phraseLexicon.size() + " phrases "
- + contextLexicon.size() + " contexts and " + wordLexicon.size() + " word types");
- }
-} \ No newline at end of file
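
From readEdges above, each input line holds one phrase and its contexts: the phrase, a tab, then alternating context/count fields separated by " ||| ", where each count field starts with "C=" and may carry an optional second token whose first two characters are stripped to give a fixed tag. An illustrative line, invented for this sketch, with \t marking the tab:

the house \t <s> is ||| C=2 ||| was big ||| C=1 T=4

Here "T=4" is an assumption about the second token's prefix; readEdges only strips two leading characters before parsing the integer.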
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Lexicon.java b/gi/posterior-regularisation/prjava/src/phrase/Lexicon.java
deleted file mode 100644
index a386e4a3..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Lexicon.java
+++ /dev/null
@@ -1,34 +0,0 @@
-package phrase;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-public class Lexicon<T>
-{
- public int insert(T word)
- {
- Integer i = wordToIndex.get(word);
- if (i == null)
- {
- i = indexToWord.size();
- wordToIndex.put(word, i);
- indexToWord.add(word);
- }
- return i;
- }
-
- public T lookup(int index)
- {
- return indexToWord.get(index);
- }
-
- public int size()
- {
- return indexToWord.size();
- }
-
- private Map<T, Integer> wordToIndex = new HashMap<T, Integer>();
- private List<T> indexToWord = new ArrayList<T>();
-} \ No newline at end of file
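
Lexicon above interns arbitrary values to dense integer ids: insert() is idempotent and lookup() inverts it, which is what lets Corpus store phrases and contexts as TIntArrayLists of word ids. A short usage sketch (the demo class is illustrative):

// Usage sketch for Lexicon: interning returns stable, dense ids.
public class LexiconDemo {
    public static void main(String[] args) {
        Lexicon<String> lex = new Lexicon<String>();
        int dog = lex.insert("dog");  // 0
        int cat = lex.insert("cat");  // 1
        int dog2 = lex.insert("dog"); // 0 again: insert is idempotent
        System.out.println(dog == dog2);     // true
        System.out.println(lex.lookup(cat)); // "cat"
        System.out.println(lex.size());      // 2
    }
}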
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
deleted file mode 100644
index c032bb2b..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java
+++ /dev/null
@@ -1,540 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-import org.apache.commons.math.special.Gamma;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.regex.Pattern;
-
-import phrase.Corpus.Edge;
-
-
-public class PhraseCluster {
-
- public int K;
- private int n_phrases, n_words, n_contexts, n_positions;
- public Corpus c;
- public ExecutorService pool;
-
- double[] lambdaPTCT;
- double[][] lambdaPT;
- boolean cacheLambda = true;
-
- // emit[tag][position][word] = p(word | tag, position in context)
- double emit[][][];
- // pi[phrase][tag] = p(tag | phrase)
- double pi[][];
-
- public PhraseCluster(int numCluster, Corpus corpus)
- {
- K=numCluster;
- c=corpus;
- n_words=c.getNumWords();
- n_phrases=c.getNumPhrases();
- n_contexts=c.getNumContexts();
- n_positions=c.getNumContextPositions();
-
- emit=new double [K][n_positions][n_words];
- pi=new double[n_phrases][K];
-
- for(double [][]i:emit)
- for(double []j:i)
- arr.F.randomise(j, true);
-
- for(double []j:pi)
- arr.F.randomise(j, true);
- }
-
- void useThreadPool(ExecutorService pool)
- {
- this.pool = pool;
- }
-
- public double EM(int phraseSizeLimit)
- {
- double [][][]exp_emit=new double [K][n_positions][n_words];
- double []exp_pi=new double[K];
-
- for(double [][]i:exp_emit)
- for(double []j:i)
- Arrays.fill(j, 1e-10);
-
- double loglikelihood=0;
-
- //E
- for(int phrase=0; phrase < n_phrases; phrase++)
- {
- if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit)
- continue;
-
- Arrays.fill(exp_pi, 1e-10);
-
- List<Edge> contexts = c.getEdgesForPhrase(phrase);
-
- for (int ctx=0; ctx<contexts.size(); ctx++)
- {
- Edge edge = contexts.get(ctx);
-
- double p[]=posterior(edge);
- double z = arr.F.l1norm(p);
- assert z > 0;
- loglikelihood += edge.getCount() * Math.log(z);
- arr.F.l1normalize(p);
-
- double count = edge.getCount();
- //increment expected count
- TIntArrayList context = edge.getContext();
- for(int tag=0;tag<K;tag++)
- {
- for(int pos=0;pos<n_positions;pos++){
- exp_emit[tag][pos][context.get(pos)]+=p[tag]*count;
- }
- exp_pi[tag]+=p[tag]*count;
- }
- }
- arr.F.l1normalize(exp_pi);
- System.arraycopy(exp_pi, 0, pi[phrase], 0, K);
- }
-
- //M
- for(double [][]i:exp_emit)
- for(double []j:i)
- arr.F.l1normalize(j);
-
- emit=exp_emit;
-
- return loglikelihood;
- }
-
- public double PREM(double scalePT, double scaleCT, int phraseSizeLimit)
- {
- if (scaleCT == 0)
- {
- if (pool != null)
- return PREM_phrase_constraints_parallel(scalePT, phraseSizeLimit);
- else
- return PREM_phrase_constraints(scalePT, phraseSizeLimit);
- }
- else // FIXME: ignores phraseSizeLimit
- return this.PREM_phrase_context_constraints(scalePT, scaleCT);
- }
-
-
- public double PREM_phrase_constraints(double scalePT, int phraseSizeLimit)
- {
- double [][][]exp_emit=new double[K][n_positions][n_words];
- double []exp_pi=new double[K];
-
- for(double [][]i:exp_emit)
- for(double []j:i)
- Arrays.fill(j, 1e-10);
-
- if (lambdaPT == null && cacheLambda)
- lambdaPT = new double[n_phrases][];
-
- double loglikelihood=0, kl=0, l1lmax=0, primal=0;
- int failures=0, iterations=0;
- long start = System.currentTimeMillis();
- //E
- for(int phrase=0; phrase<n_phrases; phrase++)
- {
- if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit)
- {
- //System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K);
- continue;
- }
-
- Arrays.fill(exp_pi, 1e-10);
-
- // FIXME: add rare edge check to phrase objective & posterior processing
- PhraseObjective po = new PhraseObjective(this, phrase, scalePT, (cacheLambda) ? lambdaPT[phrase] : null);
- boolean ok = po.optimizeWithProjectedGradientDescent();
- if (!ok) ++failures;
- if (cacheLambda) lambdaPT[phrase] = po.getParameters();
- iterations += po.getNumberUpdateCalls();
- double [][] q=po.posterior();
- loglikelihood += po.loglikelihood();
- kl += po.KL_divergence();
- l1lmax += po.l1lmax();
- primal += po.primal(scalePT);
- List<Edge> edges = c.getEdgesForPhrase(phrase);
-
- for(int edge=0;edge<q.length;edge++){
- Edge e = edges.get(edge);
- TIntArrayList context = e.getContext();
- double contextCnt = e.getCount();
- //increment expected count
- for(int tag=0;tag<K;tag++){
- for(int pos=0;pos<n_positions;pos++){
- exp_emit[tag][pos][context.get(pos)]+=q[edge][tag]*contextCnt;
- }
-
- exp_pi[tag]+=q[edge][tag]*contextCnt;
-
- }
- }
- arr.F.l1normalize(exp_pi);
- System.arraycopy(exp_pi, 0, pi[phrase], 0, K);
- }
-
- long end = System.currentTimeMillis();
- if (failures > 0)
- System.out.println("WARNING: failed to converge in " + failures + "/" + n_phrases + " cases");
- System.out.println("\tmean iters: " + iterations/(double)n_phrases + " elapsed time " + (end - start) / 1000.0);
- System.out.println("\tllh: " + loglikelihood);
- System.out.println("\tKL: " + kl);
- System.out.println("\tphrase l1lmax: " + l1lmax);
-
-		//M-step: renormalize expected emission counts
- for(double [][]i:exp_emit)
- for(double []j:i)
- arr.F.l1normalize(j);
- emit=exp_emit;
-
- return primal;
- }
-
- public double PREM_phrase_constraints_parallel(final double scalePT, int phraseSizeLimit)
- {
- assert(pool != null);
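-		// the E-step below farms one dual optimization per phrase out to the pool,
-		// then aggregates expected counts on this thread as each future completes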
-
-
- double [][][]exp_emit=new double [K][n_positions][n_words];
- double [][]exp_pi=new double[n_phrases][K];
-
- for(double [][]i:exp_emit)
- for(double []j:i)
- Arrays.fill(j, 1e-10);
- for(double []j:exp_pi)
- Arrays.fill(j, 1e-10);
-
- double loglikelihood=0, kl=0, l1lmax=0, primal=0;
- final AtomicInteger failures = new AtomicInteger(0);
- final AtomicLong elapsed = new AtomicLong(0l);
- int iterations=0;
- long start = System.currentTimeMillis();
- List<Future<PhraseObjective>> results = new ArrayList<Future<PhraseObjective>>();
-
- if (lambdaPT == null && cacheLambda)
- lambdaPT = new double[n_phrases][];
-
-		//E-step: submit the per-phrase optimization tasks
- for(int phrase=0;phrase<n_phrases;phrase++) {
- if (phraseSizeLimit >= 1 && c.getPhrase(phrase).size() > phraseSizeLimit) {
- System.arraycopy(pi[phrase], 0, exp_pi[phrase], 0, K);
- continue;
- }
-
- final int p=phrase;
- results.add(pool.submit(new Callable<PhraseObjective>() {
- public PhraseObjective call() {
- //System.out.println("" + Thread.currentThread().getId() + " optimising lambda for " + p);
- long start = System.currentTimeMillis();
- PhraseObjective po = new PhraseObjective(PhraseCluster.this, p, scalePT, (cacheLambda) ? lambdaPT[p] : null);
- boolean ok = po.optimizeWithProjectedGradientDescent();
- if (!ok) failures.incrementAndGet();
- long end = System.currentTimeMillis();
- elapsed.addAndGet(end - start);
- //System.out.println("" + Thread.currentThread().getId() + " done optimising lambda for " + p);
- return po;
- }
- }));
- }
-
- // aggregate the expectations as they become available
- for (Future<PhraseObjective> fpo : results)
- {
- try {
- //System.out.println("" + Thread.currentThread().getId() + " reading queue #" + count);
-
- // wait (blocking) until something is ready
- PhraseObjective po = fpo.get();
- // process
- int phrase = po.phrase;
- if (cacheLambda) lambdaPT[phrase] = po.getParameters();
- //System.out.println("" + Thread.currentThread().getId() + " taken phrase " + phrase);
- double [][] q=po.posterior();
- loglikelihood += po.loglikelihood();
- kl += po.KL_divergence();
- l1lmax += po.l1lmax();
- primal += po.primal(scalePT);
- iterations += po.getNumberUpdateCalls();
-
- List<Edge> edges = c.getEdgesForPhrase(phrase);
- for(int edge=0;edge<q.length;edge++){
- Edge e = edges.get(edge);
- TIntArrayList context = e.getContext();
- double contextCnt = e.getCount();
- //increment expected count
- for(int tag=0;tag<K;tag++){
- for(int pos=0;pos<n_positions;pos++){
- exp_emit[tag][pos][context.get(pos)]+=q[edge][tag]*contextCnt;
- }
- exp_pi[phrase][tag]+=q[edge][tag]*contextCnt;
- }
- }
-			} catch (InterruptedException e) {
-				System.err.println("E-step worker interrupted. Probably fatal!");
-				throw new RuntimeException(e);
-			} catch (ExecutionException e) {
-				System.err.println("E-step worker execution died. Probably fatal!");
-				throw new RuntimeException(e);
-			}
- }
-
- long end = System.currentTimeMillis();
-
- if (failures.get() > 0)
- System.out.println("WARNING: failed to converge in " + failures.get() + "/" + n_phrases + " cases");
- System.out.println("\tmean iters: " + iterations/(double)n_phrases + " walltime " + (end-start)/1000.0 + " threads " + elapsed.get() / 1000.0);
- System.out.println("\tllh: " + loglikelihood);
- System.out.println("\tKL: " + kl);
- System.out.println("\tphrase l1lmax: " + l1lmax);
-
-		//M-step: renormalize expected emission counts
- for(double [][]i:exp_emit)
- for(double []j:i)
- arr.F.l1normalize(j);
- emit=exp_emit;
-
- for(double []j:exp_pi)
- arr.F.l1normalize(j);
- pi=exp_pi;
-
- return primal;
- }
-
- public double PREM_phrase_context_constraints(double scalePT, double scaleCT)
- {
- double[][][] exp_emit = new double [K][n_positions][n_words];
- double[][] exp_pi = new double[n_phrases][K];
-
- //E step
- PhraseContextObjective pco = new PhraseContextObjective(this, lambdaPTCT, pool, scalePT, scaleCT);
- boolean ok = pco.optimizeWithProjectedGradientDescent();
- if (cacheLambda) lambdaPTCT = pco.getParameters();
-
- //now extract expectations
- List<Corpus.Edge> edges = c.getEdges();
- for(int e = 0; e < edges.size(); ++e)
- {
- double [] q = pco.posterior(e);
- Corpus.Edge edge = edges.get(e);
-
- TIntArrayList context = edge.getContext();
- double contextCnt = edge.getCount();
- //increment expected count
- for(int tag=0;tag<K;tag++)
- {
- for(int pos=0;pos<n_positions;pos++)
- exp_emit[tag][pos][context.get(pos)]+=q[tag]*contextCnt;
- exp_pi[edge.getPhraseId()][tag]+=q[tag]*contextCnt;
- }
- }
-
- System.out.println("\tllh: " + pco.loglikelihood());
- System.out.println("\tKL: " + pco.KL_divergence());
- System.out.println("\tphrase l1lmax: " + pco.phrase_l1lmax());
- System.out.println("\tcontext l1lmax: " + pco.context_l1lmax());
-
- //M step
- for(double [][]i:exp_emit)
- for(double []j:i)
- arr.F.l1normalize(j);
- emit=exp_emit;
-
- for(double []j:exp_pi)
- arr.F.l1normalize(j);
- pi=exp_pi;
-
- return pco.primal();
- }
-
-	/**
-	 * @param edge phrase-context pair (with count) to score
-	 * @return unnormalized posterior over tags:
-	 *         P(tag|edge) \propto P(tag|phrase) * prod_pos P(word_pos|tag,pos)
-	 */
- public double[] posterior(Corpus.Edge edge)
- {
- double[] prob;
-
- if(edge.getTag()>=0){
- prob=new double[K];
- prob[edge.getTag()]=1;
- return prob;
- }
-
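-		// unsupervised case: start from P(tag|phrase) (uniform for unseen test
-		// phrases) and multiply in the per-position context emissions below,
-		// skipping sentinel and out-of-vocabulary words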
- if (edge.getPhraseId() < n_phrases)
- prob = Arrays.copyOf(pi[edge.getPhraseId()], K);
- else
- {
- prob = new double[K];
- Arrays.fill(prob, 1.0);
- }
-
- TIntArrayList ctx = edge.getContext();
- for(int tag=0;tag<K;tag++)
- {
- for(int c=0;c<n_positions;c++)
- {
- int word = ctx.get(c);
- if (!this.c.isSentinel(word) && word < n_words)
- prob[tag]*=emit[tag][c][word];
- }
- }
-
- return prob;
- }
-
- public void displayPosterior(PrintStream ps, List<Edge> testing)
- {
- for (Edge edge : testing)
- {
- double probs[] = posterior(edge);
- arr.F.l1normalize(probs);
-
- // emit phrase
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
- int t=arr.F.argmax(probs);
- ps.println(" ||| C=" + t + " T=" + edge.getCount() + " P=" + probs[t]);
- //ps.println("# probs " + Arrays.toString(probs));
- }
- }
-
- public void displayModelParam(PrintStream ps)
- {
- final double EPS = 1e-6;
- ps.println("phrases " + n_phrases + " tags " + K + " positions " + n_positions);
-
- for (int i = 0; i < n_phrases; ++i)
- for(int j=0;j<pi[i].length;j++)
- if (pi[i][j] > EPS)
- ps.println(i + " " + j + " " + pi[i][j]);
-
- ps.println();
- for (int i = 0; i < K; ++i)
- {
- for(int position=0;position<n_positions;position++)
- {
- for(int word=0;word<emit[i][position].length;word++)
- {
- if (emit[i][position][word] > EPS)
- ps.println(i + " " + position + " " + word + " " + emit[i][position][word]);
- }
- }
- }
- }
-
- double phrase_l1lmax()
- {
- double sum=0;
- for(int phrase=0; phrase<n_phrases; phrase++)
- {
- double [] maxes = new double[K];
- for (Edge edge : c.getEdgesForPhrase(phrase))
- {
- double p[] = posterior(edge);
- arr.F.l1normalize(p);
- for(int tag=0;tag<K;tag++)
- maxes[tag] = Math.max(maxes[tag], p[tag]);
- }
- for(int tag=0;tag<K;tag++)
- sum += maxes[tag];
- }
- return sum;
- }
-
- double context_l1lmax()
- {
- double sum=0;
- for(int context=0; context<n_contexts; context++)
- {
- double [] maxes = new double[K];
- for (Edge edge : c.getEdgesForContext(context))
- {
- double p[] = posterior(edge);
- arr.F.l1normalize(p);
- for(int tag=0;tag<K;tag++)
- maxes[tag] = Math.max(maxes[tag], p[tag]);
- }
- for(int tag=0;tag<K;tag++)
- sum += maxes[tag];
- }
- return sum;
- }
-
- public void loadParameters(BufferedReader input) throws IOException
- {
- final double EPS = 1e-50;
-
- // overwrite pi, emit with ~zeros
- for(double [][]i:emit)
- for(double []j:i)
- Arrays.fill(j, EPS);
-
- for(double []j:pi)
- Arrays.fill(j, EPS);
-
- String line = input.readLine();
- assert line != null;
-
- Pattern space = Pattern.compile(" +");
- String[] parts = space.split(line);
- assert parts.length == 6;
-
- assert parts[0].equals("phrases");
- int phrases = Integer.parseInt(parts[1]);
- int tags = Integer.parseInt(parts[3]);
- int positions = Integer.parseInt(parts[5]);
-
- assert phrases == n_phrases;
- assert tags == K;
- assert positions == n_positions;
-
- // read in pi
- while ((line = input.readLine()) != null)
- {
- line = line.trim();
- if (line.isEmpty()) break;
-
- String[] tokens = space.split(line);
- assert tokens.length == 3;
- int p = Integer.parseInt(tokens[0]);
- int t = Integer.parseInt(tokens[1]);
- double v = Double.parseDouble(tokens[2]);
-
- pi[p][t] = v;
- }
-
- // read in emissions
- while ((line = input.readLine()) != null)
- {
- String[] tokens = space.split(line);
- assert tokens.length == 4;
- int t = Integer.parseInt(tokens[0]);
- int p = Integer.parseInt(tokens[1]);
- int w = Integer.parseInt(tokens[2]);
- double v = Double.parseDouble(tokens[3]);
-
- emit[t][p][w] = v;
- }
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java
deleted file mode 100644
index 646ff392..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java
+++ /dev/null
@@ -1,436 +0,0 @@
-package phrase;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Future;
-
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-import optimization.util.MathUtils;
-import phrase.Corpus.Edge;
-
-public class PhraseContextObjective extends ProjectedObjective
-{
- private static final double GRAD_DIFF = 0.00002;
- private static double INIT_STEP_SIZE = 300;
- private static double VAL_DIFF = 1e-8;
- private static int ITERATIONS = 20;
- boolean debug = false;
-
- private PhraseCluster c;
-
- // un-regularized unnormalized posterior, p[edge][tag]
- // P(tag|edge) \propto P(tag|phrase)P(context|tag)
- private double p[][];
-
-	// regularized unnormalized posterior
-	// q[edge][tag] \propto p[edge][tag]*exp(-(lambda_pt + omega_ct)/count(edge))
- private double q[][];
- private List<Corpus.Edge> data;
-
- // log likelihood under q
- private double loglikelihood;
- private SimplexProjection projectionPhrase;
- private SimplexProjection projectionContext;
-
- double[] newPoint;
- private int n_param;
-
- // likelihood under p
- public double llh;
-
- private static Map<Corpus.Edge, Integer> edgeIndex;
-
- private long projectionTime;
- private long objectiveTime;
- private long actualProjectionTime;
- private ExecutorService pool;
-
- double scalePT;
- double scaleCT;
-
- public PhraseContextObjective(PhraseCluster cluster, double[] startingParameters, ExecutorService pool,
- double scalePT, double scaleCT)
- {
- c=cluster;
- data=c.c.getEdges();
- n_param=data.size()*c.K*2;
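-		// parameter layout: entries [0, K*|E|) are the phrase-tag duals (lambda),
-		// entries [K*|E|, 2*K*|E|) are the context-tag duals (omega); see index()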
- this.pool=pool;
- this.scalePT = scalePT;
- this.scaleCT = scaleCT;
-
- parameters = startingParameters;
- if (parameters == null)
- parameters = new double[n_param];
-
- System.out.println("Num parameters " + n_param);
- newPoint = new double[n_param];
- gradient = new double[n_param];
- initP();
- projectionPhrase = new SimplexProjection(scalePT);
- projectionContext = new SimplexProjection(scaleCT);
- q=new double [data.size()][c.K];
-
- if (edgeIndex == null) {
- edgeIndex = new HashMap<Edge, Integer>();
- for (int e=0; e<data.size(); e++)
- {
- edgeIndex.put(data.get(e), e);
- //if (debug) System.out.println("Edge " + data.get(e) + " index " + e);
- }
- }
-
- setParameters(parameters);
- }
-
- private void initP(){
- p=new double[data.size()][];
- for(int edge=0;edge<data.size();edge++)
- {
- p[edge]=c.posterior(data.get(edge));
- llh += data.get(edge).getCount() * Math.log(arr.F.l1norm(p[edge]));
- arr.F.l1normalize(p[edge]);
- }
- }
-
- @Override
- public void setParameters(double[] params) {
- //System.out.println("setParameters " + Arrays.toString(parameters));
- // TODO: test if params have changed and skip update otherwise
- super.setParameters(params);
- updateFunction();
- }
-
- private void updateFunction()
- {
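-		// recompute q from the current duals; the gradient of the dual objective
-		// w.r.t. both lambda_pt and omega_ct is simply -q[e][tag]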
- updateCalls++;
- loglikelihood=0;
-
- System.out.print(".");
- System.out.flush();
-
- long begin = System.currentTimeMillis();
- for (int e=0; e<data.size(); e++)
- {
- Edge edge = data.get(e);
- for(int tag=0; tag<c.K; tag++)
- {
- int ip = index(e, tag, true);
- int ic = index(e, tag, false);
- q[e][tag] = p[e][tag]*
- Math.exp((-parameters[ip]-parameters[ic]) / edge.getCount());
- //if (debug)
- //System.out.println("\tposterior " + edge + " with tag " + tag + " p " + p[e][tag] + " params " + parameters[ip] + " and " + parameters[ic] + " q " + q[e][tag]);
- }
- }
-
- for(int edge=0;edge<data.size();edge++) {
- loglikelihood+=data.get(edge).getCount() * Math.log(arr.F.l1norm(q[edge]));
- arr.F.l1normalize(q[edge]);
- }
-
- for (int e=0; e<data.size(); e++)
- {
- for(int tag=0; tag<c.K; tag++)
- {
- int ip = index(e, tag, true);
- int ic = index(e, tag, false);
- gradient[ip]=-q[e][tag];
- gradient[ic]=-q[e][tag];
- }
- }
- //if (debug) {
- //System.out.println("objective " + loglikelihood + " ||gradient||_2: " + arr.F.l2norm(gradient));
- //System.out.println("gradient " + Arrays.toString(gradient));
- //}
- objectiveTime += System.currentTimeMillis() - begin;
- }
-
- @Override
- public double[] projectPoint(double[] point)
- {
- long begin = System.currentTimeMillis();
- List<Future<?>> tasks = new ArrayList<Future<?>>();
-
- System.out.print(",");
- System.out.flush();
-
- Arrays.fill(newPoint, 0, newPoint.length, 0);
-
- // first project using the phrase-tag constraints,
- // for all p,t: sum_c lambda_ptc < scaleP
- if (pool == null)
- {
- for (int p = 0; p < c.c.getNumPhrases(); ++p)
- {
- List<Edge> edges = c.c.getEdgesForPhrase(p);
- double[] toProject = new double[edges.size()];
- for(int tag=0;tag<c.K;tag++)
- {
- // FIXME: slow hash lookup for e (twice)
- for(int e=0; e<edges.size(); e++)
- toProject[e] = point[index(edges.get(e), tag, true)];
- long lbegin = System.currentTimeMillis();
- projectionPhrase.project(toProject);
- actualProjectionTime += System.currentTimeMillis() - lbegin;
- for(int e=0; e<edges.size(); e++)
- newPoint[index(edges.get(e), tag, true)] = toProject[e];
- }
- }
- }
- else // do above in parallel using thread pool
- {
- for (int p = 0; p < c.c.getNumPhrases(); ++p)
- {
- final int phrase = p;
- final double[] inPoint = point;
- Runnable task = new Runnable()
- {
- public void run()
- {
- List<Edge> edges = c.c.getEdgesForPhrase(phrase);
- double toProject[] = new double[edges.size()];
- for(int tag=0;tag<c.K;tag++)
- {
- // FIXME: slow hash lookup for e
- for(int e=0; e<edges.size(); e++)
- toProject[e] = inPoint[index(edges.get(e), tag, true)];
- projectionPhrase.project(toProject);
- for(int e=0; e<edges.size(); e++)
- newPoint[index(edges.get(e), tag, true)] = toProject[e];
- }
- }
- };
- tasks.add(pool.submit(task));
- }
- }
- //System.out.println("after PT " + Arrays.toString(newPoint));
-
- // now project using the context-tag constraints,
- // for all c,t: sum_p omega_pct < scaleC
- if (pool == null)
- {
- for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
- {
- List<Edge> edges = c.c.getEdgesForContext(ctx);
- double toProject[] = new double[edges.size()];
- for(int tag=0;tag<c.K;tag++)
- {
- // FIXME: slow hash lookup for e
- for(int e=0; e<edges.size(); e++)
- toProject[e] = point[index(edges.get(e), tag, false)];
- long lbegin = System.currentTimeMillis();
- projectionContext.project(toProject);
- actualProjectionTime += System.currentTimeMillis() - lbegin;
- for(int e=0; e<edges.size(); e++)
- newPoint[index(edges.get(e), tag, false)] = toProject[e];
- }
- }
- }
- else
- {
- // do above in parallel using thread pool
- for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
- {
- final int context = ctx;
- final double[] inPoint = point;
- Runnable task = new Runnable()
- {
- public void run()
- {
- List<Edge> edges = c.c.getEdgesForContext(context);
- double toProject[] = new double[edges.size()];
- for(int tag=0;tag<c.K;tag++)
- {
- // FIXME: slow hash lookup for e
- for(int e=0; e<edges.size(); e++)
- toProject[e] = inPoint[index(edges.get(e), tag, false)];
- projectionContext.project(toProject);
- for(int e=0; e<edges.size(); e++)
- newPoint[index(edges.get(e), tag, false)] = toProject[e];
- }
- }
- };
- tasks.add(pool.submit(task));
- }
- }
-
- if (pool != null)
- {
- // wait for all the jobs to complete
- Exception failure = null;
- for (Future<?> task: tasks)
- {
- try {
- task.get();
- } catch (InterruptedException e) {
- System.err.println("ERROR: Projection thread interrupted");
- e.printStackTrace();
- failure = e;
- } catch (ExecutionException e) {
- System.err.println("ERROR: Projection thread died");
- e.printStackTrace();
- failure = e;
- }
- }
- // rethrow the exception
- if (failure != null)
- {
- pool.shutdownNow();
- throw new RuntimeException(failure);
- }
- }
-
- double[] tmp = newPoint;
- newPoint = point;
- projectionTime += System.currentTimeMillis() - begin;
-
- //if (debug)
- //System.out.println("\t\treturning " + Arrays.toString(tmp));
- return tmp;
- }
-
- private int index(Edge edge, int tag, boolean phrase)
- {
- // NB if indexing changes must also change code in updateFunction and constructor
- if (phrase)
- return tag * edgeIndex.size() + edgeIndex.get(edge);
- else
- return (c.K + tag) * edgeIndex.size() + edgeIndex.get(edge);
- }
-
- private int index(int e, int tag, boolean phrase)
- {
- // NB if indexing changes must also change code in updateFunction and constructor
- if (phrase)
- return tag * edgeIndex.size() + e;
- else
- return (c.K + tag) * edgeIndex.size() + e;
- }
-
- @Override
- public double[] getGradient() {
- gradientCalls++;
- return gradient;
- }
-
- @Override
- public double getValue() {
- functionCalls++;
- return loglikelihood;
- }
-
- @Override
- public String toString() {
- return "No need for pointless toString";
- }
-
- public double []posterior(int edgeIndex){
- return q[edgeIndex];
- }
-
- public boolean optimizeWithProjectedGradientDescent()
- {
- projectionTime = 0;
- actualProjectionTime = 0;
- objectiveTime = 0;
- long start = System.currentTimeMillis();
-
- LineSearchMethod ls =
- new ArmijoLineSearchMinimizationAlongProjectionArc
- (new InterpolationPickFirstStep(INIT_STEP_SIZE));
- //LineSearchMethod ls = new WolfRuleLineSearch(
- // (new InterpolationPickFirstStep(INIT_STEP_SIZE)), c1, c2);
- OptimizerStats stats = new OptimizerStats();
-
-
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(GRAD_DIFF);
- StopingCriteria stopValue = new ValueDifference(VAL_DIFF*(-llh));
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
- optimizer.setMaxIterations(ITERATIONS);
- updateFunction();
- boolean success = optimizer.optimize(this,stats,compositeStop);
-
- System.out.println();
- System.out.println(stats.prettyPrint(1));
-
- if (success)
- System.out.print("\toptimization took " + optimizer.getCurrentIteration() + " iterations");
- else
- System.out.print("\toptimization failed to converge");
- long total = System.currentTimeMillis() - start;
- System.out.println(" and " + total + " ms: projection " + projectionTime +
- " actual " + actualProjectionTime + " objective " + objectiveTime);
-
- return success;
- }
-
- double loglikelihood()
- {
- return llh;
- }
-
- double KL_divergence()
- {
- return -loglikelihood + MathUtils.dotProduct(parameters, gradient);
- }
-
- double phrase_l1lmax()
- {
- // \sum_{tag,phrase} max_{context} P(tag|context,phrase)
- double sum=0;
- for (int p = 0; p < c.c.getNumPhrases(); ++p)
- {
- List<Edge> edges = c.c.getEdgesForPhrase(p);
- for(int tag=0;tag<c.K;tag++)
- {
- double max=0;
- for (Edge edge: edges)
- max = Math.max(max, q[edgeIndex.get(edge)][tag]);
- sum+=max;
- }
- }
- return sum;
- }
-
- double context_l1lmax()
- {
- // \sum_{tag,context} max_{phrase} P(tag|context,phrase)
- double sum=0;
- for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
- {
- List<Edge> edges = c.c.getEdgesForContext(ctx);
- for(int tag=0; tag<c.K; tag++)
- {
- double max=0;
- for (Edge edge: edges)
- max = Math.max(max, q[edgeIndex.get(edge)][tag]);
- sum+=max;
- }
- }
- return sum;
- }
-
- // L - KL(q||p) - scalePT * l1lmax_phrase - scaleCT * l1lmax_context
- public double primal()
- {
- return loglikelihood() - KL_divergence() - scalePT * phrase_l1lmax() - scaleCT * context_l1lmax();
- }
-} \ No newline at end of file
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java
deleted file mode 100644
index 0cf31c1c..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java
+++ /dev/null
@@ -1,193 +0,0 @@
-package phrase;
-
-import io.FileUtil;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Scanner;
-
-public class PhraseCorpus
-{
- public HashMap<String,Integer>wordLex;
- public HashMap<String,Integer>phraseLex;
-
- public String wordList[];
- public String phraseList[];
-
-	// data[phrase][context index][position]; the last slot of each context row holds the edge count
- public int data[][][];
- public int numContexts;
-
- public PhraseCorpus(String filename) throws FileNotFoundException, IOException
- {
- BufferedReader r = FileUtil.reader(new File(filename));
-
- phraseLex=new HashMap<String,Integer>();
- wordLex=new HashMap<String,Integer>();
-
- ArrayList<int[][]>dataList=new ArrayList<int[][]>();
- String line=null;
- numContexts = 0;
-
- while((line=readLine(r))!=null){
-
- String toks[]=line.split("\t");
- String phrase=toks[0];
- addLex(phrase,phraseLex);
-
- toks=toks[1].split(" \\|\\|\\| ");
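-			// fields alternate context and count: ctx_0 ||| C=n_0 ||| ctx_1 ||| C=n_1 ...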
-
- ArrayList <int[]>ctxList=new ArrayList<int[]>();
-
- for(int i=0;i<toks.length;i+=2){
- String ctx=toks[i];
- String words[]=ctx.split(" ");
- if (numContexts == 0)
- numContexts = words.length - 1;
- else
- assert numContexts == words.length - 1;
-
- int []context=new int [numContexts+1];
- int idx=0;
- for(String word:words){
- if(word.equals("<PHRASE>")){
- continue;
- }
- addLex(word,wordLex);
- context[idx]=wordLex.get(word);
- idx++;
- }
-
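-				//counts are serialised as "C=<n>"; substring(2) strips the "C=" prefix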
- String count=toks[i+1];
- context[idx]=Integer.parseInt(count.trim().substring(2));
-
- ctxList.add(context);
- }
-
- dataList.add(ctxList.toArray(new int [0][]));
-
- }
- try{
- r.close();
- }catch(IOException ioe){
- ioe.printStackTrace();
- }
- data=dataList.toArray(new int[0][][]);
- }
-
- private void addLex(String key, HashMap<String,Integer>lex){
- Integer i=lex.get(key);
- if(i==null){
- lex.put(key, lex.size());
- }
- }
-
- //for debugging
- public void saveLex(String lexFilename) throws FileNotFoundException, IOException
- {
- PrintStream ps = FileUtil.printstream(new File(lexFilename));
- ps.println("Phrase Lexicon");
- ps.println(phraseLex.size());
- printDict(phraseLex,ps);
-
- ps.println("Word Lexicon");
- ps.println(wordLex.size());
- printDict(wordLex,ps);
- ps.close();
- }
-
- private static void printDict(HashMap<String,Integer>lex,PrintStream ps){
- String []dict=buildList(lex);
- for(int i=0;i<dict.length;i++){
- ps.println(dict[i]);
- }
- }
-
- public void loadLex(String lexFilename){
- Scanner sc=io.FileUtil.openInFile(lexFilename);
-
- sc.nextLine();
- int size=sc.nextInt();
- sc.nextLine();
- String[]dict=new String[size];
- for(int i=0;i<size;i++){
- dict[i]=sc.nextLine();
- }
- phraseLex=buildMap(dict);
-
- sc.nextLine();
- size=sc.nextInt();
- sc.nextLine();
- dict=new String[size];
- for(int i=0;i<size;i++){
- dict[i]=sc.nextLine();
- }
- wordLex=buildMap(dict);
- sc.close();
- }
-
- private HashMap<String, Integer> buildMap(String[]dict){
- HashMap<String,Integer> map=new HashMap<String,Integer>();
- for(int i=0;i<dict.length;i++){
- map.put(dict[i], i);
- }
- return map;
- }
-
- public void buildList(){
- if(wordList==null){
- wordList=buildList(wordLex);
- phraseList=buildList(phraseLex);
- }
- }
-
- private static String[]buildList(HashMap<String,Integer>lex){
- String dict[]=new String [lex.size()];
- for(String key:lex.keySet()){
- dict[lex.get(key)]=key;
- }
- return dict;
- }
-
- public String getContextString(int context[], boolean addPhraseMarker)
- {
-		StringBuilder b = new StringBuilder();
- for (int i=0;i<context.length-1;i++)
- {
- if (b.length() > 0)
- b.append(" ");
-
- if (i == context.length/2)
- b.append("<PHRASE> ");
-
- b.append(wordList[context[i]]);
- }
- return b.toString();
- }
-
- public static String readLine(BufferedReader r){
- try{
- return r.readLine();
- }
- catch(IOException ioe){
- ioe.printStackTrace();
- }
- return null;
- }
-
- public static void main(String[] args) throws Exception
- {
- String LEX_FILENAME="../pdata/lex.out";
- String DATA_FILENAME="../pdata/btec.con";
- PhraseCorpus c=new PhraseCorpus(DATA_FILENAME);
- c.saveLex(LEX_FILENAME);
- c.loadLex(LEX_FILENAME);
- c.saveLex(LEX_FILENAME);
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java
deleted file mode 100644
index ac73a075..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java
+++ /dev/null
@@ -1,224 +0,0 @@
-package phrase;
-
-import java.util.Arrays;
-import java.util.List;
-
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.linesearch.WolfRuleLineSearch;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-import optimization.util.MathUtils;
-
-public class PhraseObjective extends ProjectedObjective
-{
- static final double GRAD_DIFF = 0.00002;
- static double INIT_STEP_SIZE = 300;
- static double VAL_DIFF = 1e-8; // tuned to BTEC subsample
- static int ITERATIONS = 100;
- private PhraseCluster c;
-
- /**@brief
- * for debugging purposes
- */
- //public static PrintStream ps;
-
-	/**@brief current phrase being optimized*/
- public int phrase;
-
- /**@brief un-regularized posterior
- * unnormalized
- * p[edge][tag]
- * P(tag|edge) \propto P(tag|phrase)P(context|tag)
- */
- private double[][]p;
-
-	/**@brief regularized posterior
-	 * q[edge][tag] \propto p[edge][tag]*exp(-lambda[tag][edge]/count(edge))
-	 */
- private double q[][];
- private List<Corpus.Edge> data;
-
- /**@brief log likelihood of the associated phrase
- *
- */
- private double loglikelihood;
- private SimplexProjection projection;
-
- double[] newPoint ;
-
- private int n_param;
-
- /**@brief likelihood under p
- *
- */
- public double llh;
-
- public PhraseObjective(PhraseCluster cluster, int phraseIdx, double scale, double[] lambda){
- phrase=phraseIdx;
- c=cluster;
- data=c.c.getEdgesForPhrase(phrase);
- n_param=data.size()*c.K;
- //System.out.println("Num parameters " + n_param + " for phrase #" + phraseIdx);
-
- if (lambda==null)
- lambda=new double[n_param];
-
- parameters = lambda;
- newPoint = new double[n_param];
- gradient = new double[n_param];
- initP();
- projection=new SimplexProjection(scale);
- q=new double [data.size()][c.K];
-
- setParameters(parameters);
- }
-
- private void initP(){
- p=new double[data.size()][];
- for(int edge=0;edge<data.size();edge++){
- p[edge]=c.posterior(data.get(edge));
- llh += data.get(edge).getCount() * Math.log(arr.F.l1norm(p[edge])); // Was bug here - count inside log!
- arr.F.l1normalize(p[edge]);
- }
- }
-
- @Override
- public void setParameters(double[] params) {
- super.setParameters(params);
- updateFunction();
- }
-
- private void updateFunction(){
- updateCalls++;
- loglikelihood=0;
-
- for(int tag=0;tag<c.K;tag++){
- for(int edge=0;edge<data.size();edge++){
- q[edge][tag]=p[edge][tag]*
- Math.exp(-parameters[tag*data.size()+edge]/data.get(edge).getCount());
- }
- }
-
- for(int edge=0;edge<data.size();edge++){
- loglikelihood+=data.get(edge).getCount() * Math.log(arr.F.l1norm(q[edge]));
- arr.F.l1normalize(q[edge]);
- }
-
- for(int tag=0;tag<c.K;tag++){
- for(int edge=0;edge<data.size();edge++){
- gradient[tag*data.size()+edge]=-q[edge][tag];
- }
- }
- }
-
- @Override
- public double[] projectPoint(double[] point)
- {
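-		// project each tag's block of duals onto the scaled simplex, enforcing
-		// sum_edge lambda[tag][edge] <= scale for this phrase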
- double toProject[]=new double[data.size()];
- for(int tag=0;tag<c.K;tag++){
- for(int edge=0;edge<data.size();edge++){
- toProject[edge]=point[tag*data.size()+edge];
- }
- projection.project(toProject);
- for(int edge=0;edge<data.size();edge++){
- newPoint[tag*data.size()+edge]=toProject[edge];
- }
- }
- return newPoint;
- }
-
- @Override
- public double[] getGradient() {
- gradientCalls++;
- return gradient;
- }
-
- @Override
- public double getValue() {
- functionCalls++;
- return loglikelihood;
- }
-
- @Override
- public String toString() {
- return Arrays.toString(parameters);
- }
-
- public double [][]posterior(){
- return q;
- }
-
- long optimizationTime;
-
- public boolean optimizeWithProjectedGradientDescent(){
- long start = System.currentTimeMillis();
-
- LineSearchMethod ls =
- new ArmijoLineSearchMinimizationAlongProjectionArc
- (new InterpolationPickFirstStep(INIT_STEP_SIZE));
- //LineSearchMethod ls = new WolfRuleLineSearch(
- // (new InterpolationPickFirstStep(INIT_STEP_SIZE)), c1, c2);
- OptimizerStats stats = new OptimizerStats();
-
-
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(GRAD_DIFF);
- StopingCriteria stopValue = new ValueDifference(VAL_DIFF*(-llh));
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
- optimizer.setMaxIterations(ITERATIONS);
- updateFunction();
- boolean success = optimizer.optimize(this,stats,compositeStop);
- //System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1));
- //if(succed){
- //System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- //}else{
-// System.out.println("Failed to optimize");
- //}
- //System.out.println(Arrays.toString(parameters));
-
- // for(int edge=0;edge<data.getSize();edge++){
- // ps.println(Arrays.toString(q[edge]));
- // }
-
- return success;
- }
-
- public double KL_divergence()
- {
- return -loglikelihood + MathUtils.dotProduct(parameters, gradient);
- }
-
- public double loglikelihood()
- {
- return llh;
- }
-
- public double l1lmax()
- {
- double sum=0;
- for(int tag=0;tag<c.K;tag++){
- double max=0;
- for(int edge=0;edge<data.size();edge++){
- if(q[edge][tag]>max)
- max=q[edge][tag];
- }
- sum+=max;
- }
- return sum;
- }
-
- public double primal(double scale)
- {
- return loglikelihood() - KL_divergence() - scale * l1lmax();
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Trainer.java b/gi/posterior-regularisation/prjava/src/phrase/Trainer.java
deleted file mode 100644
index 6f302b20..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/Trainer.java
+++ /dev/null
@@ -1,257 +0,0 @@
-package phrase;
-
-import io.FileUtil;
-import joptsimple.OptionParser;
-import joptsimple.OptionSet;
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.List;
-import java.util.Random;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-
-import phrase.Corpus.Edge;
-
-import arr.F;
-
-public class Trainer
-{
- public static void main(String[] args)
- {
- OptionParser parser = new OptionParser();
- parser.accepts("help");
- parser.accepts("in").withRequiredArg().ofType(File.class);
- parser.accepts("in1").withRequiredArg().ofType(File.class);
- parser.accepts("test").withRequiredArg().ofType(File.class);
- parser.accepts("out").withRequiredArg().ofType(File.class);
- parser.accepts("start").withRequiredArg().ofType(File.class);
- parser.accepts("parameters").withRequiredArg().ofType(File.class);
- parser.accepts("topics").withRequiredArg().ofType(Integer.class).defaultsTo(5);
- parser.accepts("iterations").withRequiredArg().ofType(Integer.class).defaultsTo(10);
- parser.accepts("threads").withRequiredArg().ofType(Integer.class).defaultsTo(0);
- parser.accepts("scale-phrase").withRequiredArg().ofType(Double.class).defaultsTo(0.0);
- parser.accepts("scale-context").withRequiredArg().ofType(Double.class).defaultsTo(0.0);
- parser.accepts("seed").withRequiredArg().ofType(Long.class).defaultsTo(0l);
- parser.accepts("convergence-threshold").withRequiredArg().ofType(Double.class).defaultsTo(1e-6);
- parser.accepts("variational-bayes");
- parser.accepts("alpha-emit").withRequiredArg().ofType(Double.class).defaultsTo(0.1);
- parser.accepts("alpha-pi").withRequiredArg().ofType(Double.class).defaultsTo(0.0001);
- parser.accepts("agree-direction");
- parser.accepts("agree-language");
- parser.accepts("no-parameter-cache");
- parser.accepts("skip-large-phrases").withRequiredArg().ofType(Integer.class).defaultsTo(5);
- OptionSet options = parser.parse(args);
-
- if (options.has("help") || !options.has("in"))
- {
- try {
- parser.printHelpOn(System.err);
- } catch (IOException e) {
- System.err.println("This should never happen.");
- e.printStackTrace();
- }
- System.exit(1);
- }
-
- int tags = (Integer) options.valueOf("topics");
- int iterations = (Integer) options.valueOf("iterations");
- double scale_phrase = (Double) options.valueOf("scale-phrase");
- double scale_context = (Double) options.valueOf("scale-context");
- int threads = (Integer) options.valueOf("threads");
- double threshold = (Double) options.valueOf("convergence-threshold");
- boolean vb = options.has("variational-bayes");
- double alphaEmit = (vb) ? (Double) options.valueOf("alpha-emit") : 0;
- double alphaPi = (vb) ? (Double) options.valueOf("alpha-pi") : 0;
- int skip = (Integer) options.valueOf("skip-large-phrases");
-
- if (options.has("seed"))
- F.rng = new Random((Long) options.valueOf("seed"));
-
- ExecutorService threadPool = null;
- if (threads > 0)
- threadPool = Executors.newFixedThreadPool(threads);
-
- if (tags <= 1 || scale_phrase < 0 || scale_context < 0 || threshold < 0)
- {
- System.err.println("Invalid arguments. Try again!");
- System.exit(1);
- }
-
- Corpus corpus = null;
- File infile = (File) options.valueOf("in");
- Corpus corpus1 = null;
- File infile1 = (File) options.valueOf("in1");
- try {
- System.out.println("Reading concordance from " + infile);
- corpus = Corpus.readFromFile(FileUtil.reader(infile));
- corpus.printStats(System.out);
- if(options.has("in1")){
- corpus1 = Corpus.readFromFile(FileUtil.reader(infile1));
- corpus1.printStats(System.out);
- }
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + infile);
- e.printStackTrace();
- System.exit(1);
- }
-
- if (!(options.has("agree-direction")||options.has("agree-language")))
- System.out.println("Running with " + tags + " tags " +
- "for " + iterations + " iterations " +
- ((skip > 0) ? "skipping large phrases for first " + skip + " iterations " : "") +
- "with scale " + scale_phrase + " phrase and " + scale_context + " context " +
- "and " + threads + " threads");
- else
- System.out.println("Running agreement model with " + tags + " tags " +
-					"for " + iterations + " iterations");
-
- System.out.println();
-
- PhraseCluster cluster = null;
- Agree2Sides agree2sides = null;
- Agree agree= null;
- VB vbModel=null;
- if (options.has("agree-language"))
- agree2sides = new Agree2Sides(tags, corpus,corpus1);
- else if (options.has("agree-direction"))
- agree = new Agree(tags, corpus);
- else
- {
- if (vb)
- {
- vbModel=new VB(tags,corpus);
- vbModel.alpha=alphaPi;
- vbModel.lambda=alphaEmit;
- if (threadPool != null) vbModel.useThreadPool(threadPool);
- }
- else
- {
- cluster = new PhraseCluster(tags, corpus);
- if (threadPool != null) cluster.useThreadPool(threadPool);
-
- if (options.has("no-parameter-cache"))
- cluster.cacheLambda = false;
- if (options.has("start"))
- {
- try {
- System.err.println("Reading starting parameters from " + options.valueOf("start"));
- cluster.loadParameters(FileUtil.reader((File)options.valueOf("start")));
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + options.valueOf("start"));
- e.printStackTrace();
- }
- }
- }
- }
-
- double last = 0;
- for (int i=0; i < iterations; i++)
- {
- double o;
- if (agree != null)
- o = agree.EM();
- else if(agree2sides!=null)
- o = agree2sides.EM();
- else
- {
- if (i < skip)
- System.out.println("Skipping phrases of length > " + (i+1));
-
- if (scale_phrase <= 0 && scale_context <= 0)
- {
- if (!vb)
- o = cluster.EM((i < skip) ? i+1 : 0);
- else
- o = vbModel.EM();
- }
- else
- o = cluster.PREM(scale_phrase, scale_context, (i < skip) ? i+1 : 0);
- }
-
- System.out.println("ITER: "+i+" objective: " + o);
-
- // sometimes takes a few iterations to break the ties
- if (i > 5 && Math.abs((o - last) / o) < threshold)
- {
- last = o;
- break;
- }
- last = o;
- }
-
- double pl1lmax = 0, cl1lmax = 0;
- if (cluster != null)
- {
- pl1lmax = cluster.phrase_l1lmax();
- cl1lmax = cluster.context_l1lmax();
- }
- else if (agree != null)
- {
- // fairly arbitrary choice of model1 cf model2
- pl1lmax = agree.model1.phrase_l1lmax();
- cl1lmax = agree.model1.context_l1lmax();
- }
- else if (agree2sides != null)
- {
- // fairly arbitrary choice of model1 cf model2
- pl1lmax = agree2sides.model1.phrase_l1lmax();
- cl1lmax = agree2sides.model1.context_l1lmax();
- }
-
- System.out.println("\nFinal posterior phrase l1lmax " + pl1lmax + " context l1lmax " + cl1lmax);
-
- if (options.has("out"))
- {
- File outfile = (File) options.valueOf("out");
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- List<Edge> test;
- if (!options.has("test")) // just use the training
- test = corpus.getEdges();
- else
- { // if --test supplied, load up the file
- infile = (File) options.valueOf("test");
- System.out.println("Reading testing concordance from " + infile);
- test = corpus.readEdges(FileUtil.reader(infile));
- }
- if(vb) {
- assert !options.has("test");
- vbModel.displayPosterior(ps);
- } else if (cluster != null)
- cluster.displayPosterior(ps, test);
- else if (agree != null)
- agree.displayPosterior(ps, test);
- else if (agree2sides != null) {
- assert !options.has("test");
- agree2sides.displayPosterior(ps);
- }
-
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open either testing file or output file");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- if (options.has("parameters"))
- {
- assert !vb;
- File outfile = (File) options.valueOf("parameters");
- PrintStream ps;
- try {
- ps = FileUtil.printstream(outfile);
- cluster.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output parameters file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- if (cluster != null && cluster.pool != null)
- cluster.pool.shutdown();
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/phrase/VB.java b/gi/posterior-regularisation/prjava/src/phrase/VB.java
deleted file mode 100644
index cd3f4966..00000000
--- a/gi/posterior-regularisation/prjava/src/phrase/VB.java
+++ /dev/null
@@ -1,419 +0,0 @@
-package phrase;
-
-import gnu.trove.TIntArrayList;
-
-import io.FileUtil;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Future;
-
-import org.apache.commons.math.special.Gamma;
-
-import phrase.Corpus.Edge;
-
-public class VB {
-
- public static int MAX_ITER=400;
-
- /**@brief
- * hyper param for beta
- * where beta is multinomial
- * for generating words from a topic
- */
- public double lambda=0.1;
- /**@brief
- * hyper param for theta
- * where theta is dirichlet for z
- */
- public double alpha=0.0001;
- /**@brief
- * variational param for beta
- */
- private double rho[][][];
- private double digamma_rho[][][];
- private double rho_sum[][];
- /**@brief
- * variational param for z
- */
- //private double phi[][];
- /**@brief
- * variational param for theta
- */
- private double gamma[];
- private static double VAL_DIFF_RATIO=0.005;
-
- private int n_positions;
- private int n_words;
- private int K;
- private ExecutorService pool;
-
- private Corpus c;
- public static void main(String[] args) {
- // String in="../pdata/canned.con";
- String in="../pdata/btec.con";
- String out="../pdata/vb.out";
- int numCluster=25;
- Corpus corpus = null;
- File infile = new File(in);
- try {
- System.out.println("Reading concordance from " + infile);
- corpus = Corpus.readFromFile(FileUtil.reader(infile));
- corpus.printStats(System.out);
- } catch (IOException e) {
- System.err.println("Failed to open input file: " + infile);
- e.printStackTrace();
- System.exit(1);
- }
-
- VB vb=new VB(numCluster, corpus);
- int iter=20;
- for(int i=0;i<iter;i++){
- double obj=vb.EM();
- System.out.println("Iter "+i+": "+obj);
- }
-
- File outfile = new File (out);
- try {
- PrintStream ps = FileUtil.printstream(outfile);
- vb.displayPosterior(ps);
- // ps.println();
- // c2f.displayModelParam(ps);
- ps.close();
- } catch (IOException e) {
- System.err.println("Failed to open output file: " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- public VB(int numCluster, Corpus corpus){
- c=corpus;
- K=numCluster;
- n_positions=c.getNumContextPositions();
- n_words=c.getNumWords();
- rho=new double[K][n_positions][n_words];
- //to init rho
- //loop through data and count up words
- double[] phi_tmp=new double[K];
- for(int i=0;i<K;i++){
- for(int pos=0;pos<n_positions;pos++){
- Arrays.fill(rho[i][pos], lambda);
- }
- }
- for(int d=0;d<c.getNumPhrases();d++){
- List<Edge>doc=c.getEdgesForPhrase(d);
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
- arr.F.randomise(phi_tmp);
- for(int i=0;i<K;i++){
- for(int pos=0;pos<n_positions;pos++){
- rho[i][pos][context.get(pos)]+=phi_tmp[i];
- }
- }
- }
- }
-
- }
-
- private double inference(int phraseID, double[][] phi, double[] gamma)
- {
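-		// variational EM for one phrase (treated as a document): iterate
-		//   phi[n][i] \propto exp( E[log theta_i] + sum_pos E[log beta_{i,pos,word}] )
-		//   gamma[i]  = alpha + sum_n phi[n][i]
-		// until the relative change in the per-phrase objective drops below VAL_DIFF_RATIO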
- List<Edge > doc=c.getEdgesForPhrase(phraseID);
- for(int i=0;i<phi.length;i++){
- for(int j=0;j<phi[i].length;j++){
- phi[i][j]=1.0/K;
- }
- }
- Arrays.fill(gamma,alpha+1.0/K);
-
- double digamma_gamma[]=new double[K];
-
- double gamma_sum=digamma(arr.F.l1norm(gamma));
- for(int i=0;i<K;i++){
- digamma_gamma[i]=digamma(gamma[i]);
- }
- double gammaSum[]=new double [K];
- double prev_val=0;
- double obj=0;
-
- for(int iter=0;iter<MAX_ITER;iter++){
- prev_val=obj;
- obj=0;
- Arrays.fill(gammaSum,0.0);
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
- double phisum=0;
- for(int i=0;i<K;i++){
- double sum=0;
- for(int pos=0;pos<n_positions;pos++){
- int word=context.get(pos);
- sum+=digamma_rho[i][pos][word]-rho_sum[i][pos];
- }
- sum+= digamma_gamma[i]-gamma_sum;
- phi[n][i]=sum;
-
- if (i > 0){
- phisum = log_sum(phisum, phi[n][i]);
- }
- else{
- phisum = phi[n][i];
- }
-
- }//end of a word
-
- for(int i=0;i<K;i++){
- phi[n][i]=Math.exp(phi[n][i]-phisum);
- gammaSum[i]+=phi[n][i];
- }
-
- }//end of doc
-
- for(int i=0;i<K;i++){
- gamma[i]=alpha+gammaSum[i];
- }
- gamma_sum=digamma(arr.F.l1norm(gamma));
- for(int i=0;i<K;i++){
- digamma_gamma[i]=digamma(gamma[i]);
- }
- //compute objective for reporting
-
- obj=0;
-
- for(int i=0;i<K;i++){
- obj+=(alpha-1)*(digamma_gamma[i]-gamma_sum);
- }
-
-
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
-
- for(int i=0;i<K;i++){
- //entropy of phi + expected log likelihood of z
- obj+=phi[n][i]*(digamma_gamma[i]-gamma_sum);
-
- if(phi[n][i]>1e-10){
- obj+=phi[n][i]*Math.log(phi[n][i]);
- }
-
- double beta_sum=0;
- for(int pos=0;pos<n_positions;pos++){
- int word=context.get(pos);
- beta_sum+=(digamma(rho[i][pos][word])-rho_sum[i][pos]);
- }
- obj+=phi[n][i]*beta_sum;
- }
- }
-
- obj-=log_gamma(arr.F.l1norm(gamma));
- for(int i=0;i<K;i++){
- obj+=Gamma.logGamma(gamma[i]);
- obj-=(gamma[i]-1)*(digamma_gamma[i]-gamma_sum);
- }
-
-// System.out.println(phraseID+": "+obj);
- if(iter>0 && (obj-prev_val)/Math.abs(obj)<VAL_DIFF_RATIO){
- break;
- }
- }//end of inference loop
-
- return obj;
- }//end of inference
-
- /**
- * @return objective of this iteration
- */
- public double EM(){
- double emObj=0;
- if(digamma_rho==null){
- digamma_rho=new double[K][n_positions][n_words];
- }
- for(int i=0;i<K;i++){
- for (int pos=0;pos<n_positions;pos++){
- for(int j=0;j<n_words;j++){
- digamma_rho[i][pos][j]= digamma(rho[i][pos][j]);
- }
- }
- }
-
- if(rho_sum==null){
- rho_sum=new double [K][n_positions];
- }
- for(int i=0;i<K;i++){
- for(int pos=0;pos<n_positions;pos++){
- rho_sum[i][pos]=digamma(arr.F.l1norm(rho[i][pos]));
- }
- }
-
- //E
- double exp_rho[][][]=new double[K][n_positions][n_words];
- if (pool == null)
- {
- for (int d=0;d<c.getNumPhrases();d++)
- {
- List<Edge > doc=c.getEdgesForPhrase(d);
- double[][] phi = new double[doc.size()][K];
- double[] gamma = new double[K];
-
- emObj += inference(d, phi, gamma);
-
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
- for(int pos=0;pos<n_positions;pos++){
- int word=context.get(pos);
- for(int i=0;i<K;i++){
- exp_rho[i][pos][word]+=phi[n][i];
- }
- }
- }
- //if(d!=0 && d%100==0) System.out.print(".");
- //if(d!=0 && d%1000==0) System.out.println(d);
- }
- }
- else // multi-threaded version of above loop
- {
- class PartialEStep implements Callable<PartialEStep>
- {
- double[][] phi;
- double[] gamma;
- double obj;
- int d;
- PartialEStep(int d) { this.d = d; }
-
- public PartialEStep call()
- {
- phi = new double[c.getEdgesForPhrase(d).size()][K];
- gamma = new double[K];
- obj = inference(d, phi, gamma);
- return this;
- }
- }
-
- List<Future<PartialEStep>> jobs = new ArrayList<Future<PartialEStep>>();
- for (int d=0;d<c.getNumPhrases();d++)
- jobs.add(pool.submit(new PartialEStep(d)));
-
- for (Future<PartialEStep> job: jobs)
- {
- try {
- PartialEStep e = job.get();
-
- emObj += e.obj;
- List<Edge> doc = c.getEdgesForPhrase(e.d);
- for(int n=0;n<doc.size();n++){
- TIntArrayList context=doc.get(n).getContext();
- for(int pos=0;pos<n_positions;pos++){
- int word=context.get(pos);
- for(int i=0;i<K;i++){
- exp_rho[i][pos][word]+=e.phi[n][i];
- }
- }
- }
- } catch (ExecutionException e) {
- System.err.println("ERROR: E-step thread execution failed.");
- throw new RuntimeException(e);
- } catch (InterruptedException e) {
- System.err.println("ERROR: Failed to join E-step thread.");
- throw new RuntimeException(e);
- }
- }
- }
- // System.out.println("EM Objective:"+emObj);
-
- //M
- for(int i=0;i<K;i++){
- for(int pos=0;pos<n_positions;pos++){
- for(int j=0;j<n_words;j++){
- rho[i][pos][j]=lambda+exp_rho[i][pos][j];
- }
- }
- }
-
- //E[\log p(\beta|\lambda)] - E[\log q(\beta)]
- for(int i=0;i<K;i++){
- double rhoSum=0;
- for(int pos=0;pos<n_positions;pos++){
- for(int j=0;j<n_words;j++){
- rhoSum+=rho[i][pos][j];
- }
- double digamma_rhoSum=Gamma.digamma(rhoSum);
- emObj-=Gamma.logGamma(rhoSum);
- for(int j=0;j<n_words;j++){
- emObj+=(lambda-rho[i][pos][j])*(Gamma.digamma(rho[i][pos][j])-digamma_rhoSum);
- emObj+=Gamma.logGamma(rho[i][pos][j]);
- }
- }
- }
-
- return emObj;
- }//end of EM
-
- public void displayPosterior(PrintStream ps)
- {
- for(int d=0;d<c.getNumPhrases();d++){
- List<Edge > doc=c.getEdgesForPhrase(d);
- double[][] phi = new double[doc.size()][K];
- for(int i=0;i<phi.length;i++)
- for(int j=0;j<phi[i].length;j++)
- phi[i][j]=1.0/K;
- double[] gamma = new double[K];
-
- inference(d, phi, gamma);
-
- for(int n=0;n<doc.size();n++){
- Edge edge=doc.get(n);
- int tag=arr.F.argmax(phi[n]);
- ps.print(edge.getPhraseString());
- ps.print("\t");
- ps.print(edge.getContextString(true));
-
- ps.println(" ||| C=" + tag);
- }
- }
- }
-
- double log_sum(double log_a, double log_b)
- {
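-		// numerically stable computation of log(exp(log_a) + exp(log_b))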
- double v;
-
- if (log_a < log_b)
- v = log_b+Math.log(1 + Math.exp(log_a-log_b));
- else
- v = log_a+Math.log(1 + Math.exp(log_b-log_a));
- return(v);
- }
-
- double digamma(double x)
- {
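-		// asymptotic series for psi(x) evaluated at x+6, shifted back down
-		// via the recurrence psi(x) = psi(x+1) - 1/x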
- double p;
- x=x+6;
- p=1/(x*x);
- p=(((0.004166666666667*p-0.003968253986254)*p+
- 0.008333333333333)*p-0.083333333333333)*p;
- p=p+Math.log(x)-0.5/x-1/(x-1)-1/(x-2)-1/(x-3)-1/(x-4)-1/(x-5)-1/(x-6);
- return p;
- }
-
- double log_gamma(double x)
- {
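-		// Stirling series for log Gamma(x), using the same shift-by-6 trick as digamma()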
- double z=1/(x*x);
-
- x=x+6;
- z=(((-0.000595238095238*z+0.000793650793651)
- *z-0.002777777777778)*z+0.083333333333333)/x;
- z=(x-0.5)*Math.log(x)-x+0.918938533204673+z-Math.log(x-1)-
- Math.log(x-2)-Math.log(x-3)-Math.log(x-4)-Math.log(x-5)-Math.log(x-6);
- return z;
- }
-
- public void useThreadPool(ExecutorService threadPool)
- {
- pool = threadPool;
- }
-}//End of class
diff --git a/gi/posterior-regularisation/prjava/src/test/CorpusTest.java b/gi/posterior-regularisation/prjava/src/test/CorpusTest.java
deleted file mode 100644
index b4c3041f..00000000
--- a/gi/posterior-regularisation/prjava/src/test/CorpusTest.java
+++ /dev/null
@@ -1,60 +0,0 @@
-package test;
-
-import java.util.Arrays;
-import java.util.HashMap;
-
-import data.Corpus;
-import hmm.POS;
-
-public class CorpusTest {
-
- public static void main(String[] args) {
- Corpus c=new Corpus(POS.trainFilename);
-
-
- int idx=30;
-
-
- HashMap<String, Integer>vocab=
- (HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(Corpus.alphaFilename);
-
- HashMap<String, Integer>tagVocab=
- (HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(Corpus.tagalphaFilename);
-
-
- String [] dict=new String [vocab.size()+1];
- for(String key:vocab.keySet()){
- dict[vocab.get(key)]=key;
- }
- dict[dict.length-1]=Corpus.UNK_TOK;
-
- String [] tagdict=new String [tagVocab.size()+1];
- for(String key:tagVocab.keySet()){
- tagdict[tagVocab.get(key)]=key;
- }
- tagdict[tagdict.length-1]=Corpus.UNK_TOK;
-
- String[] sent=c.get(idx);
- int []data=c.getInt(idx);
-
-
- String []roundtrip=new String [sent.length];
- for(int i=0;i<sent.length;i++){
- roundtrip[i]=dict[data[i]];
- }
- System.out.println(Arrays.toString(sent));
- System.out.println(Arrays.toString(roundtrip));
-
- sent=c.tag.get(idx);
- data=c.tagData.get(idx);
-
-
- roundtrip=new String [sent.length];
- for(int i=0;i<sent.length;i++){
- roundtrip[i]=tagdict[data[i]];
- }
- System.out.println(Arrays.toString(sent));
- System.out.println(Arrays.toString(roundtrip));
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/test/HMMModelStats.java b/gi/posterior-regularisation/prjava/src/test/HMMModelStats.java
deleted file mode 100644
index d54525c8..00000000
--- a/gi/posterior-regularisation/prjava/src/test/HMMModelStats.java
+++ /dev/null
@@ -1,105 +0,0 @@
-package test;
-
-import hmm.HMM;
-import hmm.POS;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-
-import data.Corpus;
-
-public class HMMModelStats {
-
- public static String modelFilename="../posdata/posModel.out";
- public static String alphaFilename="../posdata/corpus.alphabet";
- public static String statsFilename="../posdata/model.stats";
-
- public static final int NUM_WORD=50;
-
- public static String testFilename="../posdata/en_test.conll";
-
- public static double [][]maxwt;
-
- public static void main(String[] args) {
- HashMap<String, Integer>vocab=
- (HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(alphaFilename);
-
- Corpus test=new Corpus(testFilename,vocab);
-
- String [] dict=new String [vocab.size()+1];
- for(String key:vocab.keySet()){
- dict[vocab.get(key)]=key;
- }
- dict[dict.length-1]=Corpus.UNK_TOK;
-
- HMM hmm=new HMM();
- hmm.readModel(modelFilename);
-
-
-
- PrintStream ps = null;
- try {
- ps = io.FileUtil.printstream(new File(statsFilename));
- } catch (IOException e) {
- e.printStackTrace();
- System.exit(1);
- }
-
- double [][] emit=hmm.getEmitProb();
- for(int i=0;i<emit.length;i++){
- ArrayList<IntDoublePair>l=new ArrayList<IntDoublePair>();
- for(int j=0;j<emit[i].length;j++){
- l.add(new IntDoublePair(j,emit[i][j]));
- }
- Collections.sort(l);
- ps.println(i);
- for(int j=0;j<NUM_WORD;j++){
- if(j>=dict.length){
- break;
- }
- ps.print(dict[l.get(j).idx]+"\t");
- if((1+j)%10==0){
- ps.println();
- }
- }
- ps.println("\n");
- }
-
- checkMaxwt(hmm,ps,test.getAllData());
-
-		int terminalSym=vocab.get(Corpus.END_SYM);
- //sample 10 sentences
- for(int i=0;i<10;i++){
- int []sent=hmm.sample(terminalSym);
- for(int j=0;j<sent.length;j++){
- ps.print(dict[sent[j]]+"\t");
- }
- ps.println();
- }
-
- ps.close();
-
- }
-
- public static void checkMaxwt(HMM hmm,PrintStream ps,int [][]data){
- double [][]emit=hmm.getEmitProb();
- maxwt=new double[emit.length][emit[0].length];
-
- hmm.computeMaxwt(maxwt,data);
- double sum=0;
- for(int i=0;i<maxwt.length;i++){
-			for(int j=0;j<maxwt[i].length;j++){
- sum+=maxwt[i][j];
- }
- }
-
- ps.println("max w t P(w_i|t): "+sum);
-
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/test/IntDoublePair.java b/gi/posterior-regularisation/prjava/src/test/IntDoublePair.java
deleted file mode 100644
index 3f9f0ad7..00000000
--- a/gi/posterior-regularisation/prjava/src/test/IntDoublePair.java
+++ /dev/null
@@ -1,23 +0,0 @@
-package test;
-
-public class IntDoublePair implements Comparable<IntDoublePair>{
-	double val;
-	int idx;
-	//orders pairs by descending val
-	public int compareTo(IntDoublePair pair){
-		if(pair.val>val){
-			return 1;
-		}
-		if(pair.val<val){
-			return -1;
-		}
-		return 0;
-	}
- public IntDoublePair(int i,double v){
- val=v;
- idx=i;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java b/gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java
deleted file mode 100644
index 9059a59e..00000000
--- a/gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java
+++ /dev/null
@@ -1,131 +0,0 @@
-package test;
-
-
-
-import optimization.gradientBasedMethods.ProjectedGradientDescent;
-import optimization.gradientBasedMethods.ProjectedObjective;
-import optimization.gradientBasedMethods.stats.OptimizerStats;
-import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc;
-import optimization.linesearch.InterpolationPickFirstStep;
-import optimization.linesearch.LineSearchMethod;
-import optimization.projections.BoundsProjection;
-import optimization.projections.Projection;
-import optimization.projections.SimplexProjection;
-import optimization.stopCriteria.CompositeStopingCriteria;
-import optimization.stopCriteria.GradientL2Norm;
-import optimization.stopCriteria.ProjectedGradientL2Norm;
-import optimization.stopCriteria.StopingCriteria;
-import optimization.stopCriteria.ValueDifference;
-
-
-/**
- * @author javg
- *
- * Toy objective for testing the projected optimizers:
- * f(x,y) = a(x-dx)^2 + b(y-dy)^2, minimized subject to a projection constraint.
- */
-public class X2y2WithConstraints extends ProjectedObjective{
-
-
- double a, b;
- double dx;
- double dy;
- Projection projection;
-
-
- public X2y2WithConstraints(double a, double b, double[] params, double dx, double dy, Projection proj){
- //projection = new BoundsProjection(0.2,Double.MAX_VALUE);
- super();
- projection = proj;
- this.a = a;
- this.b = b;
- this.dx = dx;
- this.dy = dy;
- setInitialParameters(params);
- System.out.println("Function " +a+"(x-"+dx+")^2 + "+b+"(y-"+dy+")^2");
- System.out.println("Gradient " +(2*a)+"(x-"+dx+") ; "+(b*2)+"(y-"+dy+")");
- printParameters();
- projection.project(parameters);
- printParameters();
- gradient = new double[2];
- }
-
- public double getValue() {
- functionCalls++;
- return a*(parameters[0]-dx)*(parameters[0]-dx)+b*((parameters[1]-dy)*(parameters[1]-dy));
- }
-
- public double[] getGradient() {
- if(gradient == null){
- gradient = new double[2];
- }
- gradientCalls++;
- gradient[0]=2*a*(parameters[0]-dx);
- gradient[1]=2*b*(parameters[1]-dy);
- return gradient;
- }
-
-
- public double[] projectPoint(double[] point) {
- double[] newPoint = point.clone();
- projection.project(newPoint);
- return newPoint;
- }
-
- public void optimizeWithProjectedGradientDescent(LineSearchMethod ls, OptimizerStats stats, X2y2WithConstraints o){
- ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls);
- StopingCriteria stopGrad = new ProjectedGradientL2Norm(0.001);
- StopingCriteria stopValue = new ValueDifference(0.001);
- CompositeStopingCriteria compositeStop = new CompositeStopingCriteria();
- compositeStop.add(stopGrad);
- compositeStop.add(stopValue);
-
- optimizer.setMaxIterations(5);
-		boolean succeeded = optimizer.optimize(o,stats,compositeStop);
-		System.out.println("Ended optimization Projected Gradient Descent\n" + stats.prettyPrint(1));
-		System.out.println("Solution: " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1]);
-		if(succeeded){
- System.out.println("Ended optimization in " + optimizer.getCurrentIteration());
- }else{
- System.out.println("Failed to optimize");
- }
- }
-
-
-
- public String toString(){
-
- return "P1: " + parameters[0] + " P2: " + parameters[1] + " value " + getValue() + " grad (" + getGradient()[0] + ":" + getGradient()[1]+")";
- }
-
- public static void main(String[] args) {
- double a = 1;
- double b=1;
- double x0 = 0;
- double y0 =1;
- double dx = 0.5;
- double dy = 0.2 ;
- double [] parameters = new double[2];
- parameters[0] = x0;
- parameters[1] = y0;
- X2y2WithConstraints o = new X2y2WithConstraints(a,b,parameters,dx,dy,
- new SimplexProjection(0.5)
- //new BoundsProjection(0.0,0.4)
- );
- System.out.println("Starting optimization " + " x0 " + o.parameters[0]+ " x1 " + o.parameters[1] + " a " + a + " b "+b );
- o.setDebugLevel(4);
-
- LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1));
-
- OptimizerStats stats = new OptimizerStats();
- o.optimizeWithProjectedGradientDescent(ls, stats, o);
-
-// o = new x2y2WithConstraints(a,b,x0,y0,dx,dy);
-// stats = new OptimizerStats();
-// o.optimizeWithSpectralProjectedGradientDescent(stats, o);
- }
-
-
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/Array.java b/gi/posterior-regularisation/prjava/src/util/Array.java
deleted file mode 100644
index cc4725af..00000000
--- a/gi/posterior-regularisation/prjava/src/util/Array.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package util;
-
-import java.util.Arrays;
-
-public class Array {
-
-
-
- public static void sortDescending(double[] ds){
- for (int i = 0; i < ds.length; i++) ds[i] = -ds[i];
- Arrays.sort(ds);
- for (int i = 0; i < ds.length; i++) ds[i] = -ds[i];
- }
-
- /**
- * Return a new reversed array
- * @param array
- * @return
- */
- public static int[] reverseIntArray(int[] array){
- int[] reversed = new int[array.length];
- for (int i = 0; i < reversed.length; i++) {
- reversed[i] = array[reversed.length-1-i];
- }
- return reversed;
- }
-
-	public static String[] sumArray(String[] in, int from){ // returns the suffix of in starting at index 'from'; nothing is summed, despite the name
- String[] res = new String[in.length-from];
- for (int i = from; i < in.length; i++) {
- res[i-from] = in[i];
- }
- return res;
- }
-
- public static void main(String[] args) {
- int[] i = {1,2,3,4};
- util.Printing.printIntArray(i, null, "original");
- util.Printing.printIntArray(reverseIntArray(i), null, "reversed");
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/ArrayMath.java b/gi/posterior-regularisation/prjava/src/util/ArrayMath.java
deleted file mode 100644
index 398a13a2..00000000
--- a/gi/posterior-regularisation/prjava/src/util/ArrayMath.java
+++ /dev/null
@@ -1,186 +0,0 @@
-package util;
-
-import java.util.Arrays;
-
-public class ArrayMath {
-
- public static double dotProduct(double[] v1, double[] v2) {
- assert(v1.length == v2.length);
- double result = 0;
- for(int i = 0; i < v1.length; i++)
- result += v1[i]*v2[i];
- return result;
- }
-
- public static double twoNormSquared(double[] v) {
- double result = 0;
- for(double d : v)
- result += d*d;
- return result;
- }
-
- public static boolean containsInvalid(double[] v) {
- for(int i = 0; i < v.length; i++)
- if(Double.isNaN(v[i]) || Double.isInfinite(v[i]))
- return true;
- return false;
- }
-
-
-
- public static double safeAdd(double[] toAdd) {
- // Make sure there are no positive infinities
- double sum = 0;
- for(int i = 0; i < toAdd.length; i++) {
- assert(!(Double.isInfinite(toAdd[i]) && toAdd[i] > 0));
- assert(!Double.isNaN(toAdd[i]));
- sum += toAdd[i];
- }
-
- return sum;
- }
-
- /* Methods for filling integer and double arrays (of up to four dimensions) with the given value. */
-
- public static void set(int[][][][] array, int value) {
- for(int i = 0; i < array.length; i++) {
- set(array[i], value);
- }
- }
-
- public static void set(int[][][] array, int value) {
- for(int i = 0; i < array.length; i++) {
- set(array[i], value);
- }
- }
-
- public static void set(int[][] array, int value) {
- for(int i = 0; i < array.length; i++) {
- set(array[i], value);
- }
- }
-
- public static void set(int[] array, int value) {
- Arrays.fill(array, value);
- }
-
-
- public static void set(double[][][][] array, double value) {
- for(int i = 0; i < array.length; i++) {
- set(array[i], value);
- }
- }
-
- public static void set(double[][][] array, double value) {
- for(int i = 0; i < array.length; i++) {
- set(array[i], value);
- }
- }
-
- public static void set(double[][] array, double value) {
- for(int i = 0; i < array.length; i++) {
- set(array[i], value);
- }
- }
-
- public static void set(double[] array, double value) {
- Arrays.fill(array, value);
- }
-
- public static void setEqual(double[][][][] dest, double[][][][] source){
- for (int i = 0; i < source.length; i++) {
- setEqual(dest[i],source[i]);
- }
- }
-
-
-	public static void setEqual(double[][][] dest, double[][][] source){
-		for (int i = 0; i < source.length; i++) {
-			setEqual(dest[i],source[i]);
-		}
-	}
-
-
-	public static void setEqual(double[][] dest, double[][] source){
-		for (int i = 0; i < source.length; i++) {
-			setEqual(dest[i],source[i]);
-		}
-	}
-
- public static void setEqual(double[] dest, double[] source){
- System.arraycopy(source, 0, dest, 0, source.length);
- }
-
- public static void plusEquals(double[][][][] array, double val){
- for (int i = 0; i < array.length; i++) {
- plusEquals(array[i], val);
- }
- }
-
- public static void plusEquals(double[][][] array, double val){
- for (int i = 0; i < array.length; i++) {
- plusEquals(array[i], val);
- }
- }
-
- public static void plusEquals(double[][] array, double val){
- for (int i = 0; i < array.length; i++) {
- plusEquals(array[i], val);
- }
- }
-
- public static void plusEquals(double[] array, double val){
- for (int i = 0; i < array.length; i++) {
- array[i] += val;
- }
- }
-
-
- public static double sum(double[] array) {
- double res = 0;
- for (int i = 0; i < array.length; i++) res += array[i];
- return res;
- }
-
-
-
- public static double[][] deepclone(double[][] in){
- double[][] res = new double[in.length][];
- for (int i = 0; i < res.length; i++) {
- res[i] = in[i].clone();
- }
- return res;
- }
-
-
- public static double[][][] deepclone(double[][][] in){
- double[][][] res = new double[in.length][][];
- for (int i = 0; i < res.length; i++) {
- res[i] = deepclone(in[i]);
- }
- return res;
- }
-
- public static double cosine(double[] a,
- double[] b) {
- return (dotProduct(a, b)+1e-5)/(Math.sqrt(dotProduct(a, a)+1e-5)*Math.sqrt(dotProduct(b, b)+1e-5));
- }
-
- public static double max(double[] ds) {
- double max = Double.NEGATIVE_INFINITY;
- for(double d:ds) max = Math.max(d,max);
- return max;
- }
-
- public static void exponentiate(double[] a) {
- for (int i = 0; i < a.length; i++) {
- a[i] = Math.exp(a[i]);
- }
- }
-
- public static int sum(int[] array) {
- int res = 0;
- for (int i = 0; i < array.length; i++) res += array[i];
- return res;
- }
-}
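
The 1e-5 terms in cosine above are additive smoothing, so the measure stays
defined when a vector is all zeros. A quick illustrative check (not part of the
original sources):

    public class CosineDemo {
        public static void main(String[] args) {
            double[] zero = {0.0, 0.0};
            double[] v = {3.0, 4.0};
            // (0 + 1e-5) / (sqrt(0 + 1e-5) * sqrt(0 + 1e-5)) == 1.0
            System.out.println(util.ArrayMath.cosine(zero, zero));
            // (25 + 1e-5) / (25 + 1e-5) == 1.0, up to rounding
            System.out.println(util.ArrayMath.cosine(v, v));
        }
    }
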
diff --git a/gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java b/gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java
deleted file mode 100644
index 1ff1ae4a..00000000
--- a/gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java
+++ /dev/null
@@ -1,14 +0,0 @@
-package util;
-
-public interface DifferentiableObjective {
-
- public double getValue();
-
- public void getGradient(double[] gradient);
-
- public void getParameters(double[] params);
-
- public void setParameters(double[] newParameters);
-
- public int getNumParameters();
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/DigammaFunction.java b/gi/posterior-regularisation/prjava/src/util/DigammaFunction.java
deleted file mode 100644
index ff1478ad..00000000
--- a/gi/posterior-regularisation/prjava/src/util/DigammaFunction.java
+++ /dev/null
@@ -1,21 +0,0 @@
-package util;
-
-public class DigammaFunction {
- public static double expDigamma(double number){
- if(number==0)return number;
- return Math.exp(digamma(number));
- }
-
- public static double digamma(double number){
- if(number > 7){
-			return digammaApprox(number-0.5);
- }else{
- return digamma(number+1) - 1.0/number;
- }
- }
-
-	private static double digammaApprox(double value){
- return Math.log(value) + 0.04167*Math.pow(value, -2) - 0.00729*Math.pow(value, -4)
- + 0.00384*Math.pow(value, -6) - 0.00413*Math.pow(value, -8);
- }
-}
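
For reference, the code above appears to combine the digamma recurrence with an
asymptotic expansion about the midpoint; the hard-coded constants are truncated
decimals of 1/24, 7/960, 31/8064 and 127/30720:

    \psi(x) = \psi(x+1) - \frac{1}{x}, \qquad
    \psi\!\left(y+\tfrac{1}{2}\right) \approx \ln y + \frac{1}{24y^{2}}
        - \frac{7}{960y^{4}} + \frac{31}{8064y^{6}} - \frac{127}{30720y^{8}}

so digammaApprox(number - 0.5) approximates \psi(number) once the recurrence
has pushed the argument above 7.
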
diff --git a/gi/posterior-regularisation/prjava/src/util/FileSystem.java b/gi/posterior-regularisation/prjava/src/util/FileSystem.java
deleted file mode 100644
index d7812e40..00000000
--- a/gi/posterior-regularisation/prjava/src/util/FileSystem.java
+++ /dev/null
@@ -1,21 +0,0 @@
-package util;
-
-import java.io.File;
-
-public class FileSystem {
- public static boolean createDir(String directory) {
-
- File dir = new File(directory);
- if (!dir.isDirectory()) {
- boolean success = dir.mkdirs();
- if (!success) {
- System.out.println("Unable to create directory " + directory);
- return false;
- }
- System.out.println("Created directory " + directory);
- } else {
- System.out.println("Reusing directory " + directory);
- }
- return true;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/InputOutput.java b/gi/posterior-regularisation/prjava/src/util/InputOutput.java
deleted file mode 100644
index da7f71bf..00000000
--- a/gi/posterior-regularisation/prjava/src/util/InputOutput.java
+++ /dev/null
@@ -1,67 +0,0 @@
-package util;
-
-import java.io.BufferedReader;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.OutputStream;
-import java.io.PrintStream;
-import java.io.UnsupportedEncodingException;
-import java.util.Properties;
-import java.util.zip.GZIPInputStream;
-import java.util.zip.GZIPOutputStream;
-
-public class InputOutput {
-
-	/**
-	 * Opens a file, either gzip-compressed or plain, based on the file suffix.
-	 */
- public static BufferedReader openReader(String fileName) throws UnsupportedEncodingException, FileNotFoundException, IOException{
- System.out.println("Reading: " + fileName);
- BufferedReader reader;
- fileName = fileName.trim();
- if(fileName.endsWith("gz")){
- reader = new BufferedReader(
- new InputStreamReader(new GZIPInputStream(new FileInputStream(fileName)),"UTF8"));
- }else{
- reader = new BufferedReader(new InputStreamReader(
- new FileInputStream(fileName), "UTF8"));
- }
-
- return reader;
- }
-
-
- public static PrintStream openWriter(String fileName)
- throws UnsupportedEncodingException, FileNotFoundException, IOException{
-		System.out.println("Writing to file: " + fileName);
-		PrintStream writer;
-		fileName = fileName.trim();
-		if(fileName.endsWith("gz")){
-			writer = new PrintStream(new GZIPOutputStream(new FileOutputStream(fileName)),
-					true, "UTF-8");
-
-		}else{
-			writer = new PrintStream(new FileOutputStream(fileName),
-					true, "UTF-8");
-
-		}
-
-		return writer;
- }
-
- public static Properties readPropertiesFile(String fileName) {
- Properties properties = new Properties();
- try {
- properties.load(new FileInputStream(fileName));
- } catch (IOException e) {
- e.printStackTrace();
- throw new AssertionError("Wrong properties file " + fileName);
- }
- System.out.println(properties.toString());
-
- return properties;
- }
-}
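
A minimal sketch of how the two helpers compose (file names hypothetical);
gzip handling is keyed entirely off the .gz suffix:

    public class CopyDemo {
        public static void main(String[] args) throws Exception {
            java.io.BufferedReader in = util.InputOutput.openReader("corpus.txt.gz");
            java.io.PrintStream out = util.InputOutput.openWriter("copy.txt.gz");
            for (String line = in.readLine(); line != null; line = in.readLine())
                out.println(line);
            in.close();
            out.close();
        }
    }
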
diff --git a/gi/posterior-regularisation/prjava/src/util/LogSummer.java b/gi/posterior-regularisation/prjava/src/util/LogSummer.java
deleted file mode 100644
index 117393b9..00000000
--- a/gi/posterior-regularisation/prjava/src/util/LogSummer.java
+++ /dev/null
@@ -1,86 +0,0 @@
-package util;
-
-import java.lang.Math;
-
-/*
- * Math tool for computing logs of sums, when the terms of the sum are already in log form.
- * (Useful if the terms of the sum are very small numbers.)
- */
-public class LogSummer {
-
- private LogSummer() {
- }
-
- /**
- * Given log(a) and log(b), computes log(a + b).
- *
- * @param loga log of first sum term
- * @param logb log of second sum term
- * @return log(sum), where sum = a + b
- */
- public static double sum(double loga, double logb) {
- assert(!Double.isNaN(loga));
- assert(!Double.isNaN(logb));
-
- if(Double.isInfinite(loga))
- return logb;
- if(Double.isInfinite(logb))
- return loga;
-
- double maxLog;
- double difference;
- if(loga > logb) {
- difference = logb - loga;
- maxLog = loga;
- }
- else {
- difference = loga - logb;
- maxLog = logb;
- }
-
- return Math.log1p(Math.exp(difference)) + maxLog;
- }
-
- /**
- * Computes log(exp(array[index]) + b), and
- * modifies array[index] to contain this new value.
- *
- * @param array array to modify
- * @param index index at which to modify
- * @param logb log of the second sum term
- */
- public static void sum(double[] array, int index, double logb) {
- array[index] = sum(array[index], logb);
- }
-
- /**
- * Computes log(a + b + c + ...) from log(a), log(b), log(c), ...
- * by recursively splitting the input and delegating to the sum method.
- *
- * @param terms an array containing the log of all the terms for the sum
- * @return log(sum), where sum = exp(terms[0]) + exp(terms[1]) + ...
- */
- public static double sumAll(double... terms) {
- return sumAllHelper(terms, 0, terms.length);
- }
-
- /**
- * Computes log(a_0 + a_1 + ...) from a_0 = exp(terms[begin]),
- * a_1 = exp(terms[begin + 1]), ..., a_{end - 1 - begin} = exp(terms[end - 1]).
- *
- * @param terms an array containing the log of all the terms for the sum,
- * and possibly some other terms that will not go into the sum
- * @return log of the sum of the elements in the [begin, end) region of the terms array
- */
- private static double sumAllHelper(final double[] terms, final int begin, final int end) {
- int length = end - begin;
- switch(length) {
- case 0: return Double.NEGATIVE_INFINITY;
- case 1: return terms[begin];
- default:
- int midIndex = begin + length/2;
- return sum(sumAllHelper(terms, begin, midIndex), sumAllHelper(terms, midIndex, end));
- }
- }
-
-} \ No newline at end of file
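
The identity behind sum above, with the larger term factored out so that log1p
always sees a non-positive exponent and the computation cannot overflow:

    \log(a + b) = \log a + \log\!\left(1 + e^{\log b - \log a}\right),
    \qquad \log a \ge \log b
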
diff --git a/gi/posterior-regularisation/prjava/src/util/MathUtil.java b/gi/posterior-regularisation/prjava/src/util/MathUtil.java
deleted file mode 100644
index 799b1faf..00000000
--- a/gi/posterior-regularisation/prjava/src/util/MathUtil.java
+++ /dev/null
@@ -1,148 +0,0 @@
-package util;
-
-import java.util.Random;
-
-public class MathUtil {
- public static final boolean closeToOne(double number){
- return Math.abs(number-1) < 1.E-10;
- }
-
- public static final boolean closeToZero(double number){
- return Math.abs(number) < 1.E-5;
- }
-
-	/**
-	 * Return a random multinomial distribution.
-	 * @param size number of outcomes
-	 * @param r source of randomness
-	 * @return normalised probability vector of length size
-	 */
-	public static final double[] randomVector(int size, Random r){
- double[] random = new double[size];
- double sum=0;
- for(int i = 0; i < size; i++){
- double number = r.nextDouble();
- random[i] = number;
- sum+=number;
- }
- for(int i = 0; i < size; i++){
- random[i] = random[i]/sum;
- }
- return random;
- }
-
-
-
- public static double sum(double[] ds) {
- double res = 0;
- for (int i = 0; i < ds.length; i++) {
- res+=ds[i];
- }
- return res;
- }
-
- public static double max(double[] ds) {
- double res = Double.NEGATIVE_INFINITY;
- for (int i = 0; i < ds.length; i++) {
- res = Math.max(res, ds[i]);
- }
- return res;
- }
-
- public static double min(double[] ds) {
- double res = Double.POSITIVE_INFINITY;
- for (int i = 0; i < ds.length; i++) {
- res = Math.min(res, ds[i]);
- }
- return res;
- }
-
-
-	public static double KLDistance(double[] p, double[] q) { // n.b. computes sum_j q_j*log(q_j/p_j) = KL(q||p), skipping zero entries
- int len = p.length;
- double kl = 0;
- for (int j = 0; j < len; j++) {
- if (p[j] == 0 || q[j] == 0) {
- continue;
- } else {
- kl += q[j] * Math.log(q[j] / p[j]);
- }
-
- }
- return kl;
- }
-
- public static double L2Distance(double[] p, double[] q) {
- int len = p.length;
- double l2 = 0;
- for (int j = 0; j < len; j++) {
- if (p[j] == 0 || q[j] == 0) {
- continue;
- } else {
- l2 += (q[j] - p[j])*(q[j] - p[j]);
- }
-
- }
- return Math.sqrt(l2);
- }
-
- public static double L1Distance(double[] p, double[] q) {
- int len = p.length;
- double l1 = 0;
- for (int j = 0; j < len; j++) {
- if (p[j] == 0 || q[j] == 0) {
- continue;
- } else {
- l1 += Math.abs(q[j] - p[j]);
- }
-
- }
- return l1;
- }
-
- public static double dot(double[] ds, double[] ds2) {
- double res = 0;
- for (int i = 0; i < ds2.length; i++) {
- res+= ds[i]*ds2[i];
- }
- return res;
- }
-
- public static double expDigamma(double number){
- return Math.exp(digamma(number));
- }
-
- public static double digamma(double number){
- if(number > 7){
-			return digammaApprox(number-0.5);
- }else{
- return digamma(number+1) - 1.0/number;
- }
- }
-
-	private static double digammaApprox(double value){
- return Math.log(value) + 0.04167*Math.pow(value, -2) - 0.00729*Math.pow(value, -4)
- + 0.00384*Math.pow(value, -6) - 0.00413*Math.pow(value, -8);
- }
-
-	public static double eulerGamma = 0.57721566490153286060651209008240243;
- // FIXME -- so far just the initialization from Minka's paper "Estimating a Dirichlet distribution".
- public static double invDigamma(double y) {
- if (y>= -2.22) return Math.exp(y)+0.5;
- return -1.0/(y+eulerGamma);
- }
-
-
-
- public static void main(String[] args) {
- for(double i = 0; i < 10 ; i+=0.1){
- System.out.println(i+"\t"+expDigamma(i)+"\t"+(i-0.5));
- }
-// double gammaValue = (expDigamma(3)/expDigamma(10) + expDigamma(3)/expDigamma(10) + expDigamma(4)/expDigamma(10));
-// double normalValue = 3/10+3/4+10/10;
-// System.out.println("Gamma " + gammaValue + " normal " + normalValue);
- }
-
-
-
-}
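
For reference, invDigamma implements the initialisation from Minka's paper
(gamma here is the Euler-Mascheroni constant defined just above):

    \psi^{-1}(y) \approx \begin{cases}
        e^{y} + \tfrac{1}{2}, & y \ge -2.22 \\
        -\dfrac{1}{y + \gamma}, & y < -2.22
    \end{cases}

As the FIXME notes, a full inverse would refine this starting point with a few
Newton steps; that refinement is not implemented here.
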
diff --git a/gi/posterior-regularisation/prjava/src/util/Matrix.java b/gi/posterior-regularisation/prjava/src/util/Matrix.java
deleted file mode 100644
index 8fb6d911..00000000
--- a/gi/posterior-regularisation/prjava/src/util/Matrix.java
+++ /dev/null
@@ -1,16 +0,0 @@
-package util;
-
-public class Matrix {
- int x;
- int y;
- double[][] values;
-
- public Matrix(int x, int y){
- this.x = x;
- this.y=y;
- values = new double[x][y];
- }
-
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/MemoryTracker.java b/gi/posterior-regularisation/prjava/src/util/MemoryTracker.java
deleted file mode 100644
index 83a65611..00000000
--- a/gi/posterior-regularisation/prjava/src/util/MemoryTracker.java
+++ /dev/null
@@ -1,47 +0,0 @@
-package util;
-
-
-public class MemoryTracker {
-
- double initM,finalM;
- boolean start = false,finish = false;
-
- public MemoryTracker(){
-
- }
-
- public void start(){
- System.gc();
- System.gc();
- System.gc();
-		initM = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/(1024.0*1024.0);
- start = true;
- }
-
- public void finish(){
- if(!start){
-			throw new RuntimeException("Cannot stop before starting");
- }
- System.gc();
- System.gc();
- System.gc();
-		finalM = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/(1024.0*1024.0);
- finish = true;
- }
-
- public String print(){
- if(!finish){
-			throw new RuntimeException("Cannot print before stopping");
- }
- return "Used: " + (finalM - initM) + "MB";
- }
-
- public void clear(){
- initM = 0;
- finalM = 0;
- finish = false;
- start = false;
- }
-
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/Pair.java b/gi/posterior-regularisation/prjava/src/util/Pair.java
deleted file mode 100644
index 7b1f108d..00000000
--- a/gi/posterior-regularisation/prjava/src/util/Pair.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package util;
-
-public class Pair<O1, O2> {
- public O1 _first;
- public O2 _second;
-
- public final O1 first() {
- return _first;
- }
-
- public final O2 second() {
- return _second;
- }
-
- public final void setFirst(O1 value){
- _first = value;
- }
-
- public final void setSecond(O2 value){
- _second = value;
- }
-
- public Pair(O1 first, O2 second) {
- _first = first;
- _second = second;
- }
-
- public String toString(){
- return _first + " " + _second;
- }
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/Printing.java b/gi/posterior-regularisation/prjava/src/util/Printing.java
deleted file mode 100644
index 14fcbe91..00000000
--- a/gi/posterior-regularisation/prjava/src/util/Printing.java
+++ /dev/null
@@ -1,158 +0,0 @@
-package util;
-
-public class Printing {
- static java.text.DecimalFormat fmt = new java.text.DecimalFormat();
-
- public static String padWithSpace(String s, int len){
- StringBuffer sb = new StringBuffer();
- while(sb.length() +s.length() < len){
- sb.append(" ");
- }
- sb.append(s);
- return sb.toString();
- }
-
- public static String prettyPrint(double d, String patt, int len) {
- fmt.applyPattern(patt);
- String s = fmt.format(d);
- while (s.length() < len) {
- s = " " + s;
- }
- return s;
- }
-
- public static String formatTime(long duration) {
- StringBuilder sb = new StringBuilder();
-		double d = duration / 1000.0;
- fmt.applyPattern("00");
- sb.append(fmt.format((int) (d / (60 * 60))) + ":");
- d -= ((int) d / (60 * 60)) * 60 * 60;
- sb.append(fmt.format((int) (d / 60)) + ":");
- d -= ((int) d / 60) * 60;
- fmt.applyPattern("00.0");
- sb.append(fmt.format(d));
- return sb.toString();
- }
-
-
- public static String doubleArrayToString(double[] array, String[] labels, String arrayName) {
- StringBuffer res = new StringBuffer();
- res.append(arrayName);
- res.append("\n");
- for (int i = 0; i < array.length; i++) {
- if (labels == null){
- res.append(i+" \t");
- }else{
- res.append(labels[i]+ "\t");
- }
- }
- res.append("sum\n");
- double sum = 0;
- for (int i = 0; i < array.length; i++) {
- res.append(prettyPrint(array[i],
- "0.00000E00", 8) + "\t");
- sum+=array[i];
- }
- res.append(prettyPrint(sum,
- "0.00000E00", 8)+"\n");
- return res.toString();
- }
-
-
-
- public static void printDoubleArray(double[] array, String labels[], String arrayName) {
- System.out.println(doubleArrayToString(array, labels,arrayName));
- }
-
-
- public static String doubleArrayToString(double[][] array, String[] labels1, String[] labels2,
- String arrayName){
- StringBuffer res = new StringBuffer();
- res.append(arrayName);
- res.append("\n\t");
-		//Accumulates the column sums (the extra slot holds the grand total)
-		double[] sums = new double[array[0].length+1];
-		//Print the column headings
- for (int i = 0; i < array[0].length; i++) {
- if (labels1 == null){
- res.append(i+" \t");
- }else{
- res.append(labels1[i]+" \t");
- }
- }
- res.append("sum\n");
- double sum = 0;
-		//For each row, print its heading
- for (int i = 0; i < array.length; i++) {
- if (labels2 == null){
- res.append(i+"\t");
- }else{
- res.append(labels2[i]+"\t");
- }
- //Print values for that row
- for (int j = 0; j < array[0].length; j++) {
- res.append(" " + prettyPrint(array[i][j],
- "0.00000E00", 8) + "\t");
- sums[j] += array[i][j];
- sum+=array[i][j]; //Sum all values of that row
- }
- //Print row sum
- res.append(prettyPrint(sum,"0.00000E00", 8)+"\n");
- sums[array[0].length]+=sum;
- sum=0;
- }
- res.append("sum\t");
-		//Print the column sums
- for (int i = 0; i < array[0].length+1; i++) {
- res.append(prettyPrint(sums[i],"0.00000E00", 8)+"\t");
- }
- res.append("\n");
- return res.toString();
- }
-
- public static void printDoubleArray(double[][] array, String[] labels1, String[] labels2
- , String arrayName) {
- System.out.println(doubleArrayToString(array, labels1,labels2,arrayName));
- }
-
-
- public static void printIntArray(int[][] array, String[] labels1, String[] labels2, String arrayName,
- int size1, int size2) {
- System.out.println(arrayName);
- for (int i = 0; i < size1; i++) {
- for (int j = 0; j < size2; j++) {
- System.out.print(" " + array[i][j] + " ");
-
- }
- System.out.println();
- }
- System.out.println();
- }
-
- public static String intArrayToString(int[] array, String[] labels, String arrayName) {
- StringBuffer res = new StringBuffer();
- res.append(arrayName);
- for (int i = 0; i < array.length; i++) {
- res.append(" " + array[i] + " ");
-
- }
- res.append("\n");
- return res.toString();
- }
-
- public static void printIntArray(int[] array, String[] labels, String arrayName) {
- System.out.println(intArrayToString(array, labels,arrayName));
- }
-
- public static String toString(double[][] d){
- StringBuffer sb = new StringBuffer();
- for (int i = 0; i < d.length; i++) {
- for (int j = 0; j < d[0].length; j++) {
- sb.append(prettyPrint(d[i][j], "0.00E0", 10));
- }
- sb.append("\n");
- }
- return sb.toString();
- }
-
-}
diff --git a/gi/posterior-regularisation/prjava/src/util/Sorters.java b/gi/posterior-regularisation/prjava/src/util/Sorters.java
deleted file mode 100644
index 836444e5..00000000
--- a/gi/posterior-regularisation/prjava/src/util/Sorters.java
+++ /dev/null
@@ -1,28 +0,0 @@
-package util;
-
-import java.util.Comparator;
-
-public class Sorters {
-	public static class sortWordsCounts implements Comparator<Pair<Integer,Integer>>{
-
-		/**
-		 * Sorter for a pair of (word id, count). Sorts ascending by count;
-		 * returning 0 on ties keeps the Comparator contract.
-		 */
-		public int compare(Pair<Integer,Integer> p1, Pair<Integer,Integer> p2) {
-			return p1.second().compareTo(p2.second());
-		}
-
-	}
-
-	public static class sortWordsDouble implements Comparator<Pair<Integer,Double>>{
-
-		/**
-		 * Sorter for a pair of (word id, value). Sorts descending by value.
-		 */
-		public int compare(Pair<Integer,Double> p1, Pair<Integer,Double> p2) {
-			return p2.second().compareTo(p1.second());
-		}
-
-	}
-}
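
Typical use of these comparators (data hypothetical):

    import java.util.ArrayList;
    import java.util.Collections;
    import util.Pair;
    import util.Sorters;

    public class SorterDemo {
        public static void main(String[] args) {
            ArrayList<Pair<Integer, Integer>> counts = new ArrayList<Pair<Integer, Integer>>();
            counts.add(new Pair<Integer, Integer>(2, 11));  // word id 2, count 11
            counts.add(new Pair<Integer, Integer>(7, 3));   // word id 7, count 3
            Collections.sort(counts, new Sorters.sortWordsCounts());
            System.out.println(counts);  // ascending by count: [7 3, 2 11]
        }
    }
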
diff --git a/gi/posterior-regularisation/prjava/train-PR-cluster.sh b/gi/posterior-regularisation/prjava/train-PR-cluster.sh
deleted file mode 100755
index 67552c00..00000000
--- a/gi/posterior-regularisation/prjava/train-PR-cluster.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-
-d=`dirname $0`
-java -ea -Xmx30g -cp $d/prjava.jar:$d/lib/trove-2.0.2.jar:$d/lib/optimization.jar:$d/lib/jopt-simple-3.2.jar:$d/lib/commons-math-2.1.jar phrase.Trainer $*
diff --git a/gi/posterior-regularisation/projected_gradient.cc b/gi/posterior-regularisation/projected_gradient.cc
deleted file mode 100644
index f7c39817..00000000
--- a/gi/posterior-regularisation/projected_gradient.cc
+++ /dev/null
@@ -1,87 +0,0 @@
-//
-// Minimises given functional using the projected gradient method. Based on
-// algorithm and demonstration example in Linear and Nonlinear Programming,
-// Luenberger and Ye, 3rd ed., p 370.
-//
-
-#include "invert.hh"
-#include <iostream>
-
-using namespace std;
-
-double
-f(double x1, double x2, double x3, double x4)
-{
- return x1 * x1 + x2 * x2 + x3 * x3 + x4 * x4 - 2 * x1 - 3 * x4;
-}
-
-ublas::vector<double>
-g(double x1, double x2, double x3, double x4)
-{
- ublas::vector<double> v(4);
- v(0) = 2 * x1 - 2;
- v(1) = 2 * x2;
- v(2) = 2 * x3;
- v(3) = 2 * x4 - 3;
- return v;
-}
-
-ublas::matrix<double>
-activeConstraints(double x1, double x2, double x3, double x4)
-{
- int n = 2;
- if (x1 == 0) ++n;
- if (x2 == 0) ++n;
- if (x3 == 0) ++n;
- if (x4 == 0) ++n;
-
- ublas::matrix<double> a(n,4);
- a(0, 0) = 2; a(0, 1) = 1; a(0, 2) = 1; a(0, 3) = 4;
- a(1, 0) = 1; a(1, 1) = 1; a(1, 2) = 2; a(1, 3) = 1;
-
- int c = 2;
- if (x1 == 0) a(c++, 0) = 1;
- if (x2 == 0) a(c++, 1) = 1;
- if (x3 == 0) a(c++, 2) = 1;
- if (x4 == 0) a(c++, 3) = 1;
-
- return a;
-}
-
-ublas::matrix<double>
-projection(const ublas::matrix<double> &a)
-{
- ublas::matrix<double> aT = ublas::trans(a);
- ublas::matrix<double> inv(a.size1(), a.size1());
- bool ok = invert_matrix(ublas::matrix<double>(ublas::prod(a, aT)), inv);
- assert(ok && "Failed to invert matrix");
- return ublas::identity_matrix<double>(4) -
- ublas::prod(aT, ublas::matrix<double>(ublas::prod(inv, a)));
-}
-
-int main(int argc, char *argv[])
-{
- double x1 = 2, x2 = 2, x3 = 1, x4 = 0;
-
- double fval = f(x1, x2, x3, x4);
- cout << "f = " << fval << endl;
- ublas::vector<double> grad = g(x1, x2, x3, x4);
- cout << "g = " << grad << endl;
- ublas::matrix<double> A = activeConstraints(x1, x2, x3, x4);
- cout << "A = " << A << endl;
- ublas::matrix<double> P = projection(A);
- cout << "P = " << P << endl;
- // the direction of movement
- ublas::vector<double> d = prod(P, grad);
- cout << "d = " << (d / d(0)) << endl;
-
- // special case for d = 0
-
- // next solve for limits on the line search
-
- // then use golden rule technique between these values (if bounded)
-
- // or simple Armijo's rule technique
-
- return 0;
-}
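
In matrix form, with A the active-constraint matrix assembled above (the two
equality rows plus one row per variable sitting at its zero bound), the code
computes the projector and direction

    P = I - A^{\top} (A A^{\top})^{-1} A, \qquad d = P \, \nabla f(x)

and a minimisation step then moves along -d, subject to the line-search limits
sketched in the trailing comments.
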
diff --git a/gi/posterior-regularisation/simplex_pg.py b/gi/posterior-regularisation/simplex_pg.py
deleted file mode 100644
index 5da796d3..00000000
--- a/gi/posterior-regularisation/simplex_pg.py
+++ /dev/null
@@ -1,55 +0,0 @@
-#
-# Following Luenberger and Ye, Linear and Nonlinear Programming, 3rd ed. p367
-# "The gradient projection method"
-# applied to an equality constraint for a simplex.
-#
-# min f(x)
-# s.t. x >= 0, sum_i x = d
-#
-# FIXME: enforce the positivity constraint - a limit on the line search?
-#
-
-from numpy import *
-from scipy import *
-from linesearch import line_search
-# local copy of scipy's Armijo line_search - wasn't enforcing alpha max correctly
-import sys
-
-dims = 4
-
-def f(x):
- fv = x[0]*x[0] + x[1]*x[1] + x[2]*x[2] + x[3]*x[3] - 2*x[0] - 3*x[3]
- # print 'evaluating f at', x, 'value', fv
- return fv
-
-def g(x):
- return array([2*x[0] - 2, 2*x[1], 2*x[2], 2*x[3]-3])
-
-def pg(x):
- gv = g(x)
- return gv - sum(gv) / dims
-
-x = ones(dims) / dims
-old_fval = None
-
-while True:
- fv = f(x)
- gv = g(x)
- dv = pg(x)
-
- print 'x', x, 'f', fv, 'g', gv, 'd', dv
-
- if old_fval == None:
- old_fval = fv + 0.1
-
- # solve for maximum step size i.e. when positivity constraints kick in
- # x - alpha d = 0 => alpha = x/d
- amax = max(x/dv)
- if amax < 1e-8: break
-
- stuff = line_search(f, pg, x, -dv, dv, fv, old_fval, amax=amax)
- alpha = stuff[0] # Nb. can avoid next evaluation of f,g,d using 'stuff'
- if alpha < 1e-8: break
- x -= alpha * dv
-
- old_fval = fv
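
Here pg projects the gradient onto the tangent space of the equality
constraint, { d : sum_i d_i = 0 }, by subtracting the mean component:

    P g = g - \frac{\mathbf{1}^{\top} g}{n} \, \mathbf{1}

which is exactly gv - sum(gv) / dims in the code.
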
diff --git a/gi/posterior-regularisation/split-languages.py b/gi/posterior-regularisation/split-languages.py
deleted file mode 100755
index 206da661..00000000
--- a/gi/posterior-regularisation/split-languages.py
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/python
-
-import sys
-
-sout = open(sys.argv[1], 'w')
-tout = open(sys.argv[2], 'w')
-for line in sys.stdin:
- phrase, contexts = line.rstrip().split('\t')
- sp, tp = phrase.split(' <SPLIT> ')
- sout.write('%s\t' % sp)
- tout.write('%s\t' % tp)
- parts = contexts.split(' ||| ')
- for i in range(0, len(parts), 2):
- sc, tc = parts[i].split(' <SPLIT> ')
- if i != 0:
- sout.write(' ||| ')
- tout.write(' ||| ')
- sout.write('%s ||| %s' % (sc, parts[i+1]))
- tout.write('%s ||| %s' % (tc, parts[i+1]))
- sout.write('\n')
- tout.write('\n')
-sout.close()
-tout.close()
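
A worked example of the reformatting (tokens hypothetical; <TAB> marks the tab
separator):

    stdin:    le chat <SPLIT> the cat<TAB>le <SPLIT> the ||| C=2 ||| un <SPLIT> a ||| C=1
    argv[1]:  le chat<TAB>le ||| C=2 ||| un ||| C=1
    argv[2]:  the cat<TAB>the ||| C=2 ||| a ||| C=1
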
diff --git a/gi/posterior-regularisation/train_pr_agree.py b/gi/posterior-regularisation/train_pr_agree.py
deleted file mode 100644
index 9d41362d..00000000
--- a/gi/posterior-regularisation/train_pr_agree.py
+++ /dev/null
@@ -1,400 +0,0 @@
-import sys
-import scipy.optimize
-from scipy.stats import geom
-from numpy import *
-from numpy.random import random, seed
-
-style = sys.argv[1]
-if len(sys.argv) >= 3:
- seed(int(sys.argv[2]))
-
-#
-# Step 1: load the concordance counts
-#
-
-edges = []
-word_types = {}
-phrase_types = {}
-context_types = {}
-
-for line in sys.stdin:
- phrase, rest = line.strip().split('\t')
- ptoks = tuple(map(lambda t: word_types.setdefault(t, len(word_types)), phrase.split()))
- pid = phrase_types.setdefault(ptoks, len(phrase_types))
-
- parts = rest.split('|||')
- for i in range(0, len(parts), 2):
- context, count = parts[i:i+2]
-
- ctx = filter(lambda x: x != '<PHRASE>', context.split())
- ctoks = tuple(map(lambda t: word_types.setdefault(t, len(word_types)), ctx))
- cid = context_types.setdefault(ctoks, len(context_types))
-
- cnt = int(count.strip()[2:])
- edges.append((pid, cid, cnt))
-
-word_type_list = [None] * len(word_types)
-for typ, index in word_types.items():
- word_type_list[index] = typ
-
-phrase_type_list = [None] * len(phrase_types)
-for typ, index in phrase_types.items():
- phrase_type_list[index] = typ
-
-context_type_list = [None] * len(context_types)
-for typ, index in context_types.items():
- context_type_list[index] = typ
-
-num_tags = 5
-num_types = len(word_types)
-num_phrases = len(phrase_types)
-num_contexts = len(context_types)
-num_edges = len(edges)
-
-print 'Read in', num_edges, 'edges', num_phrases, 'phrases', num_contexts, 'contexts and', num_types, 'word types'
-
-#
-# Step 2: expectation maximisation
-#
-
-def normalise(a):
- return a / float(sum(a))
-
-class PhraseToContextModel:
- def __init__(self):
- # Pr(tag | phrase)
- self.tagDist = [normalise(random(num_tags)+1) for p in range(num_phrases)]
- # Pr(context at pos i = w | tag) indexed by i, tag, word
- self.contextWordDist = [[normalise(random(num_types)+1) for t in range(num_tags)] for i in range(4)]
-
- def prob(self, pid, cid):
- # return distribution p(tag, context | phrase) as vector of length |tags|
- context = context_type_list[cid]
- dist = zeros(num_tags)
- for t in range(num_tags):
- prob = self.tagDist[pid][t]
- for k, tokid in enumerate(context):
- prob *= self.contextWordDist[k][t][tokid]
- dist[t] = prob
- return dist
-
- def expectation_maximisation_step(self, lamba=None):
- tagCounts = zeros((num_phrases, num_tags))
- contextWordCounts = zeros((4, num_tags, num_types))
-
- # E-step
- llh = 0
- for pid, cid, cnt in edges:
- q = self.prob(pid, cid)
- z = sum(q)
- q /= z
- llh += log(z)
- context = context_type_list[cid]
- if lamba != None:
- q *= exp(lamba)
- q /= sum(q)
- for t in range(num_tags):
- tagCounts[pid][t] += cnt * q[t]
- for i in range(4):
- for t in range(num_tags):
- contextWordCounts[i][t][context[i]] += cnt * q[t]
-
- # M-step
- for p in range(num_phrases):
- self.tagDist[p] = normalise(tagCounts[p])
- for i in range(4):
- for t in range(num_tags):
- self.contextWordDist[i][t] = normalise(contextWordCounts[i,t])
-
- return llh
-
-class ContextToPhraseModel:
- def __init__(self):
- # Pr(tag | context) = Multinomial
- self.tagDist = [normalise(random(num_tags)+1) for p in range(num_contexts)]
- # Pr(phrase = w | tag) = Multinomial
- self.phraseSingleDist = [normalise(random(num_types)+1) for t in range(num_tags)]
- # Pr(phrase_1 = w | tag) = Multinomial
- self.phraseLeftDist = [normalise(random(num_types)+1) for t in range(num_tags)]
- # Pr(phrase_-1 = w | tag) = Multinomial
- self.phraseRightDist = [normalise(random(num_types)+1) for t in range(num_tags)]
- # Pr(|phrase| = l | tag) = Geometric
- self.phraseLengthDist = [0.5] * num_tags
- # n.b. internal words for phrases of length >= 3 are drawn from uniform distribution
-
- def prob(self, pid, cid):
- # return distribution p(tag, phrase | context) as vector of length |tags|
- phrase = phrase_type_list[pid]
- dist = zeros(num_tags)
- for t in range(num_tags):
- prob = self.tagDist[cid][t]
- f = self.phraseLengthDist[t]
- prob *= geom.pmf(len(phrase), f)
- if len(phrase) == 1:
- prob *= self.phraseSingleDist[t][phrase[0]]
- else:
- prob *= self.phraseLeftDist[t][phrase[0]]
- prob *= self.phraseRightDist[t][phrase[-1]]
- dist[t] = prob
- return dist
-
- def expectation_maximisation_step(self, lamba=None):
- tagCounts = zeros((num_contexts, num_tags))
- phraseSingleCounts = zeros((num_tags, num_types))
- phraseLeftCounts = zeros((num_tags, num_types))
- phraseRightCounts = zeros((num_tags, num_types))
-        phraseLength = zeros(num_tags)
-
- # E-step
- llh = 0
- for pid, cid, cnt in edges:
- q = self.prob(pid, cid)
- z = sum(q)
- q /= z
- llh += log(z)
- if lamba != None:
- q *= exp(lamba)
- q /= sum(q)
- #print 'p', phrase_type_list[pid], 'c', context_type_list[cid], 'q', q
- phrase = phrase_type_list[pid]
- for t in range(num_tags):
- tagCounts[cid][t] += cnt * q[t]
- phraseLength[t] += cnt * len(phrase) * q[t]
- if len(phrase) == 1:
- phraseSingleCounts[t][phrase[0]] += cnt * q[t]
- else:
- phraseLeftCounts[t][phrase[0]] += cnt * q[t]
- phraseRightCounts[t][phrase[-1]] += cnt * q[t]
-
- # M-step
- for t in range(num_tags):
- self.phraseLengthDist[t] = min(max(sum(tagCounts[:,t]) / phraseLength[t], 1e-6), 1-1e-6)
- self.phraseSingleDist[t] = normalise(phraseSingleCounts[t])
- self.phraseLeftDist[t] = normalise(phraseLeftCounts[t])
- self.phraseRightDist[t] = normalise(phraseRightCounts[t])
- for c in range(num_contexts):
- self.tagDist[c] = normalise(tagCounts[c])
-
- #print 't', self.tagDist
- #print 'l', self.phraseLengthDist
- #print 's', self.phraseSingleDist
- #print 'L', self.phraseLeftDist
- #print 'R', self.phraseRightDist
-
- return llh
-
-class ProductModel:
- """
- WARNING: I haven't verified the maths behind this model. It's quite likely to be incorrect.
- """
-
- def __init__(self):
- self.pcm = PhraseToContextModel()
- self.cpm = ContextToPhraseModel()
-
- def prob(self, pid, cid):
- p1 = self.pcm.prob(pid, cid)
- p2 = self.cpm.prob(pid, cid)
- return (p1 / sum(p1)) * (p2 / sum(p2))
-
- def expectation_maximisation_step(self):
- tagCountsGivenPhrase = zeros((num_phrases, num_tags))
- contextWordCounts = zeros((4, num_tags, num_types))
-
- tagCountsGivenContext = zeros((num_contexts, num_tags))
- phraseSingleCounts = zeros((num_tags, num_types))
- phraseLeftCounts = zeros((num_tags, num_types))
- phraseRightCounts = zeros((num_tags, num_types))
-        phraseLength = zeros(num_tags)
-
- kl = llh1 = llh2 = 0
- for pid, cid, cnt in edges:
- p1 = self.pcm.prob(pid, cid)
- llh1 += log(sum(p1)) * cnt
- p2 = self.cpm.prob(pid, cid)
- llh2 += log(sum(p2)) * cnt
-
- q = (p1 / sum(p1)) * (p2 / sum(p2))
- kl += log(sum(q)) * cnt
- qi = sqrt(q)
- qi /= sum(qi)
-
- phrase = phrase_type_list[pid]
- context = context_type_list[cid]
- for t in range(num_tags):
- tagCountsGivenPhrase[pid][t] += cnt * qi[t]
- tagCountsGivenContext[cid][t] += cnt * qi[t]
- phraseLength[t] += cnt * len(phrase) * qi[t]
- if len(phrase) == 1:
- phraseSingleCounts[t][phrase[0]] += cnt * qi[t]
- else:
- phraseLeftCounts[t][phrase[0]] += cnt * qi[t]
- phraseRightCounts[t][phrase[-1]] += cnt * qi[t]
- for i in range(4):
- contextWordCounts[i][t][context[i]] += cnt * qi[t]
-
- kl *= -2
-
- for t in range(num_tags):
- for i in range(4):
- self.pcm.contextWordDist[i][t] = normalise(contextWordCounts[i,t])
- self.cpm.phraseLengthDist[t] = min(max(sum(tagCountsGivenContext[:,t]) / phraseLength[t], 1e-6), 1-1e-6)
- self.cpm.phraseSingleDist[t] = normalise(phraseSingleCounts[t])
- self.cpm.phraseLeftDist[t] = normalise(phraseLeftCounts[t])
- self.cpm.phraseRightDist[t] = normalise(phraseRightCounts[t])
- for p in range(num_phrases):
- self.pcm.tagDist[p] = normalise(tagCountsGivenPhrase[p])
- for c in range(num_contexts):
- self.cpm.tagDist[c] = normalise(tagCountsGivenContext[c])
-
- # return the overall objective
- return llh1 + llh2 + kl
-
-class RegularisedProductModel:
- # as above, but with a slack regularisation term which kills the
- # closed-form solution for the E-step
-
- def __init__(self, epsilon):
- self.pcm = PhraseToContextModel()
- self.cpm = ContextToPhraseModel()
- self.epsilon = epsilon
- self.lamba = zeros(num_tags)
-
- def prob(self, pid, cid):
- p1 = self.pcm.prob(pid, cid)
- p2 = self.cpm.prob(pid, cid)
- return (p1 / sum(p1)) * (p2 / sum(p2))
-
- def dual(self, lamba):
- return self.logz(lamba) + self.epsilon * dot(lamba, lamba) ** 0.5
-
- def dual_gradient(self, lamba):
- return self.expected_features(lamba) + self.epsilon * 2 * lamba
-
- def expectation_maximisation_step(self):
- # PR-step: optimise lambda to minimise log(z_lambda) + eps ||lambda||_2
- self.lamba = scipy.optimize.fmin_slsqp(self.dual, self.lamba,
- bounds=[(0, 1e100)] * num_tags,
- fprime=self.dual_gradient, iprint=1)
-
- # E,M-steps: collect expected counts under q_lambda and normalise
- llh1 = self.pcm.expectation_maximisation_step(self.lamba)
- llh2 = self.cpm.expectation_maximisation_step(-self.lamba)
-
- # return the overall objective: llh - KL(q||p1.p2)
- # llh = llh1 + llh2
- # kl = sum q log q / p1 p2 = sum q { lambda . phi } - log Z
- return llh1 + llh2 + self.logz(self.lamba) \
- - dot(self.lamba, self.expected_features(self.lamba))
-
- def logz(self, lamba):
- lz = 0
- for pid, cid, cnt in edges:
- p1 = self.pcm.prob(pid, cid)
- z1 = dot(p1 / sum(p1), exp(lamba))
- lz += log(z1) * cnt
-
- p2 = self.cpm.prob(pid, cid)
- z2 = dot(p2 / sum(p2), exp(-lamba))
- lz += log(z2) * cnt
- return lz
-
- def expected_features(self, lamba):
- fs = zeros(num_tags)
- for pid, cid, cnt in edges:
- p1 = self.pcm.prob(pid, cid)
- q1 = (p1 / sum(p1)) * exp(lamba)
- fs += cnt * q1 / sum(q1)
-
- p2 = self.cpm.prob(pid, cid)
- q2 = (p2 / sum(p2)) * exp(-lamba)
- fs -= cnt * q2 / sum(q2)
- return fs
-
-
-class InterpolatedModel:
- def __init__(self, epsilon):
- self.pcm = PhraseToContextModel()
- self.cpm = ContextToPhraseModel()
- self.epsilon = epsilon
- self.lamba = zeros(num_tags)
-
- def prob(self, pid, cid):
- p1 = self.pcm.prob(pid, cid)
- p2 = self.cpm.prob(pid, cid)
- return (p1 + p2) / 2
-
- def dual(self, lamba):
- return self.logz(lamba) + self.epsilon * dot(lamba, lamba) ** 0.5
-
- def dual_gradient(self, lamba):
- return self.expected_features(lamba) + self.epsilon * 2 * lamba
-
- def expectation_maximisation_step(self):
- # PR-step: optimise lambda to minimise log(z_lambda) + eps ||lambda||_2
- self.lamba = scipy.optimize.fmin_slsqp(self.dual, self.lamba,
- bounds=[(0, 1e100)] * num_tags,
- fprime=self.dual_gradient, iprint=2)
-
- # E,M-steps: collect expected counts under q_lambda and normalise
- llh1 = self.pcm.expectation_maximisation_step(self.lamba)
- llh2 = self.cpm.expectation_maximisation_step(self.lamba)
-
- # return the overall objective: llh1 + llh2 - KL(q||p1.p2)
- # kl = sum_y q log q / 0.5 * (p1 + p2) = sum_y q(y) { -lambda . phi(y) } - log Z
- # = -log Z + lambda . (E_q1[-phi] + E_q2[-phi]) / 2
- kl = -self.logz(self.lamba) + dot(self.lamba, self.expected_features(self.lamba))
- return llh1 + llh2 - kl, llh1, llh2, kl
- # FIXME: KL comes out negative...
-
- def logz(self, lamba):
- lz = 0
- for pid, cid, cnt in edges:
- p1 = self.pcm.prob(pid, cid)
- q1 = p1 / sum(p1) * exp(-lamba)
- q1z = sum(q1)
-
- p2 = self.cpm.prob(pid, cid)
- q2 = p2 / sum(p2) * exp(-lamba)
- q2z = sum(q2)
-
- lz += log(0.5 * (q1z + q2z)) * cnt
- return lz
-
- # z = 1/2 * (sum_y p1(y|x) exp (-lambda . phi(y)) + sum_y p2(y|x) exp (-lambda . phi(y)))
- # = 1/2 (z1 + z2)
- # d (log z) / dlambda = 1/2 (E_q1 [ -phi ] + E_q2 [ -phi ] )
- def expected_features(self, lamba):
- fs = zeros(num_tags)
- for pid, cid, cnt in edges:
- p1 = self.pcm.prob(pid, cid)
- q1 = (p1 / sum(p1)) * exp(-lamba)
- fs -= 0.5 * cnt * q1 / sum(q1)
-
- p2 = self.cpm.prob(pid, cid)
- q2 = (p2 / sum(p2)) * exp(-lamba)
- fs -= 0.5 * cnt * q2 / sum(q2)
- return fs
-
-if style == 'p2c':
- m = PhraseToContextModel()
-elif style == 'c2p':
- m = ContextToPhraseModel()
-elif style == 'prod':
- m = ProductModel()
-elif style == 'prodslack':
- m = RegularisedProductModel(0.5)
-elif style == 'sum':
- m = InterpolatedModel(0.5)
-
-for iteration in range(30):
- obj = m.expectation_maximisation_step()
- print 'iteration', iteration, 'objective', obj
-
-for pid, cid, cnt in edges:
- p = m.prob(pid, cid)
- phrase = phrase_type_list[pid]
- phrase_str = ' '.join(map(word_type_list.__getitem__, phrase))
- context = context_type_list[cid]
- context_str = ' '.join(map(word_type_list.__getitem__, context))
- print '%s\t%s ||| C=%d' % (phrase_str, context_str, argmax(p))
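
For reference, the slack-regularised dual optimised by RegularisedProductModel
above, with epsilon the slack scale and phi(y) the tag indicator features:

    \min_{\lambda \ge 0} \; \log Z(\lambda) + \epsilon \, \lVert \lambda \rVert_{2}

where the two component posteriors are reweighted in opposite directions,
q_1(y) \propto p_1(y) e^{\lambda \cdot \phi(y)} and
q_2(y) \propto p_2(y) e^{-\lambda \cdot \phi(y)}, pushing the phrase-to-context
and context-to-phrase models towards agreement.
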
diff --git a/gi/posterior-regularisation/train_pr_global.py b/gi/posterior-regularisation/train_pr_global.py
deleted file mode 100644
index 8521bccb..00000000
--- a/gi/posterior-regularisation/train_pr_global.py
+++ /dev/null
@@ -1,296 +0,0 @@
-import sys
-import scipy.optimize
-from numpy import *
-from numpy.random import random
-
-#
-# Step 1: load the concordance counts
-#
-
-edges_phrase_to_context = []
-edges_context_to_phrase = []
-types = {}
-context_types = {}
-num_edges = 0
-
-for line in sys.stdin:
- phrase, rest = line.strip().split('\t')
- parts = rest.split('|||')
- edges_phrase_to_context.append((phrase, []))
- for i in range(0, len(parts), 2):
- context, count = parts[i:i+2]
-
- ctx = tuple(filter(lambda x: x != '<PHRASE>', context.split()))
- cnt = int(count.strip()[2:])
- edges_phrase_to_context[-1][1].append((ctx, cnt))
-
- cid = context_types.get(ctx, len(context_types))
- if cid == len(context_types):
- context_types[ctx] = cid
- edges_context_to_phrase.append((ctx, []))
- edges_context_to_phrase[cid][1].append((phrase, cnt))
-
- for token in ctx:
- types.setdefault(token, len(types))
- for token in phrase.split():
- types.setdefault(token, len(types))
-
- num_edges += 1
-
-print 'Read in', num_edges, 'edges and', len(types), 'word types'
-
-print 'edges_phrase_to_context', edges_phrase_to_context
-
-#
-# Step 2: initialise the model parameters
-#
-
-num_tags = 10
-num_types = len(types)
-num_phrases = len(edges_phrase_to_context)
-num_contexts = len(edges_context_to_phrase)
-delta = int(sys.argv[1])
-gamma = int(sys.argv[2])
-
-def normalise(a):
- return a / float(sum(a))
-
-# Pr(tag | phrase)
-tagDist = [normalise(random(num_tags)+1) for p in range(num_phrases)]
-#tagDist = [normalise(array(range(1,num_tags+1))) for p in range(num_phrases)]
-# Pr(context at pos i = w | tag) indexed by i, tag, word
-#contextWordDist = [[normalise(array(range(1,num_types+1))) for t in range(num_tags)] for i in range(4)]
-contextWordDist = [[normalise(random(num_types)+1) for t in range(num_tags)] for i in range(4)]
-# PR Lagrange multipliers
-lamba = zeros(2 * num_edges * num_tags)
-omega_offset = num_edges * num_tags
-lamba_index = {}
-next = 0
-for phrase, ccs in edges_phrase_to_context:
- for context, count in ccs:
- lamba_index[phrase,context] = next
- next += num_tags
-#print lamba_index
-
-#
-# Step 3: expectation maximisation
-#
-
-for iteration in range(20):
- tagCounts = [zeros(num_tags) for p in range(num_phrases)]
- contextWordCounts = [[zeros(num_types) for t in range(num_tags)] for i in range(4)]
-
- #print 'tagDist', tagDist
- #print 'contextWordCounts[0][0]', contextWordCounts[0][0]
-
- # Tune lambda
- # dual: min log Z(lamba) s.t. lamba >= 0;
- # sum_c lamba_pct <= delta; sum_p lamba_pct <= gamma
- def dual(ls):
- logz = 0
- for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
- for context, count in ccs:
- conditionals = zeros(num_tags)
- for t in range(num_tags):
- prob = tagDist[p][t]
- for i in range(4):
- prob *= contextWordDist[i][t][types[context[i]]]
- conditionals[t] = prob
- cz = sum(conditionals)
- conditionals /= cz
-
- #print 'dual', phrase, context, count, 'p =', conditionals
-
- local_z = 0
- for t in range(num_tags):
- li = lamba_index[phrase,context] + t
- local_z += conditionals[t] * exp(-ls[li] - ls[omega_offset+li])
- logz += log(local_z) * count
-
- #print 'ls', ls
- #print 'lambda', list(ls)
- #print 'dual', logz
- return logz
-
- def loglikelihood():
- llh = 0
- for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
- for context, count in ccs:
- conditionals = zeros(num_tags)
- for t in range(num_tags):
- prob = tagDist[p][t]
- for i in range(4):
- prob *= contextWordDist[i][t][types[context[i]]]
- conditionals[t] = prob
- cz = sum(conditionals)
- llh += log(cz) * count
- return llh
-
- def primal(ls):
- # FIXME: returns negative values for KL (impossible)
- logz = dual(ls)
- expectations = -dual_deriv(ls)
- kl = -logz - dot(ls, expectations)
- llh = loglikelihood()
-
- pt_l1linf = 0
- for phrase, ccs in edges_phrase_to_context:
- for t in range(num_tags):
- best = -1e500
- for context, count in ccs:
- li = lamba_index[phrase,context] + t
- s = expectations[li]
- if s > best: best = s
- pt_l1linf += best
-
- ct_l1linf = 0
- for context, pcs in edges_context_to_phrase:
- for t in range(num_tags):
- best = -1e500
- for phrase, count in pcs:
- li = omega_offset + lamba_index[phrase,context] + t
- s = expectations[li]
- if s > best: best = s
- ct_l1linf += best
-
- return llh, kl, pt_l1linf, ct_l1linf, llh - kl - delta * pt_l1linf - gamma * ct_l1linf
-
- def dual_deriv(ls):
- # d/dl log(z) = E_q[phi]
- deriv = zeros(2 * num_edges * num_tags)
- for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
- for context, count in ccs:
- conditionals = zeros(num_tags)
- for t in range(num_tags):
- prob = tagDist[p][t]
- for i in range(4):
- prob *= contextWordDist[i][t][types[context[i]]]
- conditionals[t] = prob
- cz = sum(conditionals)
- conditionals /= cz
-
- scores = zeros(num_tags)
- for t in range(num_tags):
- li = lamba_index[phrase,context] + t
- scores[t] = conditionals[t] * exp(-ls[li] - ls[omega_offset + li])
- local_z = sum(scores)
-
- #print 'ddual', phrase, context, count, 'q =', scores / local_z
-
- for t in range(num_tags):
- deriv[lamba_index[phrase,context] + t] -= count * scores[t] / local_z
- deriv[omega_offset + lamba_index[phrase,context] + t] -= count * scores[t] / local_z
-
- #print 'ddual', list(deriv)
- return deriv
-
- def constraints(ls):
- cons = zeros(num_phrases * num_tags + num_edges * num_tags)
-
- index = 0
- for phrase, ccs in edges_phrase_to_context:
- for t in range(num_tags):
- if delta > 0:
- total = delta
- for cprime, count in ccs:
- total -= ls[lamba_index[phrase, cprime] + t]
- cons[index] = total
- index += 1
-
- for context, pcs in edges_context_to_phrase:
- for t in range(num_tags):
- if gamma > 0:
- total = gamma
- for pprime, count in pcs:
- total -= ls[omega_offset + lamba_index[pprime, context] + t]
- cons[index] = total
- index += 1
-
- #print 'cons', cons
- return cons
-
- def constraints_deriv(ls):
- cons = zeros((num_phrases * num_tags + num_edges * num_tags, 2 * num_edges * num_tags))
-
- index = 0
- for phrase, ccs in edges_phrase_to_context:
- for t in range(num_tags):
- if delta > 0:
- d = cons[index,:]#zeros(num_edges * num_tags)
- for cprime, count in ccs:
- d[lamba_index[phrase, cprime] + t] = -1
- #cons[index] = d
- index += 1
-
- for context, pcs in edges_context_to_phrase:
- for t in range(num_tags):
- if gamma > 0:
- d = cons[index,:]#d = zeros(num_edges * num_tags)
- for pprime, count in pcs:
- d[omega_offset + lamba_index[pprime, context] + t] = -1
- #cons[index] = d
- index += 1
- #print 'dcons', cons
- return cons
-
- print 'Pre lambda optimisation dual', dual(lamba), 'primal', primal(lamba)
- #print 'lambda', lamba, lamba.shape
- #print 'bounds', [(0, max(delta, gamma))] * (2 * num_edges * num_tags)
-
- lamba = scipy.optimize.fmin_slsqp(dual, lamba,
- bounds=[(0, max(delta, gamma))] * (2 * num_edges * num_tags),
- f_ieqcons=constraints,
- fprime=dual_deriv,
- fprime_ieqcons=constraints_deriv,
- iprint=0)
- print 'Post lambda optimisation dual', dual(lamba), 'primal', primal(lamba)
-
- # E-step
- llh = log_z = 0
- for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
- for context, count in ccs:
- conditionals = zeros(num_tags)
- for t in range(num_tags):
- prob = tagDist[p][t]
- for i in range(4):
- prob *= contextWordDist[i][t][types[context[i]]]
- conditionals[t] = prob
- cz = sum(conditionals)
- conditionals /= cz
- llh += log(cz) * count
-
- q = zeros(num_tags)
- li = lamba_index[phrase, context]
- for t in range(num_tags):
- q[t] = conditionals[t] * exp(-lamba[li + t] - lamba[omega_offset + li + t])
- qz = sum(q)
- log_z += count * log(qz)
-
- for t in range(num_tags):
- tagCounts[p][t] += count * q[t] / qz
-
- for i in range(4):
- for t in range(num_tags):
- contextWordCounts[i][t][types[context[i]]] += count * q[t] / qz
-
- print 'iteration', iteration, 'llh', llh, 'logz', log_z
-
- # M-step
- for p in range(num_phrases):
- tagDist[p] = normalise(tagCounts[p])
- for i in range(4):
- for t in range(num_tags):
- contextWordDist[i][t] = normalise(contextWordCounts[i][t])
-
-for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
- for context, count in ccs:
- conditionals = zeros(num_tags)
- for t in range(num_tags):
- prob = tagDist[p][t]
- for i in range(4):
- prob *= contextWordDist[i][t][types[context[i]]]
- conditionals[t] = prob
- cz = sum(conditionals)
- conditionals /= cz
-
- print '%s\t%s ||| C=%d |||' % (phrase, context, argmax(conditionals)), conditionals
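
For reference, the primal objective that primal() monitors, with delta and
gamma scaling the two L1/Linf penalties and e_{pct} = E_q[phi_{pct}] the
expected tag indicators:

    \mathcal{L}(\theta) - \mathrm{KL}(q \,\|\, p_{\theta})
        - \delta \sum_{p,t} \max_{c} e_{pct}
        - \gamma \sum_{c,t} \max_{p} e_{pct}

matching the returned llh - kl - delta * pt_l1linf - gamma * ct_l1linf.
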
diff --git a/gi/posterior-regularisation/train_pr_parallel.py b/gi/posterior-regularisation/train_pr_parallel.py
deleted file mode 100644
index 3b9cefed..00000000
--- a/gi/posterior-regularisation/train_pr_parallel.py
+++ /dev/null
@@ -1,333 +0,0 @@
-import sys
-import scipy.optimize
-from numpy import *
-from numpy.random import random, seed
-
-#
-# Step 1: load the concordance counts
-#
-
-edges_phrase_to_context = []
-edges_context_to_phrase = []
-types = {}
-context_types = {}
-num_edges = 0
-
-for line in sys.stdin:
- phrase, rest = line.strip().split('\t')
- parts = rest.split('|||')
- edges_phrase_to_context.append((phrase, []))
- for i in range(0, len(parts), 2):
- context, count = parts[i:i+2]
-
- ctx = tuple(filter(lambda x: x != '<PHRASE>', context.split()))
- cnt = int(count.strip()[2:])
- edges_phrase_to_context[-1][1].append((ctx, cnt))
-
- cid = context_types.get(ctx, len(context_types))
- if cid == len(context_types):
- context_types[ctx] = cid
- edges_context_to_phrase.append((ctx, []))
- edges_context_to_phrase[cid][1].append((phrase, cnt))
-
- for token in ctx:
- types.setdefault(token, len(types))
- for token in phrase.split():
- types.setdefault(token, len(types))
-
- num_edges += 1
-
-#
-# Step 2: initialise the model parameters
-#
-
-num_tags = 25
-num_types = len(types)
-num_phrases = len(edges_phrase_to_context)
-num_contexts = len(edges_context_to_phrase)
-delta = float(sys.argv[1])
-assert sys.argv[2] in ('local', 'global')
-local = sys.argv[2] == 'local'
-if len(sys.argv) > 3:
- seed(int(sys.argv[3]))
-
-print 'Read in', num_edges, 'edges', num_phrases, 'phrases', num_contexts, 'contexts and', len(types), 'word types'
-
-def normalise(a):
- return a / float(sum(a))
-
-# Pr(tag | phrase)
-tagDist = [normalise(random(num_tags)+1) for p in range(num_phrases)]
-# Pr(context at pos i = w | tag) indexed by i, tag, word
-contextWordDist = [[normalise(random(num_types)+1) for t in range(num_tags)] for i in range(4)]
-
-#
-# Step 3: expectation maximisation
-#
-
-class GlobalDualObjective:
- """
- Objective, log(z), for all phrases s.t. lambda >= 0, sum_c lambda_pct <= scale
- """
-
- def __init__(self, scale):
- self.scale = scale
- self.posterior = zeros((num_edges, num_tags))
- self.q = zeros((num_edges, num_tags))
- self.llh = 0
-
- index = 0
- for j, (phrase, edges) in enumerate(edges_phrase_to_context):
- for context, count in edges:
- for t in range(num_tags):
- prob = tagDist[j][t]
- for k, token in enumerate(context):
- prob *= contextWordDist[k][t][types[token]]
- self.posterior[index,t] = prob
- z = sum(self.posterior[index,:])
- self.posterior[index,:] /= z
- self.llh += log(z) * count
- index += 1
-
- def objective(self, ls):
- ls = ls.reshape((num_edges, num_tags))
- logz = 0
-
- index = 0
- for j, (phrase, edges) in enumerate(edges_phrase_to_context):
- for context, count in edges:
- for t in range(num_tags):
- self.q[index,t] = self.posterior[index,t] * exp(-ls[index,t])
- local_z = sum(self.q[index,:])
- self.q[index,:] /= local_z
- logz += log(local_z) * count
- index += 1
-
- return logz
-
- # FIXME: recomputes q many more times than necessary
-
- def gradient(self, ls):
- ls = ls.reshape((num_edges, num_tags))
- gradient = zeros((num_edges, num_tags))
-
- index = 0
- for j, (phrase, edges) in enumerate(edges_phrase_to_context):
- for context, count in edges:
- for t in range(num_tags):
- self.q[index,t] = self.posterior[index,t] * exp(-ls[index,t])
- local_z = sum(self.q[index,:])
- self.q[index,:] /= local_z
- for t in range(num_tags):
- gradient[index,t] -= self.q[index,t] * count
- index += 1
-
- return gradient.ravel()
-
- def constraints(self, ls):
- ls = ls.reshape((num_edges, num_tags))
- cons = ones((num_phrases, num_tags)) * self.scale
- index = 0
- for j, (phrase, edges) in enumerate(edges_phrase_to_context):
- for i, (context, count) in enumerate(edges):
- for t in range(num_tags):
- cons[j,t] -= ls[index,t] * count
- index += 1
- return cons.ravel()
-
- def constraints_gradient(self, ls):
- ls = ls.reshape((num_edges, num_tags))
- gradient = zeros((num_phrases, num_tags, num_edges, num_tags))
- index = 0
- for j, (phrase, edges) in enumerate(edges_phrase_to_context):
- for i, (context, count) in enumerate(edges):
- for t in range(num_tags):
- gradient[j,t,index,t] -= count
- index += 1
- return gradient.reshape((num_phrases*num_tags, num_edges*num_tags))
-
- def optimize(self):
- ls = zeros(num_edges * num_tags)
- #print '\tpre lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba)
- ls = scipy.optimize.fmin_slsqp(self.objective, ls,
- bounds=[(0, self.scale)] * num_edges * num_tags,
- f_ieqcons=self.constraints,
- fprime=self.gradient,
- fprime_ieqcons=self.constraints_gradient,
- iprint=0) # =2 for verbose
- #print '\tpost lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba)
-
- # returns llh, kl and l1lmax contribution
- l1lmax = 0
- index = 0
- for j, (phrase, edges) in enumerate(edges_phrase_to_context):
- for t in range(num_tags):
- lmax = None
- for i, (context, count) in enumerate(edges):
- lmax = max(lmax, self.q[index+i,t])
- l1lmax += lmax
- index += len(edges)
-
- return self.llh, -self.objective(ls) + dot(ls, self.gradient(ls)), l1lmax
-
-class LocalDualObjective:
- """
- Local part of objective, log(z) relevant to lambda_p**.
- Optimised subject to lambda >= 0, sum_c lambda_pct <= scale for all t
- """
-
- def __init__(self, phraseId, scale):
- self.phraseId = phraseId
- self.scale = scale
- edges = edges_phrase_to_context[self.phraseId][1]
- self.posterior = zeros((len(edges), num_tags))
- self.q = zeros((len(edges), num_tags))
- self.llh = 0
-
- for i, (context, count) in enumerate(edges):
- for t in range(num_tags):
- prob = tagDist[phraseId][t]
- for j, token in enumerate(context):
- prob *= contextWordDist[j][t][types[token]]
- self.posterior[i,t] = prob
- z = sum(self.posterior[i,:])
- self.posterior[i,:] /= z
- self.llh += log(z) * count
-
- def objective(self, ls):
- edges = edges_phrase_to_context[self.phraseId][1]
- ls = ls.reshape((len(edges), num_tags))
- logz = 0
-
- for i, (context, count) in enumerate(edges):
- for t in range(num_tags):
- self.q[i,t] = self.posterior[i,t] * exp(-ls[i,t])
- local_z = sum(self.q[i,:])
- self.q[i,:] /= local_z
- logz += log(local_z) * count
-
- return logz
-
- # FIXME: recomputes q many more times than necessary
-
- def gradient(self, ls):
- edges = edges_phrase_to_context[self.phraseId][1]
- ls = ls.reshape((len(edges), num_tags))
- gradient = zeros((len(edges), num_tags))
-
- for i, (context, count) in enumerate(edges):
- for t in range(num_tags):
- self.q[i,t] = self.posterior[i,t] * exp(-ls[i,t])
- local_z = sum(self.q[i,:])
- self.q[i,:] /= local_z
- for t in range(num_tags):
- gradient[i,t] -= self.q[i,t] * count
-
- return gradient.ravel()
-
- def constraints(self, ls):
- edges = edges_phrase_to_context[self.phraseId][1]
- ls = ls.reshape((len(edges), num_tags))
- cons = ones(num_tags) * self.scale
- for t in range(num_tags):
- for i, (context, count) in enumerate(edges):
- cons[t] -= ls[i,t] * count
- return cons
-
- def constraints_gradient(self, ls):
- edges = edges_phrase_to_context[self.phraseId][1]
- ls = ls.reshape((len(edges), num_tags))
- gradient = zeros((num_tags, len(edges), num_tags))
- for t in range(num_tags):
- for i, (context, count) in enumerate(edges):
- gradient[t,i,t] -= count
- return gradient.reshape((num_tags, len(edges)*num_tags))
-
- def optimize(self, ls=None):
- edges = edges_phrase_to_context[self.phraseId][1]
- if ls is None:
- ls = zeros(len(edges) * num_tags)
- #print '\tpre lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba)
- ls = scipy.optimize.fmin_slsqp(self.objective, ls,
- bounds=[(0, self.scale)] * len(edges) * num_tags,
- f_ieqcons=self.constraints,
- fprime=self.gradient,
- fprime_ieqcons=self.constraints_gradient,
- iprint=0) # =2 for verbose
- #print '\tlambda', list(ls)
- #print '\tpost lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba)
-
- # returns llh, kl and l1lmax contribution
- l1lmax = 0
- for t in range(num_tags):
- lmax = None
- for i, (context, count) in enumerate(edges):
- lmax = max(lmax, self.q[i,t])
- l1lmax += lmax
-
- return self.llh, -self.objective(ls) + dot(ls, self.gradient(ls)), l1lmax, ls
-
-ls = [None] * num_phrases
-for iteration in range(20):
- tagCounts = [zeros(num_tags) for p in range(num_phrases)]
- contextWordCounts = [[zeros(num_types) for t in range(num_tags)] for i in range(4)]
-
- # E-step
- llh = kl = l1lmax = 0
- if local:
- for p in range(num_phrases):
- o = LocalDualObjective(p, delta)
- #print '\toptimising lambda for phrase', p, '=', edges_phrase_to_context[p][0]
- #print '\toptimising lambda for phrase', p, 'ls', ls[p]
- obj = o.optimize(ls[p])
- #print '\tphrase', p, 'deltas', obj
- llh += obj[0]
- kl += obj[1]
- l1lmax += obj[2]
- ls[p] = obj[3]
-
- edges = edges_phrase_to_context[p][1]
- for j, (context, count) in enumerate(edges):
- for t in range(num_tags):
- tagCounts[p][t] += count * o.q[j,t]
- for i in range(4):
- for t in range(num_tags):
- contextWordCounts[i][t][types[context[i]]] += count * o.q[j,t]
-
- #print 'iteration', iteration, 'LOCAL objective', (llh + kl + delta * l1lmax), 'llh', llh, 'kl', kl, 'l1lmax', l1lmax
- else:
- o = GlobalDualObjective(delta)
- llh, kl, l1lmax = o.optimize()
-
- index = 0
- for p, (phrase, edges) in enumerate(edges_phrase_to_context):
- for context, count in edges:
- for t in range(num_tags):
- tagCounts[p][t] += count * o.q[index,t]
- for i in range(4):
- for t in range(num_tags):
- contextWordCounts[i][t][types[context[i]]] += count * o.q[index,t]
- index += 1
-
- print 'iteration', iteration, 'objective', (llh - kl - delta * l1lmax), 'llh', llh, 'kl', kl, 'l1lmax', l1lmax
-
- # M-step
- for p in range(num_phrases):
- tagDist[p] = normalise(tagCounts[p])
- for i in range(4):
- for t in range(num_tags):
- contextWordDist[i][t] = normalise(contextWordCounts[i][t])
-
-for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
- for context, count in ccs:
- conditionals = zeros(num_tags)
- for t in range(num_tags):
- prob = tagDist[p][t]
- for i in range(4):
- prob *= contextWordDist[i][t][types[context[i]]]
- conditionals[t] = prob
- cz = sum(conditionals)
- conditionals /= cz
-
- print '%s\t%s ||| C=%d |||' % (phrase, context, argmax(conditionals)), conditionals
diff --git a/gi/pyp-topics/scripts/contexts2documents.py b/gi/pyp-topics/scripts/contexts2documents.py
deleted file mode 100755
index 9be4ebbb..00000000
--- a/gi/pyp-topics/scripts/contexts2documents.py
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/python
-
-import sys
-from operator import itemgetter
-
-if len(sys.argv) > 3:
- print "Usage: contexts2documents.py [contexts_index_out] [phrases_index_out]"
- exit(1)
-
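-# Input lines are assumed to look like (annotation, inferred from the
-# parsing below): phrase<TAB>context ||| C=<count> ||| context ||| C=<count>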
-context_index = {}
-phrase_index = {}
-for line in sys.stdin:
- phrase, line_tail = line.split('\t')
-
- raw_contexts = line_tail.split('|||')
- contexts = [c.strip() for x,c in enumerate(raw_contexts) if x%2 == 0]
- counts = [int(c.split('=')[1].strip()) for x,c in enumerate(raw_contexts) if x%2 != 0]
- phrase_index.setdefault(phrase, len(phrase_index))
- print len(contexts),
- for context,count in zip(contexts,counts):
- c = context_index.setdefault(context, len(context_index))
- print "%d:%d" % (c,count),
- print
-if 1 < len(sys.argv) < 4:
- contexts_out = open(sys.argv[1],'w')
- contexts = context_index.items()
- contexts.sort(key = itemgetter(1))
- for context in contexts:
- print >>contexts_out, context[0]
- contexts_out.close()
-if len(sys.argv) == 3:
- phrases_out = open(sys.argv[2],'w')
- phrases = phrase_index.items()
- phrases.sort(key = itemgetter(1))
- for phrase in phrases:
- print >>phrases_out, phrase[0]
- phrases_out.close()
diff --git a/gi/pyp-topics/scripts/extract_contexts.py b/gi/pyp-topics/scripts/extract_contexts.py
deleted file mode 100755
index b2723f2a..00000000
--- a/gi/pyp-topics/scripts/extract_contexts.py
+++ /dev/null
@@ -1,144 +0,0 @@
-#!/usr/bin/python
-
-import sys,collections
-
-def extract_backoff(context_list, order):
- assert len(context_list) == (2*order)
- backoffs = []
- for i in range(1,order+1):
- if i == order:
- backoffs.append(([context_list[i-1]+"|"], ["|"+context_list[i]]))
- else:
- right_limit = 2*order-i
- core = context_list[i:right_limit]
- left = [context_list[i-1]+"|"*(order-i+1)]
- right = ["|"*(order-i+1)+context_list[right_limit]]
- backoffs.append((core, left, right))
-# print context_list, backoffs
- return backoffs
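-# e.g. (annotation, not in the original): extract_backoff(['a','b','c','d'], 2)
-# returns [(['b', 'c'], ['a||'], ['||d']), (['b|'], ['|c'])]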
-
-def tuple_to_str(t):
- s=""
- for i,x in enumerate(t):
- if i > 0: s += "|"
- s += str(x)
- return s
-
-if len(sys.argv) < 3:
- print "Usage: extract-contexts.py output_filename order cutoff lowercase"
- exit(1)
-
-output_filename = sys.argv[1]
-order = int(sys.argv[2])
-cutoff = 0
-if len(sys.argv) > 3:
- cutoff = int(sys.argv[3])
-lowercase = False
-if len(sys.argv) > 4:
- lowercase = sys.argv[4].lower() not in ('', '0', 'false') # bool("false") is True, so parse explicitly
-
-contexts_dict={}
-contexts_list=[]
-contexts_freq=collections.defaultdict(int)
-contexts_backoff={}
-
-token_dict={}
-token_list=[]
-documents_dict=collections.defaultdict(dict)
-
-contexts_at_order = [0 for i in range(order+1)]
-
-prefix = ["<s%d>|<s>"%i for i in range(order)]
-suffix = ["</s%d>|</s>"%i for i in range(order)]
-
-for line in sys.stdin:
- tokens = list(prefix)
- tokens.extend(line.split())
- tokens.extend(suffix)
- if lowercase:
- tokens = map(lambda x: x.lower(), tokens)
-
- for i in range(order, len(tokens)-order):
- context_list = []
- term=""
- for j in range(i-order, i+order+1):
- token,tag = tokens[j].rsplit('|',1)
- if j != i:
- context_list.append(token)
- else:
- if token not in token_dict:
- token_dict[token] = len(token_dict)
- token_list.append(token)
- term = token_dict[token]
-
- context = tuple_to_str(tuple(context_list))
-
- if context not in contexts_dict:
- context_index = len(contexts_dict)
- contexts_dict[context] = context_index
- contexts_list.append(context)
- contexts_at_order[0] += 1
-
- # handle backoff
- backoff_contexts = extract_backoff(context_list, order)
- bo_indexes=[(context_index,)]
-# bo_indexes=[(context,)]
- for i,bo in enumerate(backoff_contexts):
- factor_indexes=[]
- for factor in bo:
- bo_tuple = tuple_to_str(tuple(factor))
- if bo_tuple not in contexts_dict:
- contexts_dict[bo_tuple] = len(contexts_dict)
- contexts_list.append(bo_tuple)
- contexts_at_order[i+1] += 1
-# factor_indexes.append(bo_tuple)
- factor_indexes.append(contexts_dict[bo_tuple])
- bo_indexes.append(tuple(factor_indexes))
-
- for i in range(len(bo_indexes)-1):
- contexts_backoff[bo_indexes[i][0]] = bo_indexes[i+1]
-
- context_index = contexts_dict[context]
- contexts_freq[context_index] += 1
-
- if context_index not in documents_dict[term]:
- documents_dict[term][context_index] = 1
- else:
- documents_dict[term][context_index] += 1
-
-term_file = open(output_filename+".terms",'w')
-for t in token_list: print >>term_file, t
-term_file.close()
-
-contexts_file = open(output_filename+".contexts",'w')
-for c in contexts_list:
- print >>contexts_file, c
-contexts_file.close()
-
-data_file = open(output_filename+".data",'w')
-for t in range(len(token_list)):
- line=""
- num_active=0
- for c in documents_dict[t]:
- count = documents_dict[t][c]
- if contexts_freq[c] >= cutoff:
- line += (' ' + str(c) + ':' + str(count))
- num_active += 1
- if num_active > 0:
- print >>data_file, "%d%s" % (num_active,line)
-data_file.close()
-
-contexts_backoff_file = open(output_filename+".contexts_backoff",'w')
-print >>contexts_backoff_file, len(contexts_list), order,
-#for x in contexts_at_order:
-# print >>contexts_backoff_file, x,
-#print >>contexts_backoff_file
-for x in range(order-1):
- print >>contexts_backoff_file, 3,
-print >>contexts_backoff_file, 2
-
-for x in contexts_backoff:
- print >>contexts_backoff_file, x,
- for y in contexts_backoff[x]: print >>contexts_backoff_file, y,
- print >>contexts_backoff_file
-contexts_backoff_file.close()
diff --git a/gi/pyp-topics/scripts/extract_contexts_test.py b/gi/pyp-topics/scripts/extract_contexts_test.py
deleted file mode 100755
index 693b6e0b..00000000
--- a/gi/pyp-topics/scripts/extract_contexts_test.py
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/usr/bin/python
-
-import sys,collections
-
-def tuple_to_str(t):
- s=""
- for i,x in enumerate(t):
- if i > 0: s += "|"
- s += str(x)
- return s
-
-if len(sys.argv) < 5:
- print "Usage: extract-contexts_test.py output_filename vocab contexts order lowercase"
- exit(1)
-
-output_filename = sys.argv[1]
-output = open(output_filename+".test_data",'w')
-
-unk_term="-UNK-"
-vocab_dict={}
-for i,x in enumerate(file(sys.argv[2], 'r').readlines()):
- vocab_dict[x.strip()]=i
-
-contexts_dict={}
-contexts_list=[]
-for i,x in enumerate(file(sys.argv[3], 'r').readlines()):
- contexts_dict[x.strip()]=i
- contexts_list.append(x.strip())
-
-order = int(sys.argv[4])
-
-lowercase = False
-if len(sys.argv) > 5:
- lowercase = sys.argv[5].lower() not in ('', '0', 'false') # bool("false") is True, so parse explicitly
-if lowercase: unk_term = unk_term.lower()
-
-prefix = ["<s%d>|<s>"%i for i in range(order)]
-suffix = ["</s%d>|</s>"%i for i in range(order)]
-
-assert unk_term in vocab_dict
-for line in sys.stdin:
- tokens = list(prefix)
- tokens.extend(line.split())
- tokens.extend(suffix)
- if lowercase:
- tokens = map(lambda x: x.lower(), tokens)
-
- for i in range(order, len(tokens)-order):
- context_list=[]
- term=""
- for j in range(i-order, i+order+1):
- token,tag = tokens[j].rsplit('|',1)
- if j != i:
- context_list.append(token)
- else:
- if token not in vocab_dict:
- term = vocab_dict[unk_term]
- else:
- term = vocab_dict[token]
- context = tuple_to_str(context_list)
- if context not in contexts_dict:
- contexts_dict[context] = len(contexts_dict)
- contexts_list.append(context)
- context_index = contexts_dict[context]
- print >>output, "%d:%d" % (term,context_index),
- print >>output
-output.close()
-
-contexts_file = open(output_filename+".test_contexts",'w')
-for c in contexts_list:
- print >>contexts_file, c
-contexts_file.close()
diff --git a/gi/pyp-topics/scripts/extract_leaves.py b/gi/pyp-topics/scripts/extract_leaves.py
deleted file mode 100755
index 14783b36..00000000
--- a/gi/pyp-topics/scripts/extract_leaves.py
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/python
-
-import nltk
-import nltk.probability
-import sys
-import getopt
-
-lexicalise=False
-rm_traces=False
-cutoff=100
-length_cutoff=10000
-try:
- opts, args = getopt.getopt(sys.argv[1:], "hs:c:l", ["help", "lexicalise", "cutoff=", "sentence-length=", "remove-traces"])
-except getopt.GetoptError:
- print "Usage: extract_leaves.py [-lsc]"
- sys.exit(2)
-for opt, arg in opts:
- if opt in ("-h", "--help"):
- print "Usage: extract_leaves.py [-lsc]"
- sys.exit()
- elif opt in ("-l", "--lexicalise"):
- lexicalise = True
- elif opt in ("-c", "--cutoff"):
- cutoff = int(arg)
- elif opt in ("-s", "--sentence-length"):
- length_cutoff = int(arg)
- elif opt in ("--remove-traces"):
- rm_traces = True
-
-token_freq = nltk.probability.FreqDist()
-lines = []
-for line in sys.stdin:
- t = nltk.Tree.parse(line)
- pos = t.pos()
- if len(pos) <= length_cutoff:
- lines.append(pos)
- for token, tag in pos:
- token_freq.inc(token)
-
-for line in lines:
- for token,tag in line:
- if not (rm_traces and tag == "-NONE-"):
- if lexicalise:
- if token_freq[token] < cutoff:
- token = '-UNK-'
- print '%s|%s' % (token,tag),
- else:
- print '%s' % tag,
- print
diff --git a/gi/pyp-topics/scripts/map-documents.py b/gi/pyp-topics/scripts/map-documents.py
deleted file mode 100755
index 703de312..00000000
--- a/gi/pyp-topics/scripts/map-documents.py
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/python
-
-import sys
-
-if len(sys.argv) != 2:
- print "Usage: map-documents.py vocab-file"
- exit(1)
-
-vocab = file(sys.argv[1], 'r').readlines()
-term_dict = map(lambda x: x.strip(), vocab)
-
-for line in sys.stdin:
- tokens = line.split()
- for token in tokens:
- elements = token.split(':')
- if len(elements) == 1:
- print "%s" % (term_dict[int(elements[0])]),
- else:
- print "%s:%s" % (term_dict[int(elements[0])], elements[1]),
- print
diff --git a/gi/pyp-topics/scripts/map-terms.py b/gi/pyp-topics/scripts/map-terms.py
deleted file mode 100755
index eb0298d7..00000000
--- a/gi/pyp-topics/scripts/map-terms.py
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/python
-
-import sys
-
-if len(sys.argv) != 2:
- print "Usage: map-terms.py vocab-file"
- exit(1)
-
-vocab = file(sys.argv[1], 'r').readlines()
-term_dict = map(lambda x: x.strip().replace(' ','_'), vocab)
-
-for line in sys.stdin:
- tokens = line.split()
- for token in tokens:
- elements = token.split(':')
- if len(elements) == 1:
- print "%s" % (term_dict[int(elements[0])]),
- else:
- print "%s:%s" % (term_dict[int(elements[0])], elements[1]),
- print
diff --git a/gi/pyp-topics/scripts/run.sh b/gi/pyp-topics/scripts/run.sh
deleted file mode 100644
index 19e625b1..00000000
--- a/gi/pyp-topics/scripts/run.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/sh
-
-
-./simple-extract-context.sh ~/workspace/clsp2010/jhuws2010/data/btec/split.zh-en.al 1 | ~/workspace/pyp-topics/scripts/contexts2documents.py > split.zh-en.data
-
-~/workspace/pyp-topics/bin/pyp-topics-train -d split.zh-en.data -t 50 -s 100 -o split.zh-en.documents.gz -w split.zh-en.topics.gz
-gunzip split.zh-en.documents.gz
-
-~/workspace/cdec/extools/extractor -i ../jhuws2010/data/btec/split.zh-en.al -S 1 -c 500000 -L 12 --base_phrase_spans | ~/workspace/pyp-topics/scripts/spans2labels.py split.zh-en.phrases split.zh-en.contexts split.zh-en.documents > corpus.zh-en.labelled_spans
-
-paste -d " " ~/workspace/clsp2010/jhuws2010/data/btec/split.zh-en.al corpus.labelled_spans > split.zh-en.labelled_spans
-
-./simple-extract.sh ~/workspace/clsp2010/scratch/split.zh-en.labelled_spans
diff --git a/gi/pyp-topics/scripts/score-mkcls.py b/gi/pyp-topics/scripts/score-mkcls.py
deleted file mode 100755
index 6bd33fc5..00000000
--- a/gi/pyp-topics/scripts/score-mkcls.py
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/python
-
-import sys
-from collections import defaultdict
-
-def dict_max(d):
- max_val=-1
- max_key=None
- for k in d:
- if d[k] > max_val:
- max_val = d[k]
- max_key = k
- assert max_key
- return max_key
-
-if len(sys.argv) != 3:
- print "Usage: score-mkcls.py gold classes"
- exit(1)
-
-gold_file=open(sys.argv[1],'r')
-
-term_to_topics = {}
-for line in open(sys.argv[2],'r'):
- term,cls = line.split()
- term_to_topics[term] = cls
-
-gold_to_topics = defaultdict(dict)
-topics_to_gold = defaultdict(dict)
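-# Annotation (not in the original): many-to-one scoring maps each predicted
-# class to the gold tag it co-occurs with most often, then counts a token as
-# correct when its gold tag equals that mapped tag.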
-
-for gold_line in gold_file:
- gold_tokens = gold_line.split()
- for gold_token in gold_tokens:
- gold_term,gold_tag = gold_token.rsplit('|',1)
- pred_token = term_to_topics[gold_term]
- gold_to_topics[gold_tag][pred_token] \
- = gold_to_topics[gold_tag].get(pred_token, 0) + 1
- topics_to_gold[pred_token][gold_tag] \
- = topics_to_gold[pred_token].get(gold_tag, 0) + 1
-
-pred=0
-correct=0
-gold_file=open(sys.argv[1],'r')
-for gold_line in gold_file:
- gold_tokens = gold_line.split()
-
- for gold_token in gold_tokens:
- gold_term,gold_tag = gold_token.rsplit('|',1)
- pred_token = term_to_topics[gold_term]
- print "%s|%s|%s" % (gold_token, pred_token, dict_max(topics_to_gold[pred_token])),
- pred += 1
- if gold_tag == dict_max(topics_to_gold[pred_token]):
- correct += 1
- print
-print >>sys.stderr, "Many-to-One Accuracy = %f" % (float(correct) / pred)
-#for x in gold_to_topics:
-# print x,dict_max(gold_to_topics[x])
-#print "###################################################"
-#for x in range(len(topics_to_gold)):
-# print x,dict_max(topics_to_gold[str(x)])
-# print x,topics_to_gold[str(x)]
-#print term_to_topics
diff --git a/gi/pyp-topics/scripts/score-topics.py b/gi/pyp-topics/scripts/score-topics.py
deleted file mode 100755
index 1d8a1fcd..00000000
--- a/gi/pyp-topics/scripts/score-topics.py
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/usr/bin/python
-
-import sys
-from collections import defaultdict
-
-def dict_max(d):
- max_val=-1
- max_key=None
- for k in d:
- if d[k] > max_val:
- max_val = d[k]
- max_key = k
- assert max_key
- return max_key
-
-if len(sys.argv) != 3:
- print "Usage: score-topics.py gold pred"
- exit(1)
-
-gold_file=open(sys.argv[1],'r')
-pred_file=open(sys.argv[2],'r')
-
-gold_to_topics = defaultdict(dict)
-topics_to_gold = defaultdict(dict)
-term_to_topics = defaultdict(dict)
-
-for gold_line,pred_line in zip(gold_file,pred_file):
- gold_tokens = gold_line.split()
- pred_tokens = pred_line.split()
- assert len(gold_tokens) == len(pred_tokens)
-
- for gold_token,pred_token in zip(gold_tokens,pred_tokens):
- gold_term,gold_tag = gold_token.rsplit('|',1)
- gold_to_topics[gold_tag][pred_token] \
- = gold_to_topics[gold_tag].get(pred_token, 0) + 1
- term_to_topics[gold_term][pred_token] \
- = term_to_topics[gold_term].get(pred_token, 0) + 1
- topics_to_gold[pred_token][gold_tag] \
- = topics_to_gold[pred_token].get(gold_tag, 0) + 1
-
-pred=0
-correct=0
-gold_file=open(sys.argv[1],'r')
-pred_file=open(sys.argv[2],'r')
-for gold_line,pred_line in zip(gold_file,pred_file):
- gold_tokens = gold_line.split()
- pred_tokens = pred_line.split()
-
- for gold_token,pred_token in zip(gold_tokens,pred_tokens):
- gold_term,gold_tag = gold_token.rsplit('|',1)
-# print "%s|%s" % (gold_token, dict_max(gold_to_topics[gold_tag])),
- print "%s|%s|%s" % (gold_token, pred_token, dict_max(topics_to_gold[pred_token])),
- pred += 1
- if gold_tag == dict_max(topics_to_gold[pred_token]):
- correct += 1
- print
-print >>sys.stderr, "Many-to-One Accuracy = %f" % (float(correct) / pred)
-#for x in gold_to_topics:
-# print x,dict_max(gold_to_topics[x])
-#print "###################################################"
-#for x in range(len(topics_to_gold)):
-# print x,dict_max(topics_to_gold[str(x)])
-# print x,topics_to_gold[str(x)]
-#print term_to_topics
diff --git a/gi/pyp-topics/scripts/spans2labels.py b/gi/pyp-topics/scripts/spans2labels.py
deleted file mode 100755
index 50fa8106..00000000
--- a/gi/pyp-topics/scripts/spans2labels.py
+++ /dev/null
@@ -1,137 +0,0 @@
-#!/usr/bin/python
-
-import sys
-from operator import itemgetter
-
-if len(sys.argv) < 2:
- print "Usage: spans2labels.py phrase_context_index [order] [threshold] [languages={s,t,b}{s,t,b}] [type={tag,tok,both},{tag,tok,both}]"
- exit(1)
-
-order=1
-threshold = 0
-cutoff_cat = "<UNK>"
-if len(sys.argv) > 2:
- order = int(sys.argv[2])
-if len(sys.argv) > 3:
- threshold = float(sys.argv[3])
-phr=ctx='t'
-if len(sys.argv) > 4:
- phr, ctx = sys.argv[4]
- assert phr in 'stb'
- assert ctx in 'stb'
-phr_typ = ctx_typ = 'both'
-if len(sys.argv) > 5:
- phr_typ, ctx_typ = sys.argv[5].split(',')
- assert phr_typ in ('tag', 'tok', 'both')
- assert ctx_typ in ('tag', 'tok', 'both')
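-# e.g. (illustrative invocation, not from the original):
-# spans2labels.py phrases.index 1 0.9 tb tok,tag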
-
-#print >>sys.stderr, "Loading phrase index"
-phrase_context_index = {}
-for line in file(sys.argv[1], 'r'):
- phrase,tail= line.split('\t')
- contexts = tail.split(" ||| ")
- try: # remove Phil's bizarre integer pair
- x,y = contexts[0].split()
- x=int(x); y=int(y)
- contexts = contexts[1:]
- except ValueError:
- pass
- if len(contexts) == 1: continue
- assert len(contexts) % 2 == 0
- for i in range(0, len(contexts), 2):
- #parse contexts[i+1] = " C=1 P=0.8 ... "
- features=dict([ keyval.split('=') for keyval in contexts[i+1].split()])
- category = features['C']
- if features.has_key('P') and float(features['P']) < threshold:
- category = cutoff_cat
-
- phrase_context_index[(phrase,contexts[i])] = category
- #print (phrase,contexts[i]), category
-
-#print >>sys.stderr, "Labelling spans"
-for line in sys.stdin:
- #print >>sys.stderr, "line", line.strip()
- line_segments = line.split(' ||| ')
- assert len(line_segments) >= 3
- source = ['<s>' for x in range(order)] + line_segments[0].split() + ['</s>' for x in range(order)]
- target = ['<s>' for x in range(order)] + line_segments[1].split() + ['</s>' for x in range(order)]
- phrases = [ [int(i) for i in x.split('-')] for x in line_segments[2].split()]
-
- if phr_typ != 'both' or ctx_typ != 'both':
- if phr in 'tb' or ctx in 'tb':
- target_toks = ['<s>' for x in range(order)] + map(lambda x: x.rsplit('_', 1)[0], line_segments[1].split()) + ['</s>' for x in range(order)]
- target_tags = ['<s>' for x in range(order)] + map(lambda x: x.rsplit('_', 1)[-1], line_segments[1].split()) + ['</s>' for x in range(order)]
-
- if phr in 'tb':
- if phr_typ == 'tok':
- targetP = target_toks
- elif phr_typ == 'tag':
- targetP = target_tags
- if ctx in 'tb':
- if ctx_typ == 'tok':
- targetC = target_toks
- elif ctx_typ == 'tag':
- targetC = target_tags
-
- if phr in 'sb' or ctx in 'sb':
- source_toks = ['<s>' for x in range(order)] + map(lambda x: x.rsplit('_', 1)[0], line_segments[0].split()) + ['</s>' for x in range(order)]
- source_tags = ['<s>' for x in range(order)] + map(lambda x: x.rsplit('_', 1)[-1], line_segments[0].split()) + ['</s>' for x in range(order)]
-
- if phr in 'sb':
- if phr_typ == 'tok':
- sourceP = source_toks
- elif phr_typ == 'tag':
- sourceP = source_tags
- if ctx in 'sb':
- if ctx_typ == 'tok':
- sourceC = source_toks
- elif ctx_typ == 'tag':
- sourceC = source_tags
- else:
- sourceP = sourceC = source
- targetP = targetC = target
-
- #print >>sys.stderr, "line", source, '---', target, 'phrases', phrases
-
- print "|||",
-
- for s1,s2,t1,t2 in phrases:
- s1 += order
- s2 += order
- t1 += order
- t2 += order
-
- phraset = phrases = contextt = contexts = ''
- if phr in 'tb':
- phraset = reduce(lambda x, y: x+y+" ", targetP[t1:t2], "").strip()
- if phr in 'sb':
- phrases = reduce(lambda x, y: x+y+" ", sourceP[s1:s2], "").strip()
-
- if ctx in 'tb':
- left_context = reduce(lambda x, y: x+y+" ", targetC[t1-order:t1], "")
- right_context = reduce(lambda x, y: x+y+" ", targetC[t2:t2+order], "").strip()
- contextt = "%s<PHRASE> %s" % (left_context, right_context)
- if ctx in 'sb':
- left_context = reduce(lambda x, y: x+y+" ", sourceC[s1-order:s1], "")
- right_context = reduce(lambda x, y: x+y+" ", sourceC[s2:s2+order], "").strip()
- contexts = "%s<PHRASE> %s" % (left_context, right_context)
-
- if phr == 'b':
- phrase = phraset + ' <SPLIT> ' + phrases
- elif phr == 's':
- phrase = phrases
- else:
- phrase = phraset
-
- if ctx == 'b':
- context = contextt + ' <SPLIT> ' + contexts
- elif ctx == 's':
- context = contexts
- else:
- context = contextt
-
- #print "%d-%d-%d-%d looking up" % (s1-order,s2-order,t1-order,t2-order), (phrase, context)
- label = phrase_context_index.get((phrase,context), cutoff_cat)
- if label != cutoff_cat: #cutoff'd spans are left unlabelled
- print "%d-%d-%d-%d:X%s" % (s1-order,s2-order,t1-order,t2-order,label),
- print
diff --git a/gi/pyp-topics/scripts/tokens2classes.py b/gi/pyp-topics/scripts/tokens2classes.py
deleted file mode 100755
index 33df255f..00000000
--- a/gi/pyp-topics/scripts/tokens2classes.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/python
-
-import sys
-
-if len(sys.argv) != 3:
- print "Usage: tokens2classes.py source_classes target_classes"
- exit(1)
-
-source_to_topics = {}
-for line in open(sys.argv[1],'r'):
- term,cls = line.split()
- source_to_topics[term] = cls
-
-target_to_topics = {}
-for line in open(sys.argv[2],'r'):
- term,cls = line.split()
- target_to_topics[term] = cls
-
-for line in sys.stdin:
- source, target, tail = line.split(" ||| ")
-
- for token in source.split():
- print source_to_topics[token],
- print "|||",
- for token in target.split():
- print target_to_topics[token],
- print "|||", tail,
diff --git a/gi/pyp-topics/scripts/topics.py b/gi/pyp-topics/scripts/topics.py
deleted file mode 100755
index 0db1af71..00000000
--- a/gi/pyp-topics/scripts/topics.py
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/python
-
-import sys
-
-if len(sys.argv) != 2:
- print "Usage: topics.py words-per-topic"
- exit(1)
-
-for t,line in enumerate(sys.stdin):
- tokens = line.split()
- terms = []
- for token in tokens:
- elements = token.rsplit(':',1)
- terms.append((int(elements[1]),elements[0]))
- terms.sort()
- terms.reverse()
-
- print "Topic %d:" % t
- map(lambda (x,y) : sys.stdout.write(" %s:%s\n" % (y,x)), terms[:int(sys.argv[1])])
- print
diff --git a/gi/pyp-topics/src/Makefile.am b/gi/pyp-topics/src/Makefile.am
deleted file mode 100644
index d3f95d0b..00000000
--- a/gi/pyp-topics/src/Makefile.am
+++ /dev/null
@@ -1,16 +0,0 @@
-bin_PROGRAMS = pyp-topics-train pyp-contexts-train #mpi-pyp-contexts-train
-
-contexts_lexer.cc: contexts_lexer.l
- $(LEX) -s -CF -8 -o$@ $<
-
-pyp_topics_train_SOURCES = mt19937ar.c corpus.cc gzstream.cc pyp-topics.cc train.cc contexts_lexer.cc contexts_corpus.cc
-pyp_topics_train_LDADD = $(top_srcdir)/utils/libutils.a -lz
-
-pyp_contexts_train_SOURCES = mt19937ar.c corpus.cc gzstream.cc pyp-topics.cc contexts_lexer.cc contexts_corpus.cc train-contexts.cc
-pyp_contexts_train_LDADD = $(top_srcdir)/utils/libutils.a -lz
-
-#mpi_pyp_contexts_train_SOURCES = mt19937ar.c corpus.cc gzstream.cc mpi-pyp-topics.cc contexts_lexer.cc contexts_corpus.cc mpi-train-contexts.cc
-#mpi_pyp_contexts_train_LDADD = $(top_srcdir)/utils/libutils.a -lz
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I../../../utils
-
diff --git a/gi/pyp-topics/src/Makefile.mpi b/gi/pyp-topics/src/Makefile.mpi
deleted file mode 100644
index b7b8a290..00000000
--- a/gi/pyp-topics/src/Makefile.mpi
+++ /dev/null
@@ -1,26 +0,0 @@
-BLD_ARCH=$(shell uname -s)
--include macros.${BLD_ARCH}
-
-local_objs = mt19937ar.o corpus.o gzstream.o mpi-pyp-topics.o contexts_lexer.o contexts_corpus.o mpi-train-contexts.o
-
-all: mpi-pyp-contexts-train
-
--include makefile.depend
-
-#-----------------------#
-# Local stuff
-#-----------------------#
-
-mpi-pyp-contexts-train: mpi-train-contexts.o $(local_objs)
- $(CXX) -o $@ $^ $(LDFLAGS)
-
-.PHONY: depend echo
-depend:
-#$(CXX) -MM $(CXXFLAGS) *.cc *.c | sed 's/^\(.*\.o:\)/obj\/\1/' > makefile.depend
- $(CXX) -MM $(CXXFLAGS) *.cc *.c > makefile.depend
-
-clean:
- rm -f *.o
-
-#clobber: clean
-# rm makefile.depend ../bin/${ARCH}/*
diff --git a/gi/pyp-topics/src/clock_gettime_stub.c b/gi/pyp-topics/src/clock_gettime_stub.c
deleted file mode 100644
index 4883b7c1..00000000
--- a/gi/pyp-topics/src/clock_gettime_stub.c
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c), MM Weiss
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the MM Weiss nor the names of its contributors
- * may be used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
- * SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
- * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * clock_gettime_stub.c
- * gcc -Wall -c clock_gettime_stub.c
- * posix realtime functions; MacOS user space glue
- */
-
-/* @comment
- * other possible implementation using intel builtin rdtsc
- * rdtsc-workaround: http://www.mcs.anl.gov/~kazutomo/rdtsc.html
- *
- * we could get the ticks by doing this
- *
- * __asm __volatile("mov %%ebx, %%esi\n\t"
- * "cpuid\n\t"
- * "xchg %%esi, %%ebx\n\t"
- * "rdtsc"
- * : "=a" (a),
- * "=d" (d)
- * );
-
- * we could even replace our tricky sched_yield call by assembly code to get a better accurency,
- * anyway the following C stub will satisfy 99% of apps using posix clock_gettime call,
- * moreover, the setter version (clock_settime) could be easly written using mach primitives:
- * http://www.opensource.apple.com/source/xnu/xnu-${VERSION}/osfmk/man/ (clock_[set|get]_time)
- *
- * hackers don't be crackers, don't you use a flush toilet?
- *
- *
- * @see draft: ./posix-realtime-stub/posix-realtime-stub.c
- *
- */
-
-
-#ifdef __APPLE__
-
-#pragma weak clock_gettime
-
-#include <sys/time.h>
-#include <sys/resource.h>
-#include <mach/mach.h>
-#include <mach/clock.h>
-#include <mach/mach_time.h>
-#include <errno.h>
-#include <unistd.h>
-#include <sched.h>
-
-typedef enum {
- CLOCK_REALTIME,
- CLOCK_MONOTONIC,
- CLOCK_PROCESS_CPUTIME_ID,
- CLOCK_THREAD_CPUTIME_ID
-} clockid_t;
-
-static mach_timebase_info_data_t __clock_gettime_inf;
-
-static int clock_gettime(clockid_t clk_id, struct timespec *tp) {
- kern_return_t ret;
- clock_serv_t clk;
- clock_id_t clk_serv_id;
- mach_timespec_t tm;
-
- uint64_t start, end, delta, nano;
-
- //task_basic_info_data_t tinfo;
- //task_thread_times_info_data_t ttinfo;
- //mach_msg_type_number_t tflag;
-
- int retval = -1;
- switch (clk_id) {
- case CLOCK_REALTIME:
- case CLOCK_MONOTONIC:
- clk_serv_id = clk_id == CLOCK_REALTIME ? CALENDAR_CLOCK : SYSTEM_CLOCK;
- if (KERN_SUCCESS == (ret = host_get_clock_service(mach_host_self(), clk_serv_id, &clk))) {
- if (KERN_SUCCESS == (ret = clock_get_time(clk, &tm))) {
- tp->tv_sec = tm.tv_sec;
- tp->tv_nsec = tm.tv_nsec;
- retval = 0;
- }
- }
- if (KERN_SUCCESS != ret) {
- errno = EINVAL;
- retval = -1;
- }
- break;
- case CLOCK_PROCESS_CPUTIME_ID:
- case CLOCK_THREAD_CPUTIME_ID:
- start = mach_absolute_time();
- if (clk_id == CLOCK_PROCESS_CPUTIME_ID) {
- getpid();
- } else {
- sched_yield();
- }
- end = mach_absolute_time();
- delta = end - start;
- if (0 == __clock_gettime_inf.denom) {
- mach_timebase_info(&__clock_gettime_inf);
- }
- nano = delta * __clock_gettime_inf.numer / __clock_gettime_inf.denom;
- tp->tv_sec = nano * 1e-9;
- tp->tv_nsec = nano - (tp->tv_sec * 1e9);
- retval = 0;
- break;
- default:
- errno = EINVAL;
- retval = -1;
- }
- return retval;
-}
-
-#endif // __APPLE__
-
-/* EOF */
diff --git a/gi/pyp-topics/src/contexts_corpus.cc b/gi/pyp-topics/src/contexts_corpus.cc
deleted file mode 100644
index 92b1b34c..00000000
--- a/gi/pyp-topics/src/contexts_corpus.cc
+++ /dev/null
@@ -1,164 +0,0 @@
-#include <sstream>
-#include <iostream>
-#include <set>
-
-#include "contexts_corpus.hh"
-#include "gzstream.hh"
-#include "contexts_lexer.h"
-
-#include <boost/tuple/tuple.hpp>
-
-
-using namespace std;
-
-//////////////////////////////////////////////////
-// ContextsCorpus
-//////////////////////////////////////////////////
-
-bool read_callback_binary_contexts = false;
-
-void read_callback(const ContextsLexer::PhraseContextsType& new_contexts, void* extra) {
- assert(new_contexts.contexts.size() == new_contexts.counts.size());
-
- boost::tuple<ContextsCorpus*, BackoffGenerator*, map<string,int>* >* extra_pair
- = static_cast< boost::tuple<ContextsCorpus*, BackoffGenerator*, map<string,int>* >* >(extra);
-
- ContextsCorpus* corpus_ptr = extra_pair->get<0>();
- BackoffGenerator* backoff_gen = extra_pair->get<1>();
- //map<string,int>* counts = extra_pair->get<2>();
-
- Document* doc(new Document());
-
- //cout << "READ: " << new_contexts.phrase << "\t";
- for (int i=0; i < (int)new_contexts.counts.size(); ++i) {
- int cache_word_count = corpus_ptr->m_dict.max();
-
- //string context_str = corpus_ptr->m_dict.toString(new_contexts.contexts[i]);
- int context_index = new_contexts.counts.at(i).first;
- string context_str = corpus_ptr->m_dict.toString(new_contexts.contexts[context_index]);
-
- // filter out singleton contexts
- //if (!counts->empty()) {
- // map<string,int>::const_iterator find_it = counts->find(context_str);
- // if (find_it == counts->end() || find_it->second < 2)
- // continue;
- //}
-
- WordID id = corpus_ptr->m_dict.Convert(context_str);
- if (cache_word_count != corpus_ptr->m_dict.max()) {
- corpus_ptr->m_backoff->terms_at_level(0)++;
- corpus_ptr->m_num_types++;
- }
-
- //int count = new_contexts.counts[i];
- int count = new_contexts.counts.at(i).second;
- if (read_callback_binary_contexts) {
- doc->push_back(id);
- corpus_ptr->m_num_terms++;
- }
- else {
- for (int j=0; j<count; ++j)
- doc->push_back(id);
- corpus_ptr->m_num_terms += count;
- }
-
- // generate the backoff map
- if (backoff_gen) {
- int order = 1;
- WordID backoff_id = id;
- //ContextsLexer::Context backedoff_context = new_contexts.contexts[i];
- ContextsLexer::Context backedoff_context = new_contexts.contexts[context_index];
- while (true) {
- if (!corpus_ptr->m_backoff->has_backoff(backoff_id)) {
- //cerr << "Backing off from " << corpus_ptr->m_dict.Convert(backoff_id) << " to ";
- backedoff_context = (*backoff_gen)(backedoff_context);
-
- if (backedoff_context.empty()) {
- //cerr << "Nothing." << endl;
- (*corpus_ptr->m_backoff)[backoff_id] = -1;
- break;
- }
-
- if (++order > corpus_ptr->m_backoff->order())
- corpus_ptr->m_backoff->order(order);
-
- int cache_word_count = corpus_ptr->m_dict.max();
- int new_backoff_id = corpus_ptr->m_dict.Convert(backedoff_context);
- if (cache_word_count != corpus_ptr->m_dict.max())
- corpus_ptr->m_backoff->terms_at_level(order-1)++;
-
- //cerr << corpus_ptr->m_dict.Convert(new_backoff_id) << " ." << endl;
-
- backoff_id = ((*corpus_ptr->m_backoff)[backoff_id] = new_backoff_id);
- }
- else break;
- }
- }
- //cout << context_str << " (" << id << ") ||| C=" << count << " ||| ";
- }
- //cout << endl;
-
- //if (!doc->empty()) {
- corpus_ptr->m_documents.push_back(doc);
- corpus_ptr->m_keys.push_back(new_contexts.phrase);
- //}
-}
-
-void filter_callback(const ContextsLexer::PhraseContextsType& new_contexts, void* extra) {
- assert(new_contexts.contexts.size() == new_contexts.counts.size());
-
- map<string,int>* context_counts = (static_cast<map<string,int>*>(extra));
-
- for (int i=0; i < (int)new_contexts.counts.size(); ++i) {
- int context_index = new_contexts.counts.at(i).first;
- int count = new_contexts.counts.at(i).second;
- //if (read_callback_binary_contexts) count = 1;
- //int count = new_contexts.counts[i];
- pair<map<string,int>::iterator,bool> result
- = context_counts->insert(make_pair(Dict::toString(new_contexts.contexts[context_index]),count));
- //= context_counts->insert(make_pair(Dict::toString(new_contexts.contexts[i]),count));
- if (!result.second)
- result.first->second += count;
- }
-}
-
-
-unsigned ContextsCorpus::read_contexts(const string &filename,
- BackoffGenerator* backoff_gen_ptr,
- bool /*filter_singletons*/,
- bool binary_contexts) {
- read_callback_binary_contexts = binary_contexts;
-
- map<string,int> counts;
- //if (filter_singletons)
- {
- // cerr << "--- Filtering singleton contexts ---" << endl;
-
- igzstream in(filename.c_str());
- ContextsLexer::ReadContexts(&in, filter_callback, &counts);
- }
-
- m_num_terms = 0;
- m_num_types = 0;
-
- igzstream in(filename.c_str());
- boost::tuple<ContextsCorpus*, BackoffGenerator*, map<string,int>* > extra_pair(this,backoff_gen_ptr,&counts);
- ContextsLexer::ReadContexts(&in, read_callback, &extra_pair);
-
- //m_num_types = m_dict.max();
-
- cerr << "Read backoff with order " << m_backoff->order() << "\n";
- for (int o=0; o<m_backoff->order(); o++)
- cerr << " Terms at " << o << " = " << m_backoff->terms_at_level(o) << endl;
- //cerr << endl;
-
- int i=0; double av_freq=0;
- for (map<string,int>::const_iterator it=counts.begin(); it != counts.end(); ++it, ++i) {
- WordID id = m_dict.Convert(it->first);
- m_context_counts[id] = it->second;
- av_freq += it->second;
- }
- cerr << " Average term frequency = " << av_freq / (double) i << endl;
-
- return m_documents.size();
-}
diff --git a/gi/pyp-topics/src/contexts_corpus.hh b/gi/pyp-topics/src/contexts_corpus.hh
deleted file mode 100644
index 2527f655..00000000
--- a/gi/pyp-topics/src/contexts_corpus.hh
+++ /dev/null
@@ -1,90 +0,0 @@
-#ifndef _CONTEXTS_CORPUS_HH
-#define _CONTEXTS_CORPUS_HH
-
-#include <vector>
-#include <string>
-#include <map>
-#include <tr1/unordered_map>
-
-#include <boost/ptr_container/ptr_vector.hpp>
-
-#include "corpus.hh"
-#include "contexts_lexer.h"
-#include "dict.h"
-
-
-class BackoffGenerator {
-public:
- virtual ContextsLexer::Context
- operator()(const ContextsLexer::Context& c) = 0;
-
-protected:
- ContextsLexer::Context strip_edges(const ContextsLexer::Context& c) {
- if (c.size() <= 1) return ContextsLexer::Context();
- assert(c.size() % 2 == 1);
- return ContextsLexer::Context(c.begin() + 1, c.end() - 1);
- }
-};
-
-class NullBackoffGenerator : public BackoffGenerator {
- virtual ContextsLexer::Context
- operator()(const ContextsLexer::Context&)
- { return ContextsLexer::Context(); }
-};
-
-class SimpleBackoffGenerator : public BackoffGenerator {
- virtual ContextsLexer::Context
- operator()(const ContextsLexer::Context& c) {
- if (c.size() <= 3)
- return ContextsLexer::Context();
- return strip_edges(c);
- }
-};
-
-
-////////////////////////////////////////////////////////////////
-// ContextsCorpus
-////////////////////////////////////////////////////////////////
-
-class ContextsCorpus : public Corpus {
- friend void read_callback(const ContextsLexer::PhraseContextsType&, void*);
-
-public:
- ContextsCorpus() : m_backoff(new TermBackoff) {}
- virtual ~ContextsCorpus() {}
-
- virtual unsigned read_contexts(const std::string &filename,
- BackoffGenerator* backoff_gen=0,
- bool filter_singletons=false,
- bool binary_contexts=false);
-
- TermBackoffPtr backoff_index() {
- return m_backoff;
- }
-
- std::vector<std::string> context2string(const WordID& id) const {
- std::vector<std::string> res;
- assert (id >= 0);
- m_dict.AsVector(id, &res);
- return res;
- }
-
- virtual int context_count(const WordID& id) const {
- return m_context_counts.find(id)->second;
- }
-
-
- const std::string& key(const int& i) const {
- return m_keys.at(i);
- }
-
- const Dict& dict() const { return m_dict; }
-
-protected:
- TermBackoffPtr m_backoff;
- Dict m_dict;
- std::vector<std::string> m_keys;
- std::tr1::unordered_map<int,int> m_context_counts;
-};
-
-#endif // _CONTEXTS_CORPUS_HH
diff --git a/gi/pyp-topics/src/contexts_lexer.h b/gi/pyp-topics/src/contexts_lexer.h
deleted file mode 100644
index 66004990..00000000
--- a/gi/pyp-topics/src/contexts_lexer.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef _CONTEXTS_LEXER_H_
-#define _CONTEXTS_LEXER_H_
-
-#include <iostream>
-#include <vector>
-#include <string>
-
-#include "dict.h"
-
-struct ContextsLexer {
- typedef std::vector<std::string> Context;
- struct PhraseContextsType {
- std::string phrase;
- std::vector<Context> contexts;
- std::vector< std::pair<int,int> > counts;
- };
-
- typedef void (*ContextsCallback)(const PhraseContextsType& new_contexts, void* extra);
- static void ReadContexts(std::istream* in, ContextsCallback func, void* extra);
-};
-
-#endif
diff --git a/gi/pyp-topics/src/contexts_lexer.l b/gi/pyp-topics/src/contexts_lexer.l
deleted file mode 100644
index 64cd7ca3..00000000
--- a/gi/pyp-topics/src/contexts_lexer.l
+++ /dev/null
@@ -1,113 +0,0 @@
-%{
-#include "contexts_lexer.h"
-
-#include <string>
-#include <iostream>
-#include <sstream>
-#include <cstring>
-#include <cassert>
-#include <algorithm>
-
-int lex_line = 0;
-std::istream* contextslex_stream = NULL;
-ContextsLexer::ContextsCallback contexts_callback = NULL;
-void* contexts_callback_extra = NULL;
-
-#undef YY_INPUT
-#define YY_INPUT(buf, result, max_size) (result = contextslex_stream->read(buf, max_size).gcount())
-
-#define YY_SKIP_YYWRAP 1
-int num_phrases = 0;
-int yywrap() { return 1; }
-
-#define MAX_TOKEN_SIZE 255
-std::string contextslex_tmp_token(MAX_TOKEN_SIZE, '\0');
-ContextsLexer::PhraseContextsType current_contexts;
-
-#define MAX_CONTEXT_SIZE 255
-//std::string tmp_context[MAX_CONTEXT_SIZE];
-ContextsLexer::Context tmp_context;
-
-
-void contextslex_reset() {
- current_contexts.phrase.clear();
- current_contexts.contexts.clear();
- current_contexts.counts.clear();
- tmp_context.clear();
-}
-
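-/* Annotation (not in the original): the states below consume lines of the
- * form: phrase \t tok tok ... ||| C=<int> ||| tok tok ... ||| C=<int> \n */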
-%}
-
-INT [\-+]?[0-9]+|inf|[\-+]inf
-
-%x CONTEXT COUNT COUNT_END
-%%
-
-<INITIAL>[^\t]+ {
- contextslex_reset();
- current_contexts.phrase.assign(yytext, yyleng);
- BEGIN(CONTEXT);
- }
-<INITIAL>\t {
- ;
- }
-
-<INITIAL,CONTEXT,COUNT>\n {
- std::cerr << "ERROR: contexts_lexer.l: unexpected newline while trying to read phrase|context|count." << std::endl;
- abort();
- }
-
-<CONTEXT>\|\|\| {
- current_contexts.contexts.push_back(tmp_context);
- tmp_context.clear();
- BEGIN(COUNT);
- }
-<CONTEXT>[^ \t]+ {
- contextslex_tmp_token.assign(yytext, yyleng);
- tmp_context.push_back(contextslex_tmp_token);
- }
-<CONTEXT>[ \t]+ { ; }
-
-<COUNT>[ \t]+ { ; }
-<COUNT>C={INT} {
- current_contexts.counts.push_back(std::make_pair(current_contexts.counts.size(), atoi(yytext+2)));
- BEGIN(COUNT_END);
- }
-<COUNT>. {
- std::cerr << "ERROR: contexts_lexer.l: unexpected content while reading count." << std::endl;
- abort();
- }
-
-<COUNT_END>[ \t]+ { ; }
-<COUNT_END>\|\|\| {
- BEGIN(CONTEXT);
- }
-<COUNT_END>\n {
- //std::cerr << "READ:" << current_contexts.phrase << " with " << current_contexts.contexts.size()
- // << " contexts, and " << current_contexts.counts.size() << " counts." << std::endl;
- std::sort(current_contexts.counts.rbegin(), current_contexts.counts.rend());
-
- contexts_callback(current_contexts, contexts_callback_extra);
- current_contexts.phrase.clear();
- current_contexts.contexts.clear();
- current_contexts.counts.clear();
- BEGIN(INITIAL);
- }
-<COUNT_END>. {
- contextslex_tmp_token.assign(yytext, yyleng);
- std::cerr << "ERROR: contexts_lexer.l: unexpected content while looking for ||| closing count." << std::endl;
- abort();
- }
-
-%%
-
-#include "filelib.h"
-
-void ContextsLexer::ReadContexts(std::istream* in, ContextsLexer::ContextsCallback func, void* extra) {
- lex_line = 1;
- contextslex_stream = in;
- contexts_callback_extra = extra;
- contexts_callback = func;
- yylex();
-}
-
diff --git a/gi/pyp-topics/src/corpus.cc b/gi/pyp-topics/src/corpus.cc
deleted file mode 100644
index f182381f..00000000
--- a/gi/pyp-topics/src/corpus.cc
+++ /dev/null
@@ -1,104 +0,0 @@
-#include <sstream>
-#include <iostream>
-#include <set>
-
-#include "corpus.hh"
-#include "gzstream.hh"
-
-using namespace std;
-
-//////////////////////////////////////////////////
-// Corpus
-//////////////////////////////////////////////////
-
-Corpus::Corpus() : m_num_terms(0), m_num_types(0) {}
-
-unsigned Corpus::read(const std::string &filename) {
- m_num_terms = 0;
- m_num_types = 0;
- std::set<int> seen_types;
-
- igzstream in(filename.c_str());
-
- string buf;
- int token;
- unsigned doc_count=0;
- while (getline(in, buf)) {
- Document* doc(new Document());
- istringstream ss(buf);
-
- ss >> token; // the number of unique terms
-
- char delimiter;
- int count;
- while(ss >> token >> delimiter >> count) {
- for (int i=0; i<count; ++i)
- doc->push_back(token);
- m_num_terms += count;
- seen_types.insert(token);
- }
-
- m_documents.push_back(doc);
- doc_count++;
- }
-
- m_num_types = seen_types.size();
-
- return doc_count;
-}
-
-
-//////////////////////////////////////////////////
-// TestCorpus
-//////////////////////////////////////////////////
-
-TestCorpus::TestCorpus() {}
-
-void TestCorpus::read(const std::string &filename) {
- igzstream in(filename.c_str());
-
- string buf;
- Term term;
- DocumentId doc;
- char delimiter;
- while (getline(in, buf)) {
- DocumentTerms* line(new DocumentTerms());
- istringstream ss(buf);
-
- while(ss >> doc >> delimeter >> term)
- line->push_back(DocumentTerm(doc, term));
-
- m_lines.push_back(line);
- }
-}
-
-//////////////////////////////////////////////////
-// TermBackoff
-//////////////////////////////////////////////////
-
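-// Annotation (not in the original): the backoff file is expected to begin
-// with a header line "<num_terms> <order> <terms-at-each-level ...>",
-// followed by "<term> <backoff>" lines, matching the writer in
-// gi/pyp-topics/scripts/extract_contexts.py.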
-void TermBackoff::read(const std::string &filename) {
- igzstream in(filename.c_str());
-
- string buf;
- int num_terms;
- getline(in, buf);
- istringstream ss(buf);
- ss >> num_terms >> m_backoff_order;
-
- m_dict.resize(num_terms, -1);
- for (int i=0; i<m_backoff_order; ++i) {
- int count; ss >> count;
- m_terms_at_order.push_back(count);
- }
-
- Term term, backoff;
- while (getline(in, buf)) {
- istringstream ss(buf);
- ss >> term >> backoff;
-
- assert(term < num_terms);
- assert(term >= 0);
-
- m_dict[term] = backoff;
- }
-}
diff --git a/gi/pyp-topics/src/corpus.hh b/gi/pyp-topics/src/corpus.hh
deleted file mode 100644
index 2aa03527..00000000
--- a/gi/pyp-topics/src/corpus.hh
+++ /dev/null
@@ -1,133 +0,0 @@
-#ifndef _CORPUS_HH
-#define _CORPUS_HH
-
-#include <vector>
-#include <string>
-#include <map>
-#include <limits>
-
-#include <boost/shared_ptr.hpp>
-#include <boost/ptr_container/ptr_vector.hpp>
-
-////////////////////////////////////////////////////////////////
-// Corpus
-////////////////////////////////////////////////////////////////
-typedef int Term;
-
-typedef std::vector<Term> Document;
-typedef std::vector<Term> Terms;
-
-class Corpus {
-public:
- typedef boost::ptr_vector<Document>::const_iterator const_iterator;
-
-public:
- Corpus();
- virtual ~Corpus() {}
-
- virtual unsigned read(const std::string &filename);
-
- const_iterator begin() const { return m_documents.begin(); }
- const_iterator end() const { return m_documents.end(); }
-
- const Document& at(size_t i) const { return m_documents.at(i); }
-
- int num_documents() const { return m_documents.size(); }
- int num_terms() const { return m_num_terms; }
- int num_types() const { return m_num_types; }
-
- virtual int context_count(const int&) const {
- return std::numeric_limits<int>::max();
- }
-
-protected:
- int m_num_terms, m_num_types;
- boost::ptr_vector<Document> m_documents;
-};
-
-typedef int DocumentId;
-struct DocumentTerm {
- DocumentTerm(DocumentId d, Term t) : term(t), doc(d) {}
- Term term;
- DocumentId doc;
-};
-typedef std::vector<DocumentTerm> DocumentTerms;
-
-class TestCorpus {
-public:
- typedef boost::ptr_vector<DocumentTerms>::const_iterator const_iterator;
-
-public:
- TestCorpus();
- ~TestCorpus() {}
-
- void read(const std::string &filename);
-
- const_iterator begin() const { return m_lines.begin(); }
- const_iterator end() const { return m_lines.end(); }
-
- int num_instances() const { return m_lines.size(); }
-
-protected:
- boost::ptr_vector<DocumentTerms> m_lines;
-};
-
-class TermBackoff {
-public:
- typedef std::vector<Term> dictionary_type;
- typedef dictionary_type::const_iterator const_iterator;
- const static int NullBackoff=-1;
-
-public:
- TermBackoff() { order(1); }
- ~TermBackoff() {}
-
- void read(const std::string &filename);
-
- const_iterator begin() const { return m_dict.begin(); }
- const_iterator end() const { return m_dict.end(); }
-
- const Term& operator[](const Term& t) const {
- assert(t < static_cast<int>(m_dict.size()));
- return m_dict[t];
- }
-
- Term& operator[](const Term& t) {
- if (t >= static_cast<int>(m_dict.size()))
- m_dict.resize(t+1, -1);
- return m_dict[t];
- }
-
- bool has_backoff(const Term& t) {
- return t >= 0 && t < static_cast<int>(m_dict.size()) && m_dict[t] >= 0;
- }
-
- int order() const { return m_backoff_order; }
- void order(int o) {
- if (o >= (int)m_terms_at_order.size())
- m_terms_at_order.resize(o, 0);
- m_backoff_order = o;
- }
-
-// int levels() const { return m_terms_at_order.size(); }
- bool is_null(const Term& term) const { return term < 0; }
- int terms_at_level(int level) const {
- assert (level < (int)m_terms_at_order.size());
- return m_terms_at_order.at(level);
- }
-
- int& terms_at_level(int level) {
- assert (level < (int)m_terms_at_order.size());
- return m_terms_at_order.at(level);
- }
-
- int size() const { return m_dict.size(); }
-
-protected:
- dictionary_type m_dict;
- int m_backoff_order;
- std::vector<int> m_terms_at_order;
-};
-typedef boost::shared_ptr<TermBackoff> TermBackoffPtr;
-
-#endif // _CORPUS_HH
diff --git a/gi/pyp-topics/src/gammadist.c b/gi/pyp-topics/src/gammadist.c
deleted file mode 100644
index 4e260db8..00000000
--- a/gi/pyp-topics/src/gammadist.c
+++ /dev/null
@@ -1,247 +0,0 @@
-/* gammadist.c -- computes probability of samples under / produces samples from a Gamma distribution
- *
- * Mark Johnson, 22nd March 2008
- *
- * WARNING: you need to set the flag -std=c99 to compile
- *
- * gammavariate() was translated from random.py in Python library
- *
- * The Gamma distribution is:
- *
- * Gamma(x | alpha, beta) = pow(x/beta, alpha-1) * exp(-x/beta) / (gamma(alpha)*beta)
- *
- * shape parameter alpha > 0 (also called c), scale parameter beta > 0 (also called s);
- * mean is alpha*beta, variance is alpha*beta**2
- *
- * Note that many parameterizations of the Gamma function are in terms of an _inverse_
- * scale parameter beta, which is the inverse of the beta given here.
- *
- * To define a main() that tests the routines, uncomment the following #define:
- */
-/* #define GAMMATEST */
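-
-/* Annotation (not in the original): with this parameterisation,
- * gammavariate(2.0, 3.0) draws samples with mean 2*3 = 6 and
- * variance 2*3*3 = 18. */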
-
-#include <assert.h>
-#include <math.h>
-
-#include "gammadist.h"
-#include "mt19937ar.h"
-
-/* gammadist() returns the probability density of x under a Gamma(alpha,beta)
- * distribution
- */
-
-long double gammadist(long double x, long double alpha, long double beta) {
- assert(alpha > 0);
- assert(beta > 0);
- return pow(x/beta, alpha-1) * exp(-x/beta) / (tgamma(alpha)*beta);
-}
-
-/* lgammadist() returns the log probability density of x under a Gamma(alpha,beta)
- * distribution
- */
-
-long double lgammadist(long double x, long double alpha, long double beta) {
- assert(alpha > 0);
- assert(beta > 0);
- return (alpha-1)*log(x) - alpha*log(beta) - x/beta - lgamma(alpha);
-}
-
-/* This definition of gammavariate is from Python code in
- * the Python random module.
- */
-
-long double gammavariate(long double alpha, long double beta) {
-
- assert(alpha > 0);
- assert(beta > 0);
-
- if (alpha > 1.0) {
-
- /* Uses R.C.H. Cheng, "The generation of Gamma variables with
- non-integral shape parameters", Applied Statistics, (1977), 26,
- No. 1, p71-74 */
-
- long double ainv = sqrt(2.0 * alpha - 1.0);
- long double bbb = alpha - log(4.0);
- long double ccc = alpha + ainv;
-
- while (1) {
- long double u1 = mt_genrand_real3();
- if (u1 > 1e-7 && u1 < 0.9999999) {
- long double u2 = 1.0 - mt_genrand_real3();
- long double v = log(u1/(1.0-u1))/ainv;
- long double x = alpha*exp(v);
- long double z = u1*u1*u2;
- long double r = bbb+ccc*v-x;
- if (r + (1.0+log(4.5)) - 4.5*z >= 0.0 || r >= log(z))
- return x * beta;
- }
- }
- }
- else if (alpha == 1.0) {
- long double u = mt_genrand_real3();
- while (u <= 1e-7)
- u = mt_genrand_real3();
- return -log(u) * beta;
- }
- else {
- /* alpha is between 0 and 1 (exclusive)
- Uses ALGORITHM GS of Statistical Computing - Kennedy & Gentle */
-
- while (1) {
- long double u = mt_genrand_real3();
- long double b = (exp(1) + alpha)/exp(1);
- long double p = b*u;
- long double x = (p <= 1.0) ? pow(p, 1.0/alpha) : -log((b-p)/alpha);
- long double u1 = mt_genrand_real3();
- if (! (((p <= 1.0) && (u1 > exp(-x))) ||
- ((p > 1.0) && (u1 > pow(x, alpha - 1.0)))))
- return x * beta;
- }
- }
-}
-
-/* betadist() returns the probability density of x under a Beta(alpha,beta)
- * distribution.
- */
-
-long double betadist(long double x, long double alpha, long double beta) {
- assert(x >= 0);
- assert(x <= 1);
- assert(alpha > 0);
- assert(beta > 0);
- return pow(x,alpha-1)*pow(1-x,beta-1)*tgamma(alpha+beta)/(tgamma(alpha)*tgamma(beta));
-}
-
-/* lbetadist() returns the log probability density of x under a Beta(alpha,beta)
- * distribution.
- */
-
-long double lbetadist(long double x, long double alpha, long double beta) {
- assert(x > 0);
- assert(x < 1);
- assert(alpha > 0);
- assert(beta > 0);
- return (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta);
-}
-
-/* betavariate() generates a sample from a Beta distribution with
- * parameters alpha and beta.
- *
- * alpha > 0, beta > 0, mean is alpha/(alpha+beta)
- */
-
-long double betavariate(long double alpha, long double beta) {
- long double x = gammavariate(alpha, 1);
- long double y = gammavariate(beta, 1);
- return x/(x+y);
-}
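
[betavariate() relies on the standard identity that if X ~ Gamma(alpha,1) and Y ~ Gamma(beta,1) independently, then X/(X+Y) ~ Beta(alpha,beta). A quick empirical check of the stated mean, again via std::gamma_distribution:

    #include <cstdio>
    #include <random>

    int main() {
      std::mt19937 rng(7);
      std::gamma_distribution<double> gx(2.0, 1.0);  // X ~ Gamma(2,1)
      std::gamma_distribution<double> gy(5.0, 1.0);  // Y ~ Gamma(5,1)
      double sum = 0.0;
      const int n = 500000;
      for (int i = 0; i < n; ++i) {
        double x = gx(rng), y = gy(rng);
        sum += x/(x+y);                              // one Beta(2,5) draw
      }
      std::printf("mean=%g (expect alpha/(alpha+beta)=%g)\n", sum/n, 2.0/7.0);
    }
]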
-
-#ifdef GAMMATEST
-#include <stdio.h>
-
-int main(int argc, char **argv) {
- int iteration, niterations = 1000;
-
- for (iteration = 0; iteration < niterations; ++iteration) {
- long double alpha = 100*mt_genrand_real3();
- long double gv = gammavariate(alpha, 1);
- long double pgv = gammadist(gv, alpha, 1);
- long double pgvl = exp(lgammadist(gv, alpha, 1));
- fprintf(stderr, "iteration = %d, gammavariate(%Lg,1) = %Lg, gammadist(%Lg,%Lg,1) = %Lg, exp(lgammadist(%Lg,%Lg,1)) = %Lg\n",
- iteration, alpha, gv, gv, alpha, pgv, gv, alpha, pgvl); /* %Lg, not %lg, for long double */
- }
- return 0;
-}
-
-#endif /* GAMMATEST */
-
-
-/* Other routines I tried, but which weren't as good as the ones above */
-
-#if 0
-
-/*! gammavariate() returns samples from a Gamma distribution
- *! where alpha is the shape parameter and beta is the scale
- *! parameter, using the algorithm described on p. 94 of
- *! Gentle (1998) Random Number Generation and Monte Carlo Methods,
- *! Springer.
- */
-
-long double gammavariate(long double alpha) {
-
- assert(alpha > 0);
-
- if (alpha > 1.0) {
- while (1) {
- long double u1 = mt_genrand_real3();
- long double u2 = mt_genrand_real3();
- long double v = (alpha - 1/(6*alpha))*u1/((alpha-1)*u2); /* u2 belongs in the denominator */
- if (2*(u2-1)/(alpha-1) + v + 1/v <= 2
- || 2*log(u2)/(alpha-1) - log(v) + v <= 1)
- return (alpha-1)*v;
- }
- } else if (alpha < 1.0) {
- while (1) {
- long double t = 0.07 + 0.75*sqrt(1-alpha);
- long double b = alpha + exp(-t)*alpha/t;
- long double u1 = mt_genrand_real3();
- long double u2 = mt_genrand_real3();
- long double v = b*u1;
- if (v <= 1) {
- long double x = t*pow(v, 1/alpha);
- if (u2 <= (2 - x)/(2 + x))
- return x;
- if (u2 <= exp(-x))
- return x;
- }
- else {
- long double x = log(t*(b-v)/alpha);
- long double y = x/t;
- if (u2*(alpha + y*(1-alpha)) <= 1)
- return x;
- if (u2 <= pow(y,alpha-1))
- return x;
- }
- }
- }
- else
- return -log(mt_genrand_real3());
-}
-
-
-/*! nr_gammavariate() returns a deviate distributed as a gamma
- *! distribution of order ia, i.e., a waiting time to the ia'th
- *! event in a Poisson process of unit mean.
- *!
- *! Code from Numerical Recipes
- */
-
-long double nr_gammavariate(long double ia) {
- int j;
- long double am,e,s,v1,v2,x,y;
- assert(ia > 0);
- if (ia < 10) {
- x=1.0;
- for (j=1;j<=ia;j++)
- x *= mt_genrand_real3();
- x = -log(x);
- } else {
- do {
- do {
- do {
- v1=mt_genrand_real3();
- v2=2.0*mt_genrand_real3()-1.0;
- } while (v1*v1+v2*v2 > 1.0);
- y=v2/v1;
- am=ia-1;
- s=sqrt(2.0*am+1.0);
- x=s*y+am;
- } while (x <= 0.0);
- e=(1.0+y*y)*exp(am*log(x/am)-s*y);
- } while (mt_genrand_real3() > e);
- }
- return x;
-}
-
-#endif
diff --git a/gi/pyp-topics/src/gammadist.h b/gi/pyp-topics/src/gammadist.h
deleted file mode 100644
index b6ad6c40..00000000
--- a/gi/pyp-topics/src/gammadist.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/* gammadist.h -- computes probability of samples under / produces samples from a Gamma distribution
- *
- * Mark Johnson, 22nd March 2008
- *
- * gammavariate() was translated from random.py in the Python standard library
- *
- * The Gamma distribution is:
- *
- * Gamma(x | alpha, beta) = pow(x/beta, alpha-1) * exp(-x/beta) / (gamma(alpha)*beta)
- *
- * shape parameter alpha > 0 (also called c), scale parameter beta > 0 (also called s);
- * mean is alpha*beta, variance is alpha*beta**2
- *
- * Note that many parameterizations of the Gamma distribution are in terms of an _inverse_
- * scale parameter beta, which is the inverse of the beta given here.
- */
-
-#ifndef GAMMADIST_H
-#define GAMMADIST_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- /* gammadist() returns the probability density of x under a Gamma(alpha,beta)
- * distribution
- */
-
- long double gammadist(long double x, long double alpha, long double beta);
-
- /* lgammadist() returns the log probability density of x under a Gamma(alpha,beta)
- * distribution
- */
-
- long double lgammadist(long double x, long double alpha, long double beta);
-
- /* gammavariate() generates samples from a Gamma distribution
- * conditioned on the parameters alpha and beta.
- *
- * alpha > 0, beta > 0, mean is alpha*beta, variance is alpha*beta**2
- *
- * Warning: a few older sources define the gamma distribution in terms
- * of alpha > -1.0
- */
-
- long double gammavariate(long double alpha, long double beta);
-
- /* betadist() returns the probability density of x under a Beta(alpha,beta)
- * distribution.
- */
-
- long double betadist(long double x, long double alpha, long double beta);
-
- /* lbetadist() returns the log probability density of x under a Beta(alpha,beta)
- * distribution.
- */
-
- long double lbetadist(long double x, long double alpha, long double beta);
-
- /* betavariate() generates a sample from a Beta distribution with
- * parameters alpha and beta.
- *
- * alpha > 0, beta > 0, mean is alpha/(alpha+beta)
- */
-
- long double betavariate(long double alpha, long double beta);
-
-#ifdef __cplusplus
-};
-#endif
-
-#endif /* GAMMADIST_H */
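
[The extern "C" guard is what lets C++ translation units link against the C implementation. A minimal hypothetical client (assuming gammadist.c and mt19937ar.c are compiled into the same build):

    // demo.cc -- hypothetical C++ caller of the C API declared above
    #include <cstdio>
    #include "gammadist.h"   // extern "C" keeps the symbols unmangled

    int main() {
      // Gamma(2,1) density at x=1 is exp(-1) ~ 0.367879.
      long double p = gammadist(1.0L, 2.0L, 1.0L);
      std::printf("%Lg\n", p);
    }
]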
diff --git a/gi/pyp-topics/src/gzstream.cc b/gi/pyp-topics/src/gzstream.cc
deleted file mode 100644
index 7c4d3a12..00000000
--- a/gi/pyp-topics/src/gzstream.cc
+++ /dev/null
@@ -1,165 +0,0 @@
-// ============================================================================
-// gzstream, C++ iostream classes wrapping the zlib compression library.
-// Copyright (C) 2001 Deepak Bandyopadhyay, Lutz Kettner
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License along with this library; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-// ============================================================================
-//
-// File : gzstream.C
-// Revision : $Revision: 1.1 $
-// Revision_date : $Date: 2006/03/30 04:05:52 $
-// Author(s) : Deepak Bandyopadhyay, Lutz Kettner
-//
-// Standard streambuf implementation following Nicolai Josuttis, "The
-// Standard C++ Library".
-// ============================================================================
-
-#include "gzstream.hh"
-#include <iostream>
-#include <string.h> // for memcpy
-
-#ifdef GZSTREAM_NAMESPACE
-namespace GZSTREAM_NAMESPACE {
-#endif
-
-// ----------------------------------------------------------------------------
-// Internal classes to implement gzstream. See header file for user classes.
-// ----------------------------------------------------------------------------
-
-// --------------------------------------
-// class gzstreambuf:
-// --------------------------------------
-
-gzstreambuf* gzstreambuf::open( const char* name, int open_mode) {
- if ( is_open())
- return (gzstreambuf*)0;
- mode = open_mode;
- // no append nor read/write mode
- if ((mode & std::ios::ate) || (mode & std::ios::app)
- || ((mode & std::ios::in) && (mode & std::ios::out)))
- return (gzstreambuf*)0;
- char fmode[10];
- char* fmodeptr = fmode;
- if ( mode & std::ios::in)
- *fmodeptr++ = 'r';
- else if ( mode & std::ios::out)
- *fmodeptr++ = 'w';
- *fmodeptr++ = 'b';
- *fmodeptr = '\0';
- file = gzopen( name, fmode);
- if (file == 0)
- return (gzstreambuf*)0;
- opened = 1;
- return this;
-}
-
-gzstreambuf * gzstreambuf::close() {
- if ( is_open()) {
- sync();
- opened = 0;
- if ( gzclose( file) == Z_OK)
- return this;
- }
- return (gzstreambuf*)0;
-}
-
-int gzstreambuf::underflow() { // used for input buffer only
- if ( gptr() && ( gptr() < egptr()))
- return * reinterpret_cast<unsigned char *>( gptr());
-
- if ( ! (mode & std::ios::in) || ! opened)
- return EOF;
- // Josuttis' implementation of inbuf
- int n_putback = gptr() - eback();
- if ( n_putback > 4)
- n_putback = 4;
- memcpy( buffer + (4 - n_putback), gptr() - n_putback, n_putback);
-
- int num = gzread( file, buffer+4, bufferSize-4);
- if (num <= 0) // ERROR or EOF
- return EOF;
-
- // reset buffer pointers
- setg( buffer + (4 - n_putback), // beginning of putback area
- buffer + 4, // read position
- buffer + 4 + num); // end of buffer
-
- // return next character
- return * reinterpret_cast<unsigned char *>( gptr());
-}
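
[The hard-coded 4 in underflow() is Josuttis' putback scheme; the layout that setg() maintains is, schematically:

    //   buffer:  [ 0 1 2 3 | 4 ... 4+num-1 ]
    //              putback    bytes just read by gzread()
    //              area       (setg() puts the read position at buffer+4)
    //
    // Before each refill, up to 4 already-consumed bytes are copied in
    // front of the read position so that putback()/unget() can step
    // backwards without seeking in the compressed stream.
]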
-
-int gzstreambuf::flush_buffer() {
- // Separate the writing of the buffer from overflow() and
- // sync() operation.
- int w = pptr() - pbase();
- if ( gzwrite( file, pbase(), w) != w)
- return EOF;
- pbump( -w);
- return w;
-}
-
-int gzstreambuf::overflow( int c) { // used for output buffer only
- if ( ! ( mode & std::ios::out) || ! opened)
- return EOF;
- if (c != EOF) {
- *pptr() = c;
- pbump(1);
- }
- if ( flush_buffer() == EOF)
- return EOF;
- return c;
-}
-
-int gzstreambuf::sync() {
- // Changed to use flush_buffer() instead of overflow( EOF)
- // which caused improper behavior with std::endl and flush(),
- // bug reported by Vincent Ricard.
- if ( pptr() && pptr() > pbase()) {
- if ( flush_buffer() == EOF)
- return -1;
- }
- return 0;
-}
-
-// --------------------------------------
-// class gzstreambase:
-// --------------------------------------
-
-gzstreambase::gzstreambase( const char* name, int mode) {
- init( &buf);
- open( name, mode);
-}
-
-gzstreambase::~gzstreambase() {
- buf.close();
-}
-
-void gzstreambase::open( const char* name, int open_mode) {
- if ( ! buf.open( name, open_mode))
- clear( rdstate() | std::ios::badbit);
-}
-
-void gzstreambase::close() {
- if ( buf.is_open())
- if ( ! buf.close())
- clear( rdstate() | std::ios::badbit);
-}
-
-#ifdef GZSTREAM_NAMESPACE
-} // namespace GZSTREAM_NAMESPACE
-#endif
-
-// ============================================================================
-// EOF //
diff --git a/gi/pyp-topics/src/gzstream.hh b/gi/pyp-topics/src/gzstream.hh
deleted file mode 100644
index ad9785fd..00000000
--- a/gi/pyp-topics/src/gzstream.hh
+++ /dev/null
@@ -1,121 +0,0 @@
-// ============================================================================
-// gzstream, C++ iostream classes wrapping the zlib compression library.
-// Copyright (C) 2001 Deepak Bandyopadhyay, Lutz Kettner
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License along with this library; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-// ============================================================================
-//
-// File : gzstream.h
-// Revision : $Revision: 1.1 $
-// Revision_date : $Date: 2006/03/30 04:05:52 $
-// Author(s) : Deepak Bandyopadhyay, Lutz Kettner
-//
-// Standard streambuf implementation following Nicolai Josuttis, "The
-// Standard C++ Library".
-// ============================================================================
-
-#ifndef GZSTREAM_H
-#define GZSTREAM_H 1
-
-// standard C++ with new header file names and std:: namespace
-#include <iostream>
-#include <fstream>
-#include <zlib.h>
-
-#ifdef GZSTREAM_NAMESPACE
-namespace GZSTREAM_NAMESPACE {
-#endif
-
-// ----------------------------------------------------------------------------
-// Internal classes to implement gzstream. See below for user classes.
-// ----------------------------------------------------------------------------
-
-class gzstreambuf : public std::streambuf {
-private:
- static const int bufferSize = 47+256; // size of data buffer
- // totals 512 bytes under g++ for igzstream at the end.
-
- gzFile file; // file handle for compressed file
- char buffer[bufferSize]; // data buffer
- char opened; // open/close state of stream
- int mode; // I/O mode
-
- int flush_buffer();
-public:
- gzstreambuf() : opened(0) {
- setp( buffer, buffer + (bufferSize-1));
- setg( buffer + 4, // beginning of putback area
- buffer + 4, // read position
- buffer + 4); // end position
- // ASSERT: both input & output capabilities will not be used together
- }
- int is_open() { return opened; }
- gzstreambuf* open( const char* name, int open_mode);
- gzstreambuf* close();
- ~gzstreambuf() { close(); }
-
- virtual int overflow( int c = EOF);
- virtual int underflow();
- virtual int sync();
-};
-
-class gzstreambase : virtual public std::ios {
-protected:
- gzstreambuf buf;
-public:
- gzstreambase() { init(&buf); }
- gzstreambase( const char* name, int open_mode);
- ~gzstreambase();
- void open( const char* name, int open_mode);
- void close();
- gzstreambuf* rdbuf() { return &buf; }
-};
-
-// ----------------------------------------------------------------------------
-// User classes. Use igzstream and ogzstream analogously to ifstream and
-// ofstream respectively. They read and write files based on the gz*
-// function interface of the zlib. Files are compatible with gzip compression.
-// ----------------------------------------------------------------------------
-
-class igzstream : public gzstreambase, public std::istream {
-public:
- igzstream() : std::istream( &buf) {}
- igzstream( const char* name, int open_mode = std::ios::in)
- : gzstreambase( name, open_mode), std::istream( &buf) {}
- gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
- void open( const char* name, int open_mode = std::ios::in) {
- gzstreambase::open( name, open_mode);
- }
-};
-
-class ogzstream : public gzstreambase, public std::ostream {
-public:
- ogzstream() : std::ostream( &buf) {}
- ogzstream( const char* name, int mode = std::ios::out)
- : gzstreambase( name, mode), std::ostream( &buf) {}
- gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
- void open( const char* name, int open_mode = std::ios::out) {
- gzstreambase::open( name, open_mode);
- }
-};
-
-#ifdef GZSTREAM_NAMESPACE
-} // namespace GZSTREAM_NAMESPACE
-#endif
-
-#endif // GZSTREAM_H
-// ============================================================================
-// EOF //
-
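
[As the comment above says, the user classes behave like ifstream/ofstream. A minimal round-trip sketch (hypothetical file name; link with -lz):

    #include <iostream>
    #include <string>
    #include "gzstream.hh"

    int main() {
      {
        ogzstream out("demo.txt.gz");   // opened with std::ios::out
        out << "hello gzstream\n";
      }                                 // destructor flushes and gzclose()s
      igzstream in("demo.txt.gz");
      std::string line;
      std::getline(in, line);
      std::cout << line << '\n';        // prints: hello gzstream
    }
]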
diff --git a/gi/pyp-topics/src/log_add.h b/gi/pyp-topics/src/log_add.h
deleted file mode 100644
index e0620c5a..00000000
--- a/gi/pyp-topics/src/log_add.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef log_add_hh
-#define log_add_hh
-
-#include <limits>
-#include <iostream>
-#include <cassert>
-#include <cmath>
-
-template <typename T>
-struct Log
-{
- static T zero() { return -std::numeric_limits<T>::infinity(); }
-
- static T add(T l1, T l2)
- {
- if (l1 == zero()) return l2;
- if (l1 > l2)
- return l1 + std::log(1 + exp(l2 - l1));
- else
- return l2 + std::log(1 + exp(l1 - l2));
- }
-
- static T subtract(T l1, T l2)
- {
- assert(l1 >= l2); // log(exp(l1) - exp(l2)) requires l1 >= l2
- return l1 + std::log(1 - std::exp(l2 - l1));
- }
-};
-
-#endif
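
[Log<T>::add is the usual log-sum-exp trick: factoring out the larger argument keeps exp()'s argument non-positive, so log(exp(l1)+exp(l2)) never overflows even when the naive computation would. A standalone check of the same recipe (finite inputs; the header's version also special-cases -infinity):

    #include <cassert>
    #include <cmath>
    #include <utility>

    template <typename T>
    static T log_add(T l1, T l2) {      // same recipe as Log<T>::add above
      if (l1 < l2) std::swap(l1, l2);   // ensure l1 >= l2, so l2-l1 <= 0
      return l1 + std::log(1 + std::exp(l2 - l1));
    }

    int main() {
      // Small values: agrees with the naive computation.
      assert(std::fabs(log_add(std::log(0.25), std::log(0.5))
                       - std::log(0.75)) < 1e-12);
      // Huge values: exp(1000) overflows a double, but log_add stays finite.
      assert(std::fabs(log_add(1000.0, 1000.0) - (1000.0 + std::log(2.0))) < 1e-12);
      return 0;
    }
]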
diff --git a/gi/pyp-topics/src/macros.Linux b/gi/pyp-topics/src/macros.Linux
deleted file mode 100644
index 7c6e7fa7..00000000
--- a/gi/pyp-topics/src/macros.Linux
+++ /dev/null
@@ -1,18 +0,0 @@
-CC = /home/pblunsom/software/bin/mpicc
-CXX = /home/pblunsom/software/bin/mpicxx
-LD = /home/pblunsom/software/bin/mpicxx
-FC = /home/pblunsom/software/bin/mpif77
-
-SOFTWARE_DIR=/export/ws10smt/software
-
-CXXFLAGS = -Wall -I${SOFTWARE_DIR}/include
-CFLAGS = -Wall -I${SOFTWARE_DIR}/include
-FFLAGS = -Wall
-LDFLAGS = -lm -lz -L${SOFTWARE_DIR}/lib \
- -lboost_program_options -lboost_mpi -lboost_serialization \
- -lboost_regex -L../../../decoder -lcdec
-
-FFLAGS += -g -O6 -march=native
-CFLAGS += -g -O6 -march=native
-CXXFLAGS += -g -O6 -march=native
-LDFLAGS += -g -O6 -march=native
diff --git a/gi/pyp-topics/src/makefile.darwin b/gi/pyp-topics/src/makefile.darwin
deleted file mode 100644
index af608fd8..00000000
--- a/gi/pyp-topics/src/makefile.darwin
+++ /dev/null
@@ -1,15 +0,0 @@
-CC = /usr/bin/gcc
-CXX = /usr/bin/g++
-LD = /usr/bin/g++
-FC=/usr/bin/g77
-
-ARCH=i686-m64
-CXXFLAGS = -m64 -Wall -I/Users/pblunsom/packages/include
-CFLAGS = -m64 -Wall -I/Users/pblunsom/packages/include
-FFLAGS = -m64 -Wall
-LDFLAGS = -L/Users/pblunsom/packages/lib -lboost_program_options -lm -lz
-
-FFLAGS += -g -O3 -funroll-loops #-pg
-CFLAGS += -g -O3 -funroll-loops #-pg
-CXXFLAGS += -g -O3 -funroll-loops #-pg
-LDFLAGS += -g -O3 -funroll-loops #-pg
diff --git a/gi/pyp-topics/src/makefile.depend b/gi/pyp-topics/src/makefile.depend
deleted file mode 100644
index 9b8e306c..00000000
--- a/gi/pyp-topics/src/makefile.depend
+++ /dev/null
@@ -1,4042 +0,0 @@
-contexts_corpus.o: contexts_corpus.cc contexts_corpus.hh \
- /home/pblunsom/packages/include/boost/ptr_container/ptr_vector.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/ptr_sequence_adapter.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/reversible_ptr_container.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/throw_exception.hpp \
- /home/pblunsom/packages/include/boost/assert.hpp \
- /home/pblunsom/packages/include/boost/config.hpp \
- /home/pblunsom/packages/include/boost/config/user.hpp \
- /home/pblunsom/packages/include/boost/config/select_compiler_config.hpp \
- /home/pblunsom/packages/include/boost/config/compiler/gcc.hpp \
- /home/pblunsom/packages/include/boost/config/select_stdlib_config.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/utility.hpp \
- /home/pblunsom/packages/include/boost/config/stdlib/libstdcpp3.hpp \
- /home/pblunsom/packages/include/boost/config/select_platform_config.hpp \
- /home/pblunsom/packages/include/boost/config/platform/linux.hpp \
- /home/pblunsom/packages/include/boost/config/posix_features.hpp \
- /home/pblunsom/packages/include/boost/config/suffix.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/scoped_deleter.hpp \
- /home/pblunsom/packages/include/boost/scoped_array.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/scoped_array.hpp \
- /home/pblunsom/packages/include/boost/checked_delete.hpp \
- /home/pblunsom/packages/include/boost/detail/workaround.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/operator_bool.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/static_move_ptr.hpp \
- /home/pblunsom/packages/include/boost/compressed_pair.hpp \
- /home/pblunsom/packages/include/boost/detail/compressed_pair.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_cv.hpp \
- /home/pblunsom/packages/include/boost/type_traits/broken_compiler_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/lambda_support.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/ttp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/gcc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/workaround.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/ctps.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/cv_traits_impl.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_def.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/template_arity_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/int.hpp \
- /home/pblunsom/packages/include/boost/mpl/int_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/adl_barrier.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/adl.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/intel.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/nttp_decl.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/nttp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/integral_wrapper.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c_tag.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/static_constant.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/static_cast.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/config/config.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/template_arity_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/params.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/preprocessor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comma_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/iif.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bool.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/empty.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repeat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/error.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/auto_rec.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/eat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/inc.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/inc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/overload_resolution.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_empty.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_convertible.hpp \
- /home/pblunsom/packages/include/boost/type_traits/intrinsics.hpp \
- /home/pblunsom/packages/include/boost/type_traits/config.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_same.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_def.hpp \
- /home/pblunsom/packages/include/boost/type_traits/integral_constant.hpp \
- /home/pblunsom/packages/include/boost/mpl/bool.hpp \
- /home/pblunsom/packages/include/boost/mpl/bool_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c_fwd.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_volatile.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/yes_no_type.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_array.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/ice.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_or.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_and.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_not.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_eq.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_arithmetic.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_integral.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_float.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_void.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_abstract.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_class.hpp \
- /home/pblunsom/packages/include/boost/call_traits.hpp \
- /home/pblunsom/packages/include/boost/detail/call_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_member_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_member_function_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/default_deleter.hpp \
- /home/pblunsom/packages/include/boost/mpl/if.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/value_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/integral.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/eti.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/lambda_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/void_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/lambda_arity_param.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/dtp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/enum.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/def_params_tail.hpp \
- /home/pblunsom/packages/include/boost/mpl/limits/arity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/and.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitand.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/identity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/identity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/empty.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/add.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/dec.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/while.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/expr_iif.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/adt.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/is_binary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/check.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/compl.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/detail/while.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/sub.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_bounds.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/is_convertible.hpp \
- /home/pblunsom/packages/include/boost/mpl/and.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/use_preprocessed.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/nested_type_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/include_preprocessed.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/compiler.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/stringize.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/and.hpp \
- /home/pblunsom/packages/include/boost/mpl/identity.hpp \
- /home/pblunsom/packages/include/boost/utility/enable_if.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/move.hpp \
- /home/pblunsom/packages/include/boost/static_assert.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/exception.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/clone_allocator.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/nullable.hpp \
- /home/pblunsom/packages/include/boost/mpl/eval_if.hpp \
- /home/pblunsom/packages/include/boost/range/functions.hpp \
- /home/pblunsom/packages/include/boost/range/begin.hpp \
- /home/pblunsom/packages/include/boost/range/config.hpp \
- /home/pblunsom/packages/include/boost/range/iterator.hpp \
- /home/pblunsom/packages/include/boost/range/mutable_iterator.hpp \
- /home/pblunsom/packages/include/boost/range/detail/extract_optional_type.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_traits.hpp \
- /home/pblunsom/packages/include/boost/detail/iterator.hpp \
- /home/pblunsom/packages/include/boost/range/const_iterator.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_const.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_const.hpp \
- /home/pblunsom/packages/include/boost/range/end.hpp \
- /home/pblunsom/packages/include/boost/range/detail/implementation_help.hpp \
- /home/pblunsom/packages/include/boost/range/detail/common.hpp \
- /home/pblunsom/packages/include/boost/range/detail/sfinae.hpp \
- /home/pblunsom/packages/include/boost/range/size.hpp \
- /home/pblunsom/packages/include/boost/range/difference_type.hpp \
- /home/pblunsom/packages/include/boost/range/distance.hpp \
- /home/pblunsom/packages/include/boost/range/empty.hpp \
- /home/pblunsom/packages/include/boost/range/rbegin.hpp \
- /home/pblunsom/packages/include/boost/range/reverse_iterator.hpp \
- /home/pblunsom/packages/include/boost/iterator/reverse_iterator.hpp \
- /home/pblunsom/packages/include/boost/iterator.hpp \
- /home/pblunsom/packages/include/boost/utility.hpp \
- /home/pblunsom/packages/include/boost/utility/addressof.hpp \
- /home/pblunsom/packages/include/boost/utility/base_from_member.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/rem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat_from_to.hpp \
- /home/pblunsom/packages/include/boost/utility/binary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/deduce_d.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/seq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/transform.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mod.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/detail/div_base.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/less_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/not.hpp \
- /home/pblunsom/packages/include/boost/next_prior.hpp \
- /home/pblunsom/packages/include/boost/noncopyable.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_adaptor.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_categories.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/config_def.hpp \
- /home/pblunsom/packages/include/boost/mpl/placeholders.hpp \
- /home/pblunsom/packages/include/boost/mpl/arg.hpp \
- /home/pblunsom/packages/include/boost/mpl/arg_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_assert.hpp \
- /home/pblunsom/packages/include/boost/mpl/assert.hpp \
- /home/pblunsom/packages/include/boost/mpl/not.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/yes_no.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/arrays.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/pp_counter.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arity_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arg_typedef.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/arg.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/config_undef.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_facade.hpp \
- /home/pblunsom/packages/include/boost/iterator/interoperable.hpp \
- /home/pblunsom/packages/include/boost/mpl/or.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/or.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/facade_iterator_category.hpp \
- /home/pblunsom/packages/include/boost/detail/indirect_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_function.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/false_result.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/is_function_ptr_helper.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_pointer.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/enable_if.hpp \
- /home/pblunsom/packages/include/boost/implicit_cast.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_const.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_pod.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_scalar.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_enum.hpp \
- /home/pblunsom/packages/include/boost/mpl/always.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply_wrap.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/has_apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/has_xxx.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/type_wrapper.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/has_xxx.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc_typename.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/has_apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/msvc_never_true.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp \
- /home/pblunsom/packages/include/boost/mpl/lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/bind_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/next.hpp \
- /home/pblunsom/packages/include/boost/mpl/next_prior.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/common_name_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/protect.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/full_lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/quote.hpp \
- /home/pblunsom/packages/include/boost/mpl/void.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/has_type.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/bcc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/quote.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/template_arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply.hpp \
- /home/pblunsom/packages/include/boost/range/rend.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/indirect_fun.hpp \
- /home/pblunsom/packages/include/boost/utility/result_of.hpp \
- /home/pblunsom/packages/include/boost/type.hpp \
- /home/pblunsom/packages/include/boost/preprocessor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/library.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/div.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mul.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/data.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/insert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/push_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/not_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/pop_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_z.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/pop_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/push_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/remove.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/replace.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/greater.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/less.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/greater_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/config/limits.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/expr_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/assert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/line.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/iterate.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/slot.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/detail/def.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/apply.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/is_unary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/expand.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/intercept.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/local.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/self.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/append.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/at.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/rest_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each_i.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/for.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/detail/for.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/filter.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/first_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each_product.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/to_tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/to_list.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/transform.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitnor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitxor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/nor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/or.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/xor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_r.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_a_default.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_defaults.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection/max.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection/min.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/filter.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/first_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/detail/split.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_i.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_product.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/insert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/rest_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/pop_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/pop_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/push_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/push_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/remove.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/replace.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/subseq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/to_array.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/to_tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/to_seq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/iter/forward1.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/lower1.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/detail/shared.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/upper1.hpp \
- /home/pblunsom/packages/include/boost/utility/detail/result_of_iterate.hpp \
- /home/pblunsom/packages/include/boost/pointee.hpp \
- /home/pblunsom/packages/include/boost/detail/is_incrementable.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/void_ptr_iterator.hpp \
- corpus.hh /home/pblunsom/packages/include/boost/shared_ptr.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/shared_ptr.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/memory.hpp \
- /home/pblunsom/packages/include/boost/throw_exception.hpp \
- /home/pblunsom/packages/include/boost/exception/detail/attribute_noreturn.hpp \
- /home/pblunsom/packages/include/boost/exception/exception.hpp \
- /home/pblunsom/packages/include/boost/current_function.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/shared_count.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/bad_weak_ptr.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_has_sync.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
- /home/pblunsom/packages/include/boost/detail/sp_typeinfo.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_convertible.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_pool.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_sync.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/yield_k.hpp \
- /home/pblunsom/packages/include/boost/memory_order.hpp contexts_lexer.h \
- ../../../decoder/dict.h \
- /home/pblunsom/packages/include/boost/functional/hash.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/hash.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/hash_fwd.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/float_functions.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/cmath.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/limits.hpp \
- /home/pblunsom/packages/include/boost/limits.hpp \
- /home/pblunsom/packages/include/boost/integer/static_log2.hpp \
- /home/pblunsom/packages/include/boost/integer_fwd.hpp \
- /home/pblunsom/packages/include/boost/cstdint.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float_generic.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/extensions.hpp \
- /home/pblunsom/packages/include/boost/detail/container_fwd.hpp \
- ../../../decoder/wordid.h gzstream.hh \
- /home/pblunsom/packages/include/boost/tuple/tuple.hpp \
- /home/pblunsom/packages/include/boost/ref.hpp \
- /home/pblunsom/packages/include/boost/tuple/detail/tuple_basic.hpp \
- /home/pblunsom/packages/include/boost/type_traits/cv_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_volatile.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_cv.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_volatile.hpp \
- /home/pblunsom/packages/include/boost/type_traits/function_traits.hpp
-contexts_lexer.o: contexts_lexer.cc contexts_lexer.h \
- ../../../decoder/dict.h \
- /home/pblunsom/packages/include/boost/functional/hash.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/hash.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/hash_fwd.hpp \
- /home/pblunsom/packages/include/boost/config.hpp \
- /home/pblunsom/packages/include/boost/config/user.hpp \
- /home/pblunsom/packages/include/boost/config/select_compiler_config.hpp \
- /home/pblunsom/packages/include/boost/config/compiler/gcc.hpp \
- /home/pblunsom/packages/include/boost/config/select_stdlib_config.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/utility.hpp \
- /home/pblunsom/packages/include/boost/config/stdlib/libstdcpp3.hpp \
- /home/pblunsom/packages/include/boost/config/select_platform_config.hpp \
- /home/pblunsom/packages/include/boost/config/platform/linux.hpp \
- /home/pblunsom/packages/include/boost/config/posix_features.hpp \
- /home/pblunsom/packages/include/boost/config/suffix.hpp \
- /home/pblunsom/packages/include/boost/detail/workaround.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/float_functions.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/cmath.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/limits.hpp \
- /home/pblunsom/packages/include/boost/limits.hpp \
- /home/pblunsom/packages/include/boost/integer/static_log2.hpp \
- /home/pblunsom/packages/include/boost/integer_fwd.hpp \
- /home/pblunsom/packages/include/boost/cstdint.hpp \
- /home/pblunsom/packages/include/boost/assert.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float_generic.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/extensions.hpp \
- /home/pblunsom/packages/include/boost/detail/container_fwd.hpp \
- ../../../decoder/wordid.h ../../../decoder/filelib.h \
- ../../../decoder/gzstream.h
-corpus.o: corpus.cc corpus.hh \
- /home/pblunsom/packages/include/boost/shared_ptr.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/shared_ptr.hpp \
- /home/pblunsom/packages/include/boost/config.hpp \
- /home/pblunsom/packages/include/boost/config/user.hpp \
- /home/pblunsom/packages/include/boost/config/select_compiler_config.hpp \
- /home/pblunsom/packages/include/boost/config/compiler/gcc.hpp \
- /home/pblunsom/packages/include/boost/config/select_stdlib_config.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/utility.hpp \
- /home/pblunsom/packages/include/boost/config/stdlib/libstdcpp3.hpp \
- /home/pblunsom/packages/include/boost/config/select_platform_config.hpp \
- /home/pblunsom/packages/include/boost/config/platform/linux.hpp \
- /home/pblunsom/packages/include/boost/config/posix_features.hpp \
- /home/pblunsom/packages/include/boost/config/suffix.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/memory.hpp \
- /home/pblunsom/packages/include/boost/assert.hpp \
- /home/pblunsom/packages/include/boost/checked_delete.hpp \
- /home/pblunsom/packages/include/boost/throw_exception.hpp \
- /home/pblunsom/packages/include/boost/exception/detail/attribute_noreturn.hpp \
- /home/pblunsom/packages/include/boost/detail/workaround.hpp \
- /home/pblunsom/packages/include/boost/exception/exception.hpp \
- /home/pblunsom/packages/include/boost/current_function.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/shared_count.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/bad_weak_ptr.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_has_sync.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
- /home/pblunsom/packages/include/boost/detail/sp_typeinfo.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_convertible.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_pool.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_sync.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/yield_k.hpp \
- /home/pblunsom/packages/include/boost/memory_order.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/operator_bool.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/ptr_vector.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/ptr_sequence_adapter.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/reversible_ptr_container.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/throw_exception.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/scoped_deleter.hpp \
- /home/pblunsom/packages/include/boost/scoped_array.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/scoped_array.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/static_move_ptr.hpp \
- /home/pblunsom/packages/include/boost/compressed_pair.hpp \
- /home/pblunsom/packages/include/boost/detail/compressed_pair.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_cv.hpp \
- /home/pblunsom/packages/include/boost/type_traits/broken_compiler_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/lambda_support.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/ttp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/gcc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/workaround.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/ctps.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/cv_traits_impl.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_def.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/template_arity_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/int.hpp \
- /home/pblunsom/packages/include/boost/mpl/int_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/adl_barrier.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/adl.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/intel.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/nttp_decl.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/nttp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/integral_wrapper.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c_tag.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/static_constant.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/static_cast.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/config/config.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/template_arity_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/params.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/preprocessor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comma_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/iif.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bool.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/empty.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repeat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/error.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/auto_rec.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/eat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/inc.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/inc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/overload_resolution.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_empty.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_convertible.hpp \
- /home/pblunsom/packages/include/boost/type_traits/intrinsics.hpp \
- /home/pblunsom/packages/include/boost/type_traits/config.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_same.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_def.hpp \
- /home/pblunsom/packages/include/boost/type_traits/integral_constant.hpp \
- /home/pblunsom/packages/include/boost/mpl/bool.hpp \
- /home/pblunsom/packages/include/boost/mpl/bool_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c_fwd.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_volatile.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/yes_no_type.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_array.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/ice.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_or.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_and.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_not.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_eq.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_arithmetic.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_integral.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_float.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_void.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_abstract.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_class.hpp \
- /home/pblunsom/packages/include/boost/call_traits.hpp \
- /home/pblunsom/packages/include/boost/detail/call_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_member_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_member_function_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/default_deleter.hpp \
- /home/pblunsom/packages/include/boost/mpl/if.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/value_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/integral.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/eti.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/lambda_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/void_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/lambda_arity_param.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/dtp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/enum.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/def_params_tail.hpp \
- /home/pblunsom/packages/include/boost/mpl/limits/arity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/and.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitand.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/identity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/identity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/empty.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/add.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/dec.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/while.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/expr_iif.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/adt.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/is_binary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/check.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/compl.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/detail/while.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/sub.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_bounds.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/is_convertible.hpp \
- /home/pblunsom/packages/include/boost/mpl/and.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/use_preprocessed.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/nested_type_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/include_preprocessed.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/compiler.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/stringize.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/and.hpp \
- /home/pblunsom/packages/include/boost/mpl/identity.hpp \
- /home/pblunsom/packages/include/boost/utility/enable_if.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/move.hpp \
- /home/pblunsom/packages/include/boost/static_assert.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/exception.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/clone_allocator.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/nullable.hpp \
- /home/pblunsom/packages/include/boost/mpl/eval_if.hpp \
- /home/pblunsom/packages/include/boost/range/functions.hpp \
- /home/pblunsom/packages/include/boost/range/begin.hpp \
- /home/pblunsom/packages/include/boost/range/config.hpp \
- /home/pblunsom/packages/include/boost/range/iterator.hpp \
- /home/pblunsom/packages/include/boost/range/mutable_iterator.hpp \
- /home/pblunsom/packages/include/boost/range/detail/extract_optional_type.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_traits.hpp \
- /home/pblunsom/packages/include/boost/detail/iterator.hpp \
- /home/pblunsom/packages/include/boost/range/const_iterator.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_const.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_const.hpp \
- /home/pblunsom/packages/include/boost/range/end.hpp \
- /home/pblunsom/packages/include/boost/range/detail/implementation_help.hpp \
- /home/pblunsom/packages/include/boost/range/detail/common.hpp \
- /home/pblunsom/packages/include/boost/range/detail/sfinae.hpp \
- /home/pblunsom/packages/include/boost/range/size.hpp \
- /home/pblunsom/packages/include/boost/range/difference_type.hpp \
- /home/pblunsom/packages/include/boost/range/distance.hpp \
- /home/pblunsom/packages/include/boost/range/empty.hpp \
- /home/pblunsom/packages/include/boost/range/rbegin.hpp \
- /home/pblunsom/packages/include/boost/range/reverse_iterator.hpp \
- /home/pblunsom/packages/include/boost/iterator/reverse_iterator.hpp \
- /home/pblunsom/packages/include/boost/iterator.hpp \
- /home/pblunsom/packages/include/boost/utility.hpp \
- /home/pblunsom/packages/include/boost/utility/addressof.hpp \
- /home/pblunsom/packages/include/boost/utility/base_from_member.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/rem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat_from_to.hpp \
- /home/pblunsom/packages/include/boost/utility/binary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/deduce_d.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/seq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/transform.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mod.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/detail/div_base.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/less_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/not.hpp \
- /home/pblunsom/packages/include/boost/next_prior.hpp \
- /home/pblunsom/packages/include/boost/noncopyable.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_adaptor.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_categories.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/config_def.hpp \
- /home/pblunsom/packages/include/boost/mpl/placeholders.hpp \
- /home/pblunsom/packages/include/boost/mpl/arg.hpp \
- /home/pblunsom/packages/include/boost/mpl/arg_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_assert.hpp \
- /home/pblunsom/packages/include/boost/mpl/assert.hpp \
- /home/pblunsom/packages/include/boost/mpl/not.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/yes_no.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/arrays.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/pp_counter.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arity_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arg_typedef.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/arg.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/config_undef.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_facade.hpp \
- /home/pblunsom/packages/include/boost/iterator/interoperable.hpp \
- /home/pblunsom/packages/include/boost/mpl/or.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/or.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/facade_iterator_category.hpp \
- /home/pblunsom/packages/include/boost/detail/indirect_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_function.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/false_result.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/is_function_ptr_helper.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_pointer.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/enable_if.hpp \
- /home/pblunsom/packages/include/boost/implicit_cast.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_const.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_pod.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_scalar.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_enum.hpp \
- /home/pblunsom/packages/include/boost/mpl/always.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply_wrap.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/has_apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/has_xxx.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/type_wrapper.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/has_xxx.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc_typename.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/has_apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/msvc_never_true.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp \
- /home/pblunsom/packages/include/boost/mpl/lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/bind_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/next.hpp \
- /home/pblunsom/packages/include/boost/mpl/next_prior.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/common_name_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/protect.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/full_lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/quote.hpp \
- /home/pblunsom/packages/include/boost/mpl/void.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/has_type.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/bcc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/quote.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/template_arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply.hpp \
- /home/pblunsom/packages/include/boost/range/rend.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/indirect_fun.hpp \
- /home/pblunsom/packages/include/boost/utility/result_of.hpp \
- /home/pblunsom/packages/include/boost/type.hpp \
- /home/pblunsom/packages/include/boost/preprocessor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/library.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/div.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mul.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/data.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/insert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/push_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/not_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/pop_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_z.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/pop_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/push_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/remove.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/replace.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/greater.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/less.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/greater_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/config/limits.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/expr_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/assert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/line.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/iterate.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/slot.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/detail/def.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/apply.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/is_unary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/expand.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/intercept.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/local.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/self.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/append.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/at.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/rest_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each_i.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/for.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/detail/for.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/filter.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/first_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each_product.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/to_tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/to_list.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/transform.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitnor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitxor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/nor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/or.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/xor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_r.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_a_default.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_defaults.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection/max.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection/min.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/filter.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/first_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/detail/split.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_i.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_product.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/insert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/rest_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/pop_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/pop_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/push_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/push_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/remove.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/replace.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/subseq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/to_array.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/to_tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/to_seq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/iter/forward1.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/lower1.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/detail/shared.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/upper1.hpp \
- /home/pblunsom/packages/include/boost/utility/detail/result_of_iterate.hpp \
- /home/pblunsom/packages/include/boost/pointee.hpp \
- /home/pblunsom/packages/include/boost/detail/is_incrementable.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/void_ptr_iterator.hpp \
- gzstream.hh
-gzstream.o: gzstream.cc gzstream.hh
-mpi-pyp-topics.o: mpi-pyp-topics.cc \
- /home/pblunsom/packages/include/boost/mpi/communicator.hpp \
- /home/pblunsom/packages/include/boost/mpi/config.hpp \
- /home/pblunsom/packages/include/mpi.h \
- /home/pblunsom/packages/include/mpio.h \
- /home/pblunsom/packages/include/mpicxx.h \
- /home/pblunsom/packages/include/boost/config.hpp \
- /home/pblunsom/packages/include/boost/config/user.hpp \
- /home/pblunsom/packages/include/boost/config/select_compiler_config.hpp \
- /home/pblunsom/packages/include/boost/config/compiler/gcc.hpp \
- /home/pblunsom/packages/include/boost/config/select_stdlib_config.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/utility.hpp \
- /home/pblunsom/packages/include/boost/config/stdlib/libstdcpp3.hpp \
- /home/pblunsom/packages/include/boost/config/select_platform_config.hpp \
- /home/pblunsom/packages/include/boost/config/platform/linux.hpp \
- /home/pblunsom/packages/include/boost/config/posix_features.hpp \
- /home/pblunsom/packages/include/boost/config/suffix.hpp \
- /home/pblunsom/packages/include/boost/config/auto_link.hpp \
- /home/pblunsom/packages/include/boost/mpi/exception.hpp \
- /home/pblunsom/packages/include/boost/throw_exception.hpp \
- /home/pblunsom/packages/include/boost/exception/detail/attribute_noreturn.hpp \
- /home/pblunsom/packages/include/boost/detail/workaround.hpp \
- /home/pblunsom/packages/include/boost/exception/exception.hpp \
- /home/pblunsom/packages/include/boost/current_function.hpp \
- /home/pblunsom/packages/include/boost/optional.hpp \
- /home/pblunsom/packages/include/boost/optional/optional.hpp \
- /home/pblunsom/packages/include/boost/assert.hpp \
- /home/pblunsom/packages/include/boost/type.hpp \
- /home/pblunsom/packages/include/boost/type_traits/alignment_of.hpp \
- /home/pblunsom/packages/include/boost/type_traits/intrinsics.hpp \
- /home/pblunsom/packages/include/boost/type_traits/config.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_same.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_def.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/template_arity_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/int.hpp \
- /home/pblunsom/packages/include/boost/mpl/int_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/adl_barrier.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/adl.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/intel.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/gcc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/workaround.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/nttp_decl.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/nttp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/integral_wrapper.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c_tag.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/static_constant.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/static_cast.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/config/config.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/template_arity_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/params.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/preprocessor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comma_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/iif.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bool.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/empty.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repeat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/error.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/auto_rec.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/eat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/inc.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/inc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/ttp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/ctps.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/overload_resolution.hpp \
- /home/pblunsom/packages/include/boost/type_traits/integral_constant.hpp \
- /home/pblunsom/packages/include/boost/mpl/bool.hpp \
- /home/pblunsom/packages/include/boost/mpl/bool_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/lambda_support.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_volatile.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/cv_traits_impl.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_def.hpp \
- /home/pblunsom/packages/include/boost/mpl/size_t.hpp \
- /home/pblunsom/packages/include/boost/mpl/size_t_fwd.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/type_traits/type_with_alignment.hpp \
- /home/pblunsom/packages/include/boost/mpl/if.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/value_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/integral.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/eti.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/lambda_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/void_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/lambda_arity_param.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/dtp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/enum.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/def_params_tail.hpp \
- /home/pblunsom/packages/include/boost/mpl/limits/arity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/and.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitand.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/identity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/identity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/empty.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/add.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/dec.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/while.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/expr_iif.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/adt.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/is_binary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/check.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/compl.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/detail/while.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/sub.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each_i.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/for.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/detail/for.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/rem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/to_list.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/transform.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/append.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_pod.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_void.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_scalar.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_arithmetic.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_integral.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_float.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_or.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_enum.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_member_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_member_function_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_cv.hpp \
- /home/pblunsom/packages/include/boost/type_traits/broken_compiler_spec.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_def.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_and.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_not.hpp \
- /home/pblunsom/packages/include/boost/static_assert.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_reference.hpp \
- /home/pblunsom/packages/include/boost/mpl/not.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/nested_type_wknd.hpp \
- /home/pblunsom/packages/include/boost/detail/reference_content.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_nothrow_copy.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_trivial_copy.hpp \
- /home/pblunsom/packages/include/boost/mpl/void.hpp \
- /home/pblunsom/packages/include/boost/none.hpp \
- /home/pblunsom/packages/include/boost/none_t.hpp \
- /home/pblunsom/packages/include/boost/utility/compare_pointees.hpp \
- /home/pblunsom/packages/include/boost/optional/optional_fwd.hpp \
- /home/pblunsom/packages/include/boost/shared_ptr.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/shared_ptr.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/memory.hpp \
- /home/pblunsom/packages/include/boost/checked_delete.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/shared_count.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/bad_weak_ptr.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_has_sync.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
- /home/pblunsom/packages/include/boost/detail/sp_typeinfo.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_convertible.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_pool.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_sync.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/yield_k.hpp \
- /home/pblunsom/packages/include/boost/memory_order.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/operator_bool.hpp \
- /home/pblunsom/packages/include/boost/mpi/datatype.hpp \
- /home/pblunsom/packages/include/boost/mpi/datatype_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/or.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/use_preprocessed.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/include_preprocessed.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/compiler.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/stringize.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/or.hpp \
- /home/pblunsom/packages/include/boost/mpl/and.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/and.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/mpi_datatype_cache.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/mpi_datatype_oarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/oserializer.hpp \
- /home/pblunsom/packages/include/boost/mpl/eval_if.hpp \
- /home/pblunsom/packages/include/boost/mpl/equal_to.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/comparison_op.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/numeric_op.hpp \
- /home/pblunsom/packages/include/boost/mpl/numeric_cast.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply_wrap.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/has_apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/has_xxx.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/type_wrapper.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/yes_no.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/arrays.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/has_xxx.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc_typename.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/has_apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/msvc_never_true.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp \
- /home/pblunsom/packages/include/boost/mpl/tag.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/has_tag.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/numeric_cast_utils.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/forwarding.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/msvc_eti_base.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/is_msvc_eti_arg.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/equal_to.hpp \
- /home/pblunsom/packages/include/boost/mpl/greater_equal.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/greater_equal.hpp \
- /home/pblunsom/packages/include/boost/mpl/identity.hpp \
- /home/pblunsom/packages/include/boost/serialization/extended_type_info_typeid.hpp \
- /home/pblunsom/packages/include/boost/serialization/static_warning.hpp \
- /home/pblunsom/packages/include/boost/mpl/print.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_polymorphic.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_const.hpp \
- /home/pblunsom/packages/include/boost/serialization/singleton.hpp \
- /home/pblunsom/packages/include/boost/noncopyable.hpp \
- /home/pblunsom/packages/include/boost/serialization/force_include.hpp \
- /home/pblunsom/packages/include/boost/serialization/extended_type_info.hpp \
- /home/pblunsom/packages/include/boost/serialization/config.hpp \
- /home/pblunsom/packages/include/boost/config/abi_prefix.hpp \
- /home/pblunsom/packages/include/boost/config/abi_suffix.hpp \
- /home/pblunsom/packages/include/boost/serialization/factory.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/greater.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/less.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/less_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/not.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/not_equal.hpp \
- /home/pblunsom/packages/include/boost/serialization/access.hpp \
- /home/pblunsom/packages/include/boost/serialization/pfto.hpp \
- /home/pblunsom/packages/include/boost/serialization/throw_exception.hpp \
- /home/pblunsom/packages/include/boost/serialization/smart_cast.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_base_and_derived.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_pointer.hpp \
- /home/pblunsom/packages/include/boost/serialization/assume_abstract.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_abstract.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_const.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_extent.hpp \
- /home/pblunsom/packages/include/boost/serialization/serialization.hpp \
- /home/pblunsom/packages/include/boost/serialization/strong_typedef.hpp \
- /home/pblunsom/packages/include/boost/operators.hpp \
- /home/pblunsom/packages/include/boost/iterator.hpp \
- /home/pblunsom/packages/include/boost/serialization/nvp.hpp \
- /home/pblunsom/packages/include/boost/serialization/level.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_fundamental.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_array.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_class.hpp \
- /home/pblunsom/packages/include/boost/serialization/level_enum.hpp \
- /home/pblunsom/packages/include/boost/serialization/tracking.hpp \
- /home/pblunsom/packages/include/boost/mpl/greater.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/greater.hpp \
- /home/pblunsom/packages/include/boost/serialization/tracking_enum.hpp \
- /home/pblunsom/packages/include/boost/serialization/type_info_implementation.hpp \
- /home/pblunsom/packages/include/boost/serialization/traits.hpp \
- /home/pblunsom/packages/include/boost/serialization/split_member.hpp \
- /home/pblunsom/packages/include/boost/serialization/base_object.hpp \
- /home/pblunsom/packages/include/boost/serialization/void_cast_fwd.hpp \
- /home/pblunsom/packages/include/boost/serialization/wrapper.hpp \
- /home/pblunsom/packages/include/boost/serialization/version.hpp \
- /home/pblunsom/packages/include/boost/mpl/assert.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/pp_counter.hpp \
- /home/pblunsom/packages/include/boost/mpl/less.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/less.hpp \
- /home/pblunsom/packages/include/boost/mpl/comparison.hpp \
- /home/pblunsom/packages/include/boost/mpl/not_equal_to.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/not_equal_to.hpp \
- /home/pblunsom/packages/include/boost/mpl/less_equal.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/less_equal.hpp \
- /home/pblunsom/packages/include/boost/serialization/void_cast.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_virtual_base_of.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_base_of.hpp \
- /home/pblunsom/packages/include/boost/serialization/array.hpp \
- /home/pblunsom/packages/include/boost/mpl/always.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arity_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/placeholders.hpp \
- /home/pblunsom/packages/include/boost/mpl/arg.hpp \
- /home/pblunsom/packages/include/boost/mpl/arg_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_assert.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arg_typedef.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/arg.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp \
- /home/pblunsom/packages/include/boost/mpl/lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/bind_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/next.hpp \
- /home/pblunsom/packages/include/boost/mpl/next_prior.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/common_name_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/protect.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/full_lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/quote.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/has_type.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/bcc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/quote.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/template_arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply.hpp \
- /home/pblunsom/packages/include/boost/array.hpp \
- /home/pblunsom/packages/include/boost/swap.hpp \
- /home/pblunsom/packages/include/boost/utility/swap.hpp \
- /home/pblunsom/packages/include/boost/detail/iterator.hpp \
- /home/pblunsom/packages/include/boost/serialization/collection_size_type.hpp \
- /home/pblunsom/packages/include/boost/archive/archive_exception.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/decl.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/abi_prefix.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/abi_suffix.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/basic_oarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/basic_archive.hpp \
- /home/pblunsom/packages/include/boost/cstdint.hpp \
- /home/pblunsom/packages/include/boost/integer_traits.hpp \
- /home/pblunsom/packages/include/boost/limits.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/auto_link_archive.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/basic_oserializer.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/basic_serializer.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/basic_pointer_oserializer.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/archive_serializer_map.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/check.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/ignore_skeleton_oarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/common_oarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/interface_oarchive.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/mpi_datatype_primitive.hpp \
- /home/pblunsom/packages/include/boost/serialization/detail/get_data.hpp \
- /home/pblunsom/packages/include/boost/integer.hpp \
- /home/pblunsom/packages/include/boost/integer_fwd.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/register_archive.hpp \
- /home/pblunsom/packages/include/boost/utility/enable_if.hpp \
- /home/pblunsom/packages/include/boost/mpi/packed_oarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/basic_binary_oarchive.hpp \
- /home/pblunsom/packages/include/boost/serialization/string.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/packed_oprimitive.hpp \
- /home/pblunsom/packages/include/boost/mpi/allocator.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/binary_buffer_oprimitive.hpp \
- /home/pblunsom/packages/include/boost/serialization/is_bitwise_serializable.hpp \
- /home/pblunsom/packages/include/boost/mpi/packed_iarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/basic_binary_iarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/common_iarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/basic_iarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/basic_pointer_iserializer.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/interface_iarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/iserializer.hpp \
- /home/pblunsom/packages/include/boost/detail/no_exceptions_support.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_new_operator.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/yes_no_type.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/basic_iserializer.hpp \
- /home/pblunsom/packages/include/boost/archive/shared_ptr_helper.hpp \
- /home/pblunsom/packages/include/boost/serialization/shared_ptr_132.hpp \
- /home/pblunsom/packages/include/boost/serialization/split_free.hpp \
- /home/pblunsom/packages/include/boost/serialization/detail/shared_ptr_132.hpp \
- /home/pblunsom/packages/include/boost/serialization/detail/shared_count_132.hpp \
- /home/pblunsom/packages/include/boost/detail/lightweight_mutex.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/lightweight_mutex.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/lwm_pthreads.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/packed_iprimitive.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/binary_buffer_iprimitive.hpp \
- /home/pblunsom/packages/include/boost/mpi/skeleton_and_content_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/point_to_point.hpp \
- /home/pblunsom/packages/include/boost/mpi/status.hpp \
- /home/pblunsom/packages/include/boost/mpi/request.hpp timing.h \
- clock_gettime_stub.c mpi-pyp-topics.hh \
- /home/pblunsom/packages/include/boost/ptr_container/ptr_vector.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/ptr_sequence_adapter.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/reversible_ptr_container.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/throw_exception.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/scoped_deleter.hpp \
- /home/pblunsom/packages/include/boost/scoped_array.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/scoped_array.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/static_move_ptr.hpp \
- /home/pblunsom/packages/include/boost/compressed_pair.hpp \
- /home/pblunsom/packages/include/boost/detail/compressed_pair.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_empty.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_convertible.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/ice.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_eq.hpp \
- /home/pblunsom/packages/include/boost/call_traits.hpp \
- /home/pblunsom/packages/include/boost/detail/call_traits.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/default_deleter.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_bounds.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/is_convertible.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/move.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/exception.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/clone_allocator.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/nullable.hpp \
- /home/pblunsom/packages/include/boost/range/functions.hpp \
- /home/pblunsom/packages/include/boost/range/begin.hpp \
- /home/pblunsom/packages/include/boost/range/config.hpp \
- /home/pblunsom/packages/include/boost/range/iterator.hpp \
- /home/pblunsom/packages/include/boost/range/mutable_iterator.hpp \
- /home/pblunsom/packages/include/boost/range/detail/extract_optional_type.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_traits.hpp \
- /home/pblunsom/packages/include/boost/range/const_iterator.hpp \
- /home/pblunsom/packages/include/boost/range/end.hpp \
- /home/pblunsom/packages/include/boost/range/detail/implementation_help.hpp \
- /home/pblunsom/packages/include/boost/range/detail/common.hpp \
- /home/pblunsom/packages/include/boost/range/detail/sfinae.hpp \
- /home/pblunsom/packages/include/boost/range/size.hpp \
- /home/pblunsom/packages/include/boost/range/difference_type.hpp \
- /home/pblunsom/packages/include/boost/range/distance.hpp \
- /home/pblunsom/packages/include/boost/range/empty.hpp \
- /home/pblunsom/packages/include/boost/range/rbegin.hpp \
- /home/pblunsom/packages/include/boost/range/reverse_iterator.hpp \
- /home/pblunsom/packages/include/boost/iterator/reverse_iterator.hpp \
- /home/pblunsom/packages/include/boost/utility.hpp \
- /home/pblunsom/packages/include/boost/utility/addressof.hpp \
- /home/pblunsom/packages/include/boost/utility/base_from_member.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat_from_to.hpp \
- /home/pblunsom/packages/include/boost/utility/binary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/deduce_d.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/seq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/transform.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mod.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/detail/div_base.hpp \
- /home/pblunsom/packages/include/boost/next_prior.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_adaptor.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_categories.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/config_def.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/config_undef.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_facade.hpp \
- /home/pblunsom/packages/include/boost/iterator/interoperable.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/facade_iterator_category.hpp \
- /home/pblunsom/packages/include/boost/detail/indirect_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_function.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/false_result.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/is_function_ptr_helper.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/enable_if.hpp \
- /home/pblunsom/packages/include/boost/implicit_cast.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_const.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_pointer.hpp \
- /home/pblunsom/packages/include/boost/range/rend.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/indirect_fun.hpp \
- /home/pblunsom/packages/include/boost/utility/result_of.hpp \
- /home/pblunsom/packages/include/boost/preprocessor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/library.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/div.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mul.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/data.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/insert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/push_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/pop_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_z.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/pop_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/push_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/remove.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/replace.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/greater_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/config/limits.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/expr_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/assert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/line.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/iterate.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/slot.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/detail/def.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/apply.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/is_unary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/expand.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/intercept.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/local.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/self.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/at.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/rest_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/filter.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/first_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each_product.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/to_tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitnor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitxor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/nor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/or.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/xor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_r.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_a_default.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_defaults.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection/max.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection/min.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/filter.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/first_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/detail/split.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_i.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_product.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/insert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/rest_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/pop_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/pop_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/push_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/push_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/remove.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/replace.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/subseq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/to_array.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/to_tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/to_seq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/iter/forward1.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/lower1.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/detail/shared.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/upper1.hpp \
- /home/pblunsom/packages/include/boost/utility/detail/result_of_iterate.hpp \
- /home/pblunsom/packages/include/boost/pointee.hpp \
- /home/pblunsom/packages/include/boost/detail/is_incrementable.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/void_ptr_iterator.hpp \
- /home/pblunsom/packages/include/boost/random/uniform_real.hpp \
- /home/pblunsom/packages/include/boost/random/detail/config.hpp \
- /home/pblunsom/packages/include/boost/random/variate_generator.hpp \
- /home/pblunsom/packages/include/boost/random/uniform_01.hpp \
- /home/pblunsom/packages/include/boost/random/detail/pass_through_engine.hpp \
- /home/pblunsom/packages/include/boost/random/detail/ptr_helper.hpp \
- /home/pblunsom/packages/include/boost/random/detail/disable_warnings.hpp \
- /home/pblunsom/packages/include/boost/random/detail/enable_warnings.hpp \
- /home/pblunsom/packages/include/boost/random/detail/uniform_int_float.hpp \
- /home/pblunsom/packages/include/boost/random/mersenne_twister.hpp \
- /home/pblunsom/packages/include/boost/random/linear_congruential.hpp \
- /home/pblunsom/packages/include/boost/random/detail/const_mod.hpp \
- /home/pblunsom/packages/include/boost/random/detail/seed.hpp \
- /home/pblunsom/packages/include/boost/random/inversive_congruential.hpp \
- /home/pblunsom/packages/include/boost/random/lagged_fibonacci.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/cmath.hpp \
- /home/pblunsom/packages/include/boost/mpi/environment.hpp mpi-pyp.hh \
- /home/pblunsom/packages/include/boost/tuple/tuple.hpp \
- /home/pblunsom/packages/include/boost/ref.hpp \
- /home/pblunsom/packages/include/boost/tuple/detail/tuple_basic.hpp \
- /home/pblunsom/packages/include/boost/type_traits/cv_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_volatile.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_cv.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_volatile.hpp \
- /home/pblunsom/packages/include/boost/type_traits/function_traits.hpp \
- /home/pblunsom/packages/include/boost/serialization/map.hpp \
- /home/pblunsom/packages/include/boost/serialization/utility.hpp \
- /home/pblunsom/packages/include/boost/serialization/collections_save_imp.hpp \
- /home/pblunsom/packages/include/boost/serialization/collections_load_imp.hpp \
- /home/pblunsom/packages/include/boost/serialization/detail/stack_constructor.hpp \
- /home/pblunsom/packages/include/boost/aligned_storage.hpp \
- /home/pblunsom/packages/include/boost/mpi.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives/all_gather.hpp \
- /home/pblunsom/packages/include/boost/serialization/vector.hpp \
- /home/pblunsom/packages/include/boost/serialization/collection_traits.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives/broadcast.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives/gather.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives/all_reduce.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives/reduce.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/computation_tree.hpp \
- /home/pblunsom/packages/include/boost/mpi/operations.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives/all_to_all.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives/scatter.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives/scan.hpp \
- /home/pblunsom/packages/include/boost/mpi/graph_communicator.hpp \
- /home/pblunsom/packages/include/boost/graph/graph_traits.hpp \
- /home/pblunsom/packages/include/boost/pending/property.hpp \
- /home/pblunsom/packages/include/boost/pending/detail/property.hpp \
- /home/pblunsom/packages/include/boost/type_traits/same_traits.hpp \
- /home/pblunsom/packages/include/boost/graph/properties.hpp \
- /home/pblunsom/packages/include/boost/property_map/property_map.hpp \
- /home/pblunsom/packages/include/boost/pending/cstddef.hpp \
- /home/pblunsom/packages/include/boost/concept_check.hpp \
- /home/pblunsom/packages/include/boost/concept/assert.hpp \
- /home/pblunsom/packages/include/boost/concept/detail/general.hpp \
- /home/pblunsom/packages/include/boost/concept/detail/has_constraints.hpp \
- /home/pblunsom/packages/include/boost/type_traits/conversion_traits.hpp \
- /home/pblunsom/packages/include/boost/concept/usage.hpp \
- /home/pblunsom/packages/include/boost/concept/detail/concept_def.hpp \
- /home/pblunsom/packages/include/boost/concept/detail/concept_undef.hpp \
- /home/pblunsom/packages/include/boost/concept_archetype.hpp \
- /home/pblunsom/packages/include/boost/property_map/vector_property_map.hpp \
- /home/pblunsom/packages/include/boost/graph/property_maps/constant_property_map.hpp \
- /home/pblunsom/packages/include/boost/graph/property_maps/null_property_map.hpp \
- /home/pblunsom/packages/include/boost/iterator/counting_iterator.hpp \
- /home/pblunsom/packages/include/boost/detail/numeric_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_nothrow_assign.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_trivial_assign.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_nothrow_constructor.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_trivial_constructor.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_nothrow_destructor.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_trivial_destructor.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_virtual_destructor.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_signed.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_unsigned.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_compound.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_floating_point.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_member_object_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_object.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_stateless.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_union.hpp \
- /home/pblunsom/packages/include/boost/type_traits/rank.hpp \
- /home/pblunsom/packages/include/boost/type_traits/extent.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_all_extents.hpp \
- /home/pblunsom/packages/include/boost/type_traits/aligned_storage.hpp \
- /home/pblunsom/packages/include/boost/type_traits/floating_point_promotion.hpp \
- /home/pblunsom/packages/include/boost/type_traits/integral_promotion.hpp \
- /home/pblunsom/packages/include/boost/type_traits/promote.hpp \
- /home/pblunsom/packages/include/boost/type_traits/make_unsigned.hpp \
- /home/pblunsom/packages/include/boost/type_traits/make_signed.hpp \
- /home/pblunsom/packages/include/boost/type_traits/decay.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_complex.hpp \
- /home/pblunsom/packages/include/boost/detail/select_type.hpp \
- /home/pblunsom/packages/include/boost/graph/iteration_macros.hpp \
- /home/pblunsom/packages/include/boost/shared_array.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/shared_array.hpp \
- /home/pblunsom/packages/include/boost/mpi/group.hpp \
- /home/pblunsom/packages/include/boost/mpi/intercommunicator.hpp \
- /home/pblunsom/packages/include/boost/mpi/nonblocking.hpp \
- /home/pblunsom/packages/include/boost/mpi/skeleton_and_content.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/forward_skeleton_iarchive.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/forward_skeleton_oarchive.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/ignore_iprimitive.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/ignore_oprimitive.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/content_oarchive.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/broadcast_sc.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/communicator_sc.hpp \
- /home/pblunsom/packages/include/boost/mpi/timer.hpp pyp.hh \
- slice-sampler.h log_add.h mt19937ar.h corpus.hh
-mpi-train-contexts.o: mpi-train-contexts.cc \
- /home/pblunsom/packages/include/boost/program_options/parsers.hpp \
- /home/pblunsom/packages/include/boost/program_options/config.hpp \
- /home/pblunsom/packages/include/boost/config.hpp \
- /home/pblunsom/packages/include/boost/config/user.hpp \
- /home/pblunsom/packages/include/boost/config/select_compiler_config.hpp \
- /home/pblunsom/packages/include/boost/config/compiler/gcc.hpp \
- /home/pblunsom/packages/include/boost/config/select_stdlib_config.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/utility.hpp \
- /home/pblunsom/packages/include/boost/config/stdlib/libstdcpp3.hpp \
- /home/pblunsom/packages/include/boost/config/select_platform_config.hpp \
- /home/pblunsom/packages/include/boost/config/platform/linux.hpp \
- /home/pblunsom/packages/include/boost/config/posix_features.hpp \
- /home/pblunsom/packages/include/boost/config/suffix.hpp \
- /home/pblunsom/packages/include/boost/version.hpp \
- /home/pblunsom/packages/include/boost/config/auto_link.hpp \
- /home/pblunsom/packages/include/boost/program_options/option.hpp \
- /home/pblunsom/packages/include/boost/program_options/detail/cmdline.hpp \
- /home/pblunsom/packages/include/boost/program_options/errors.hpp \
- /home/pblunsom/packages/include/boost/program_options/cmdline.hpp \
- /home/pblunsom/packages/include/boost/program_options/options_description.hpp \
- /home/pblunsom/packages/include/boost/program_options/value_semantic.hpp \
- /home/pblunsom/packages/include/boost/any.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/broken_compiler_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/lambda_support.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/ttp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/gcc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/workaround.hpp \
- /home/pblunsom/packages/include/boost/detail/workaround.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/ctps.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_def.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/template_arity_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/int.hpp \
- /home/pblunsom/packages/include/boost/mpl/int_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/adl_barrier.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/adl.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/intel.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/nttp_decl.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/nttp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/integral_wrapper.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c_tag.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/static_constant.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/static_cast.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/config/config.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/template_arity_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/params.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/preprocessor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comma_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/iif.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bool.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/empty.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repeat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/error.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/auto_rec.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/eat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/inc.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/inc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/overload_resolution.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/config.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_def.hpp \
- /home/pblunsom/packages/include/boost/type_traits/integral_constant.hpp \
- /home/pblunsom/packages/include/boost/mpl/bool.hpp \
- /home/pblunsom/packages/include/boost/mpl/bool_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c_fwd.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/throw_exception.hpp \
- /home/pblunsom/packages/include/boost/exception/detail/attribute_noreturn.hpp \
- /home/pblunsom/packages/include/boost/exception/exception.hpp \
- /home/pblunsom/packages/include/boost/current_function.hpp \
- /home/pblunsom/packages/include/boost/static_assert.hpp \
- /home/pblunsom/packages/include/boost/function/function1.hpp \
- /home/pblunsom/packages/include/boost/function/detail/maybe_include.hpp \
- /home/pblunsom/packages/include/boost/function/function_template.hpp \
- /home/pblunsom/packages/include/boost/function/detail/prologue.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/functional.hpp \
- /home/pblunsom/packages/include/boost/function/function_base.hpp \
- /home/pblunsom/packages/include/boost/detail/sp_typeinfo.hpp \
- /home/pblunsom/packages/include/boost/assert.hpp \
- /home/pblunsom/packages/include/boost/integer.hpp \
- /home/pblunsom/packages/include/boost/integer_fwd.hpp \
- /home/pblunsom/packages/include/boost/limits.hpp \
- /home/pblunsom/packages/include/boost/cstdint.hpp \
- /home/pblunsom/packages/include/boost/integer_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_trivial_copy.hpp \
- /home/pblunsom/packages/include/boost/type_traits/intrinsics.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_same.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_volatile.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/cv_traits_impl.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_pod.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_void.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_scalar.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_arithmetic.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_integral.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_float.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_or.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_enum.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_member_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_member_function_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_cv.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_and.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_not.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_trivial_destructor.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_const.hpp \
- /home/pblunsom/packages/include/boost/type_traits/composite_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_array.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_union.hpp \
- /home/pblunsom/packages/include/boost/type_traits/ice.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/yes_no_type.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_eq.hpp \
- /home/pblunsom/packages/include/boost/ref.hpp \
- /home/pblunsom/packages/include/boost/utility/addressof.hpp \
- /home/pblunsom/packages/include/boost/mpl/if.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/value_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/integral.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/eti.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/lambda_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/void_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/lambda_arity_param.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/dtp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/enum.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/def_params_tail.hpp \
- /home/pblunsom/packages/include/boost/mpl/limits/arity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/and.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitand.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/identity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/identity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/empty.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/add.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/dec.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/while.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/expr_iif.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/adt.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/is_binary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/check.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/compl.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/detail/while.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/sub.hpp \
- /home/pblunsom/packages/include/boost/type_traits/alignment_of.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_def.hpp \
- /home/pblunsom/packages/include/boost/mpl/size_t.hpp \
- /home/pblunsom/packages/include/boost/mpl/size_t_fwd.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/utility/enable_if.hpp \
- /home/pblunsom/packages/include/boost/function_equal.hpp \
- /home/pblunsom/packages/include/boost/function/function_fwd.hpp \
- /home/pblunsom/packages/include/boost/mem_fn.hpp \
- /home/pblunsom/packages/include/boost/bind/mem_fn.hpp \
- /home/pblunsom/packages/include/boost/get_pointer.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/memory.hpp \
- /home/pblunsom/packages/include/boost/bind/mem_fn_template.hpp \
- /home/pblunsom/packages/include/boost/bind/mem_fn_cc.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/rem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/enum_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params.hpp \
- /home/pblunsom/packages/include/boost/detail/no_exceptions_support.hpp \
- /home/pblunsom/packages/include/boost/lexical_cast.hpp \
- /home/pblunsom/packages/include/boost/type_traits/make_unsigned.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_signed.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_unsigned.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_const.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_volatile.hpp \
- /home/pblunsom/packages/include/boost/call_traits.hpp \
- /home/pblunsom/packages/include/boost/detail/call_traits.hpp \
- /home/pblunsom/packages/include/boost/detail/lcast_precision.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_abstract.hpp \
- /home/pblunsom/packages/include/boost/program_options/detail/value_semantic.hpp \
- /home/pblunsom/packages/include/boost/function.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iterate.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/iterate.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/data.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/slot.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/detail/def.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/iter/forward1.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/lower1.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/detail/shared.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/upper1.hpp \
- /home/pblunsom/packages/include/boost/function/detail/function_iterate.hpp \
- /home/pblunsom/packages/include/boost/shared_ptr.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/shared_ptr.hpp \
- /home/pblunsom/packages/include/boost/checked_delete.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/shared_count.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/bad_weak_ptr.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_has_sync.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_convertible.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_pool.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_sync.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/yield_k.hpp \
- /home/pblunsom/packages/include/boost/memory_order.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/operator_bool.hpp \
- /home/pblunsom/packages/include/boost/program_options/positional_options.hpp \
- /home/pblunsom/packages/include/boost/program_options/detail/parsers.hpp \
- /home/pblunsom/packages/include/boost/program_options/detail/convert.hpp \
- /home/pblunsom/packages/include/boost/program_options/variables_map.hpp \
- /home/pblunsom/packages/include/boost/scoped_ptr.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/scoped_ptr.hpp \
- /home/pblunsom/packages/include/boost/mpi/environment.hpp \
- /home/pblunsom/packages/include/boost/mpi/config.hpp \
- /home/pblunsom/packages/include/mpi.h \
- /home/pblunsom/packages/include/mpio.h \
- /home/pblunsom/packages/include/mpi.h \
- /home/pblunsom/packages/include/mpicxx.h \
- /home/pblunsom/packages/include/boost/noncopyable.hpp \
- /home/pblunsom/packages/include/boost/optional.hpp \
- /home/pblunsom/packages/include/boost/optional/optional.hpp \
- /home/pblunsom/packages/include/boost/type.hpp \
- /home/pblunsom/packages/include/boost/type_traits/type_with_alignment.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each_i.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/for.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/detail/for.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/to_list.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/transform.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/append.hpp \
- /home/pblunsom/packages/include/boost/mpl/not.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/nested_type_wknd.hpp \
- /home/pblunsom/packages/include/boost/detail/reference_content.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_nothrow_copy.hpp \
- /home/pblunsom/packages/include/boost/mpl/void.hpp \
- /home/pblunsom/packages/include/boost/none.hpp \
- /home/pblunsom/packages/include/boost/none_t.hpp \
- /home/pblunsom/packages/include/boost/utility/compare_pointees.hpp \
- /home/pblunsom/packages/include/boost/optional/optional_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpi/communicator.hpp \
- /home/pblunsom/packages/include/boost/mpi/exception.hpp \
- /home/pblunsom/packages/include/boost/mpi/datatype.hpp \
- /home/pblunsom/packages/include/boost/mpi/datatype_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/or.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/use_preprocessed.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/include_preprocessed.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/compiler.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/stringize.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/or.hpp \
- /home/pblunsom/packages/include/boost/mpl/and.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/and.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/mpi_datatype_cache.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/mpi_datatype_oarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/oserializer.hpp \
- /home/pblunsom/packages/include/boost/mpl/eval_if.hpp \
- /home/pblunsom/packages/include/boost/mpl/equal_to.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/comparison_op.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/numeric_op.hpp \
- /home/pblunsom/packages/include/boost/mpl/numeric_cast.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply_wrap.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/has_apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/has_xxx.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/type_wrapper.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/yes_no.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/arrays.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/has_xxx.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc_typename.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/has_apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/msvc_never_true.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp \
- /home/pblunsom/packages/include/boost/mpl/tag.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/has_tag.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/numeric_cast_utils.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/forwarding.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/msvc_eti_base.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/is_msvc_eti_arg.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/equal_to.hpp \
- /home/pblunsom/packages/include/boost/mpl/greater_equal.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/greater_equal.hpp \
- /home/pblunsom/packages/include/boost/mpl/identity.hpp \
- /home/pblunsom/packages/include/boost/serialization/extended_type_info_typeid.hpp \
- /home/pblunsom/packages/include/boost/serialization/static_warning.hpp \
- /home/pblunsom/packages/include/boost/mpl/print.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_polymorphic.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_const.hpp \
- /home/pblunsom/packages/include/boost/serialization/singleton.hpp \
- /home/pblunsom/packages/include/boost/serialization/force_include.hpp \
- /home/pblunsom/packages/include/boost/serialization/extended_type_info.hpp \
- /home/pblunsom/packages/include/boost/serialization/config.hpp \
- /home/pblunsom/packages/include/boost/config/abi_prefix.hpp \
- /home/pblunsom/packages/include/boost/config/abi_suffix.hpp \
- /home/pblunsom/packages/include/boost/serialization/factory.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/greater.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/less.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/less_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/not.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/not_equal.hpp \
- /home/pblunsom/packages/include/boost/serialization/access.hpp \
- /home/pblunsom/packages/include/boost/serialization/pfto.hpp \
- /home/pblunsom/packages/include/boost/serialization/throw_exception.hpp \
- /home/pblunsom/packages/include/boost/serialization/smart_cast.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_base_and_derived.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_pointer.hpp \
- /home/pblunsom/packages/include/boost/serialization/assume_abstract.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_extent.hpp \
- /home/pblunsom/packages/include/boost/serialization/serialization.hpp \
- /home/pblunsom/packages/include/boost/serialization/strong_typedef.hpp \
- /home/pblunsom/packages/include/boost/operators.hpp \
- /home/pblunsom/packages/include/boost/iterator.hpp \
- /home/pblunsom/packages/include/boost/serialization/nvp.hpp \
- /home/pblunsom/packages/include/boost/serialization/level.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_fundamental.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_class.hpp \
- /home/pblunsom/packages/include/boost/serialization/level_enum.hpp \
- /home/pblunsom/packages/include/boost/serialization/tracking.hpp \
- /home/pblunsom/packages/include/boost/mpl/greater.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/greater.hpp \
- /home/pblunsom/packages/include/boost/serialization/tracking_enum.hpp \
- /home/pblunsom/packages/include/boost/serialization/type_info_implementation.hpp \
- /home/pblunsom/packages/include/boost/serialization/traits.hpp \
- /home/pblunsom/packages/include/boost/serialization/split_member.hpp \
- /home/pblunsom/packages/include/boost/serialization/base_object.hpp \
- /home/pblunsom/packages/include/boost/serialization/void_cast_fwd.hpp \
- /home/pblunsom/packages/include/boost/serialization/wrapper.hpp \
- /home/pblunsom/packages/include/boost/serialization/version.hpp \
- /home/pblunsom/packages/include/boost/mpl/assert.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/pp_counter.hpp \
- /home/pblunsom/packages/include/boost/mpl/less.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/less.hpp \
- /home/pblunsom/packages/include/boost/mpl/comparison.hpp \
- /home/pblunsom/packages/include/boost/mpl/not_equal_to.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/not_equal_to.hpp \
- /home/pblunsom/packages/include/boost/mpl/less_equal.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/less_equal.hpp \
- /home/pblunsom/packages/include/boost/serialization/void_cast.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_virtual_base_of.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_base_of.hpp \
- /home/pblunsom/packages/include/boost/serialization/array.hpp \
- /home/pblunsom/packages/include/boost/mpl/always.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arity_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/placeholders.hpp \
- /home/pblunsom/packages/include/boost/mpl/arg.hpp \
- /home/pblunsom/packages/include/boost/mpl/arg_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_assert.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arg_typedef.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/arg.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp \
- /home/pblunsom/packages/include/boost/mpl/lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/bind_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/next.hpp \
- /home/pblunsom/packages/include/boost/mpl/next_prior.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/common_name_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/protect.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/full_lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/quote.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/has_type.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/bcc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/quote.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/template_arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply.hpp \
- /home/pblunsom/packages/include/boost/array.hpp \
- /home/pblunsom/packages/include/boost/swap.hpp \
- /home/pblunsom/packages/include/boost/utility/swap.hpp \
- /home/pblunsom/packages/include/boost/detail/iterator.hpp \
- /home/pblunsom/packages/include/boost/serialization/collection_size_type.hpp \
- /home/pblunsom/packages/include/boost/archive/archive_exception.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/decl.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/abi_prefix.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/abi_suffix.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/basic_oarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/basic_archive.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/auto_link_archive.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/basic_oserializer.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/basic_serializer.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/basic_pointer_oserializer.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/archive_serializer_map.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/check.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/ignore_skeleton_oarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/common_oarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/interface_oarchive.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/mpi_datatype_primitive.hpp \
- /home/pblunsom/packages/include/boost/serialization/detail/get_data.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/register_archive.hpp \
- /home/pblunsom/packages/include/boost/mpi/packed_oarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/basic_binary_oarchive.hpp \
- /home/pblunsom/packages/include/boost/serialization/string.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/packed_oprimitive.hpp \
- /home/pblunsom/packages/include/boost/mpi/allocator.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/binary_buffer_oprimitive.hpp \
- /home/pblunsom/packages/include/boost/serialization/is_bitwise_serializable.hpp \
- /home/pblunsom/packages/include/boost/mpi/packed_iarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/basic_binary_iarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/common_iarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/basic_iarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/basic_pointer_iserializer.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/interface_iarchive.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/iserializer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_new_operator.hpp \
- /home/pblunsom/packages/include/boost/archive/detail/basic_iserializer.hpp \
- /home/pblunsom/packages/include/boost/archive/shared_ptr_helper.hpp \
- /home/pblunsom/packages/include/boost/serialization/shared_ptr_132.hpp \
- /home/pblunsom/packages/include/boost/serialization/split_free.hpp \
- /home/pblunsom/packages/include/boost/serialization/detail/shared_ptr_132.hpp \
- /home/pblunsom/packages/include/boost/serialization/detail/shared_count_132.hpp \
- /home/pblunsom/packages/include/boost/detail/lightweight_mutex.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/lightweight_mutex.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/lwm_pthreads.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/packed_iprimitive.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/binary_buffer_iprimitive.hpp \
- /home/pblunsom/packages/include/boost/mpi/skeleton_and_content_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/point_to_point.hpp \
- /home/pblunsom/packages/include/boost/mpi/status.hpp \
- /home/pblunsom/packages/include/boost/mpi/request.hpp mpi-pyp-topics.hh \
- /home/pblunsom/packages/include/boost/ptr_container/ptr_vector.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/ptr_sequence_adapter.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/reversible_ptr_container.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/throw_exception.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/scoped_deleter.hpp \
- /home/pblunsom/packages/include/boost/scoped_array.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/scoped_array.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/static_move_ptr.hpp \
- /home/pblunsom/packages/include/boost/compressed_pair.hpp \
- /home/pblunsom/packages/include/boost/detail/compressed_pair.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_empty.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_convertible.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_reference.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/default_deleter.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_bounds.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/is_convertible.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/move.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/exception.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/clone_allocator.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/nullable.hpp \
- /home/pblunsom/packages/include/boost/range/functions.hpp \
- /home/pblunsom/packages/include/boost/range/begin.hpp \
- /home/pblunsom/packages/include/boost/range/config.hpp \
- /home/pblunsom/packages/include/boost/range/iterator.hpp \
- /home/pblunsom/packages/include/boost/range/mutable_iterator.hpp \
- /home/pblunsom/packages/include/boost/range/detail/extract_optional_type.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_traits.hpp \
- /home/pblunsom/packages/include/boost/range/const_iterator.hpp \
- /home/pblunsom/packages/include/boost/range/end.hpp \
- /home/pblunsom/packages/include/boost/range/detail/implementation_help.hpp \
- /home/pblunsom/packages/include/boost/range/detail/common.hpp \
- /home/pblunsom/packages/include/boost/range/detail/sfinae.hpp \
- /home/pblunsom/packages/include/boost/range/size.hpp \
- /home/pblunsom/packages/include/boost/range/difference_type.hpp \
- /home/pblunsom/packages/include/boost/range/distance.hpp \
- /home/pblunsom/packages/include/boost/range/empty.hpp \
- /home/pblunsom/packages/include/boost/range/rbegin.hpp \
- /home/pblunsom/packages/include/boost/range/reverse_iterator.hpp \
- /home/pblunsom/packages/include/boost/iterator/reverse_iterator.hpp \
- /home/pblunsom/packages/include/boost/utility.hpp \
- /home/pblunsom/packages/include/boost/utility/base_from_member.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat_from_to.hpp \
- /home/pblunsom/packages/include/boost/utility/binary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/deduce_d.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/seq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/transform.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mod.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/detail/div_base.hpp \
- /home/pblunsom/packages/include/boost/next_prior.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_adaptor.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_categories.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/config_def.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/config_undef.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_facade.hpp \
- /home/pblunsom/packages/include/boost/iterator/interoperable.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/facade_iterator_category.hpp \
- /home/pblunsom/packages/include/boost/detail/indirect_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_function.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/false_result.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/is_function_ptr_helper.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/enable_if.hpp \
- /home/pblunsom/packages/include/boost/implicit_cast.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_pointer.hpp \
- /home/pblunsom/packages/include/boost/range/rend.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/indirect_fun.hpp \
- /home/pblunsom/packages/include/boost/utility/result_of.hpp \
- /home/pblunsom/packages/include/boost/preprocessor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/library.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/div.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mul.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/insert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/push_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/pop_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_z.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/pop_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/push_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/remove.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/replace.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/greater_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/config/limits.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/expr_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/assert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/line.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/apply.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/is_unary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/expand.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/intercept.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/local.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/self.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/at.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/rest_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/filter.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/first_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each_product.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/to_tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitnor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitxor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/nor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/or.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/xor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_r.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_a_default.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_defaults.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection/max.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection/min.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/filter.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/first_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/detail/split.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_i.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_product.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/insert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/rest_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/pop_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/pop_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/push_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/push_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/remove.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/replace.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/subseq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/to_array.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/to_tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/to_seq.hpp \
- /home/pblunsom/packages/include/boost/utility/detail/result_of_iterate.hpp \
- /home/pblunsom/packages/include/boost/pointee.hpp \
- /home/pblunsom/packages/include/boost/detail/is_incrementable.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/void_ptr_iterator.hpp \
- /home/pblunsom/packages/include/boost/random/uniform_real.hpp \
- /home/pblunsom/packages/include/boost/random/detail/config.hpp \
- /home/pblunsom/packages/include/boost/random/variate_generator.hpp \
- /home/pblunsom/packages/include/boost/random/uniform_01.hpp \
- /home/pblunsom/packages/include/boost/random/detail/pass_through_engine.hpp \
- /home/pblunsom/packages/include/boost/random/detail/ptr_helper.hpp \
- /home/pblunsom/packages/include/boost/random/detail/disable_warnings.hpp \
- /home/pblunsom/packages/include/boost/random/detail/enable_warnings.hpp \
- /home/pblunsom/packages/include/boost/random/detail/uniform_int_float.hpp \
- /home/pblunsom/packages/include/boost/random/mersenne_twister.hpp \
- /home/pblunsom/packages/include/boost/random/linear_congruential.hpp \
- /home/pblunsom/packages/include/boost/random/detail/const_mod.hpp \
- /home/pblunsom/packages/include/boost/random/detail/seed.hpp \
- /home/pblunsom/packages/include/boost/random/inversive_congruential.hpp \
- /home/pblunsom/packages/include/boost/random/lagged_fibonacci.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/cmath.hpp mpi-pyp.hh \
- /home/pblunsom/packages/include/boost/tuple/tuple.hpp \
- /home/pblunsom/packages/include/boost/tuple/detail/tuple_basic.hpp \
- /home/pblunsom/packages/include/boost/type_traits/cv_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_cv.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_volatile.hpp \
- /home/pblunsom/packages/include/boost/type_traits/function_traits.hpp \
- /home/pblunsom/packages/include/boost/serialization/map.hpp \
- /home/pblunsom/packages/include/boost/serialization/utility.hpp \
- /home/pblunsom/packages/include/boost/serialization/collections_save_imp.hpp \
- /home/pblunsom/packages/include/boost/serialization/collections_load_imp.hpp \
- /home/pblunsom/packages/include/boost/serialization/detail/stack_constructor.hpp \
- /home/pblunsom/packages/include/boost/aligned_storage.hpp \
- /home/pblunsom/packages/include/boost/mpi.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives/all_gather.hpp \
- /home/pblunsom/packages/include/boost/serialization/vector.hpp \
- /home/pblunsom/packages/include/boost/serialization/collection_traits.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives/broadcast.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives/gather.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives/all_reduce.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives/reduce.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/computation_tree.hpp \
- /home/pblunsom/packages/include/boost/mpi/operations.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives/all_to_all.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives/scatter.hpp \
- /home/pblunsom/packages/include/boost/mpi/collectives/scan.hpp \
- /home/pblunsom/packages/include/boost/mpi/graph_communicator.hpp \
- /home/pblunsom/packages/include/boost/graph/graph_traits.hpp \
- /home/pblunsom/packages/include/boost/pending/property.hpp \
- /home/pblunsom/packages/include/boost/pending/detail/property.hpp \
- /home/pblunsom/packages/include/boost/type_traits/same_traits.hpp \
- /home/pblunsom/packages/include/boost/graph/properties.hpp \
- /home/pblunsom/packages/include/boost/property_map/property_map.hpp \
- /home/pblunsom/packages/include/boost/pending/cstddef.hpp \
- /home/pblunsom/packages/include/boost/concept_check.hpp \
- /home/pblunsom/packages/include/boost/concept/assert.hpp \
- /home/pblunsom/packages/include/boost/concept/detail/general.hpp \
- /home/pblunsom/packages/include/boost/concept/detail/has_constraints.hpp \
- /home/pblunsom/packages/include/boost/type_traits/conversion_traits.hpp \
- /home/pblunsom/packages/include/boost/concept/usage.hpp \
- /home/pblunsom/packages/include/boost/concept/detail/concept_def.hpp \
- /home/pblunsom/packages/include/boost/concept/detail/concept_undef.hpp \
- /home/pblunsom/packages/include/boost/concept_archetype.hpp \
- /home/pblunsom/packages/include/boost/property_map/vector_property_map.hpp \
- /home/pblunsom/packages/include/boost/graph/property_maps/constant_property_map.hpp \
- /home/pblunsom/packages/include/boost/graph/property_maps/null_property_map.hpp \
- /home/pblunsom/packages/include/boost/iterator/counting_iterator.hpp \
- /home/pblunsom/packages/include/boost/detail/numeric_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_nothrow_assign.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_trivial_assign.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_nothrow_constructor.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_trivial_constructor.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_nothrow_destructor.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_virtual_destructor.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_compound.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_floating_point.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_member_object_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_object.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_stateless.hpp \
- /home/pblunsom/packages/include/boost/type_traits/rank.hpp \
- /home/pblunsom/packages/include/boost/type_traits/extent.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_all_extents.hpp \
- /home/pblunsom/packages/include/boost/type_traits/aligned_storage.hpp \
- /home/pblunsom/packages/include/boost/type_traits/floating_point_promotion.hpp \
- /home/pblunsom/packages/include/boost/type_traits/integral_promotion.hpp \
- /home/pblunsom/packages/include/boost/type_traits/promote.hpp \
- /home/pblunsom/packages/include/boost/type_traits/make_signed.hpp \
- /home/pblunsom/packages/include/boost/type_traits/decay.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_complex.hpp \
- /home/pblunsom/packages/include/boost/detail/select_type.hpp \
- /home/pblunsom/packages/include/boost/graph/iteration_macros.hpp \
- /home/pblunsom/packages/include/boost/shared_array.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/shared_array.hpp \
- /home/pblunsom/packages/include/boost/mpi/group.hpp \
- /home/pblunsom/packages/include/boost/mpi/intercommunicator.hpp \
- /home/pblunsom/packages/include/boost/mpi/nonblocking.hpp \
- /home/pblunsom/packages/include/boost/mpi/skeleton_and_content.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/forward_skeleton_iarchive.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/forward_skeleton_oarchive.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/ignore_iprimitive.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/ignore_oprimitive.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/content_oarchive.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/broadcast_sc.hpp \
- /home/pblunsom/packages/include/boost/mpi/detail/communicator_sc.hpp \
- /home/pblunsom/packages/include/boost/mpi/timer.hpp pyp.hh \
- slice-sampler.h log_add.h mt19937ar.h corpus.hh mpi-corpus.hh \
- contexts_corpus.hh contexts_lexer.h ../../../decoder/dict.h \
- /home/pblunsom/packages/include/boost/functional/hash.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/hash.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/hash_fwd.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/float_functions.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/limits.hpp \
- /home/pblunsom/packages/include/boost/integer/static_log2.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float_generic.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/extensions.hpp \
- /home/pblunsom/packages/include/boost/detail/container_fwd.hpp \
- ../../../decoder/wordid.h gzstream.hh
-pyp-topics.o: pyp-topics.cc timing.h clock_gettime_stub.c pyp-topics.hh \
- /home/pblunsom/packages/include/boost/ptr_container/ptr_vector.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/ptr_sequence_adapter.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/reversible_ptr_container.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/throw_exception.hpp \
- /home/pblunsom/packages/include/boost/assert.hpp \
- /home/pblunsom/packages/include/boost/config.hpp \
- /home/pblunsom/packages/include/boost/config/user.hpp \
- /home/pblunsom/packages/include/boost/config/select_compiler_config.hpp \
- /home/pblunsom/packages/include/boost/config/compiler/gcc.hpp \
- /home/pblunsom/packages/include/boost/config/select_stdlib_config.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/utility.hpp \
- /home/pblunsom/packages/include/boost/config/stdlib/libstdcpp3.hpp \
- /home/pblunsom/packages/include/boost/config/select_platform_config.hpp \
- /home/pblunsom/packages/include/boost/config/platform/linux.hpp \
- /home/pblunsom/packages/include/boost/config/posix_features.hpp \
- /home/pblunsom/packages/include/boost/config/suffix.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/scoped_deleter.hpp \
- /home/pblunsom/packages/include/boost/scoped_array.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/scoped_array.hpp \
- /home/pblunsom/packages/include/boost/checked_delete.hpp \
- /home/pblunsom/packages/include/boost/detail/workaround.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/operator_bool.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/static_move_ptr.hpp \
- /home/pblunsom/packages/include/boost/compressed_pair.hpp \
- /home/pblunsom/packages/include/boost/detail/compressed_pair.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_cv.hpp \
- /home/pblunsom/packages/include/boost/type_traits/broken_compiler_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/lambda_support.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/ttp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/gcc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/workaround.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/ctps.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/cv_traits_impl.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_def.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/template_arity_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/int.hpp \
- /home/pblunsom/packages/include/boost/mpl/int_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/adl_barrier.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/adl.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/intel.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/nttp_decl.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/nttp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/integral_wrapper.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c_tag.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/static_constant.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/static_cast.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/config/config.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/template_arity_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/params.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/preprocessor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comma_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/iif.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bool.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/empty.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repeat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/error.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/auto_rec.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/eat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/inc.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/inc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/overload_resolution.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_empty.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_convertible.hpp \
- /home/pblunsom/packages/include/boost/type_traits/intrinsics.hpp \
- /home/pblunsom/packages/include/boost/type_traits/config.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_same.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_def.hpp \
- /home/pblunsom/packages/include/boost/type_traits/integral_constant.hpp \
- /home/pblunsom/packages/include/boost/mpl/bool.hpp \
- /home/pblunsom/packages/include/boost/mpl/bool_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c_fwd.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_volatile.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/yes_no_type.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_array.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/ice.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_or.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_and.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_not.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_eq.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_arithmetic.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_integral.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_float.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_void.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_abstract.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_class.hpp \
- /home/pblunsom/packages/include/boost/call_traits.hpp \
- /home/pblunsom/packages/include/boost/detail/call_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_member_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_member_function_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/default_deleter.hpp \
- /home/pblunsom/packages/include/boost/mpl/if.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/value_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/integral.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/eti.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/lambda_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/void_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/lambda_arity_param.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/dtp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/enum.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/def_params_tail.hpp \
- /home/pblunsom/packages/include/boost/mpl/limits/arity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/and.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitand.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/identity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/identity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/empty.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/add.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/dec.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/while.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/expr_iif.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/adt.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/is_binary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/check.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/compl.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/detail/while.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/sub.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_bounds.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/is_convertible.hpp \
- /home/pblunsom/packages/include/boost/mpl/and.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/use_preprocessed.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/nested_type_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/include_preprocessed.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/compiler.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/stringize.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/and.hpp \
- /home/pblunsom/packages/include/boost/mpl/identity.hpp \
- /home/pblunsom/packages/include/boost/utility/enable_if.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/move.hpp \
- /home/pblunsom/packages/include/boost/static_assert.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/exception.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/clone_allocator.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/nullable.hpp \
- /home/pblunsom/packages/include/boost/mpl/eval_if.hpp \
- /home/pblunsom/packages/include/boost/range/functions.hpp \
- /home/pblunsom/packages/include/boost/range/begin.hpp \
- /home/pblunsom/packages/include/boost/range/config.hpp \
- /home/pblunsom/packages/include/boost/range/iterator.hpp \
- /home/pblunsom/packages/include/boost/range/mutable_iterator.hpp \
- /home/pblunsom/packages/include/boost/range/detail/extract_optional_type.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_traits.hpp \
- /home/pblunsom/packages/include/boost/detail/iterator.hpp \
- /home/pblunsom/packages/include/boost/range/const_iterator.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_const.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_const.hpp \
- /home/pblunsom/packages/include/boost/range/end.hpp \
- /home/pblunsom/packages/include/boost/range/detail/implementation_help.hpp \
- /home/pblunsom/packages/include/boost/range/detail/common.hpp \
- /home/pblunsom/packages/include/boost/range/detail/sfinae.hpp \
- /home/pblunsom/packages/include/boost/range/size.hpp \
- /home/pblunsom/packages/include/boost/range/difference_type.hpp \
- /home/pblunsom/packages/include/boost/range/distance.hpp \
- /home/pblunsom/packages/include/boost/range/empty.hpp \
- /home/pblunsom/packages/include/boost/range/rbegin.hpp \
- /home/pblunsom/packages/include/boost/range/reverse_iterator.hpp \
- /home/pblunsom/packages/include/boost/iterator/reverse_iterator.hpp \
- /home/pblunsom/packages/include/boost/iterator.hpp \
- /home/pblunsom/packages/include/boost/utility.hpp \
- /home/pblunsom/packages/include/boost/utility/addressof.hpp \
- /home/pblunsom/packages/include/boost/utility/base_from_member.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/rem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat_from_to.hpp \
- /home/pblunsom/packages/include/boost/utility/binary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/deduce_d.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/seq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/transform.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mod.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/detail/div_base.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/less_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/not.hpp \
- /home/pblunsom/packages/include/boost/next_prior.hpp \
- /home/pblunsom/packages/include/boost/noncopyable.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_adaptor.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_categories.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/config_def.hpp \
- /home/pblunsom/packages/include/boost/mpl/placeholders.hpp \
- /home/pblunsom/packages/include/boost/mpl/arg.hpp \
- /home/pblunsom/packages/include/boost/mpl/arg_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_assert.hpp \
- /home/pblunsom/packages/include/boost/mpl/assert.hpp \
- /home/pblunsom/packages/include/boost/mpl/not.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/yes_no.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/arrays.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/pp_counter.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arity_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arg_typedef.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/arg.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/config_undef.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_facade.hpp \
- /home/pblunsom/packages/include/boost/iterator/interoperable.hpp \
- /home/pblunsom/packages/include/boost/mpl/or.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/or.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/facade_iterator_category.hpp \
- /home/pblunsom/packages/include/boost/detail/indirect_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_function.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/false_result.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/is_function_ptr_helper.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_pointer.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/enable_if.hpp \
- /home/pblunsom/packages/include/boost/implicit_cast.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_const.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_pod.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_scalar.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_enum.hpp \
- /home/pblunsom/packages/include/boost/mpl/always.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply_wrap.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/has_apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/has_xxx.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/type_wrapper.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/has_xxx.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc_typename.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/has_apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/msvc_never_true.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp \
- /home/pblunsom/packages/include/boost/mpl/lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/bind_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/next.hpp \
- /home/pblunsom/packages/include/boost/mpl/next_prior.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/common_name_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/protect.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/full_lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/quote.hpp \
- /home/pblunsom/packages/include/boost/mpl/void.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/has_type.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/bcc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/quote.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/template_arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply.hpp \
- /home/pblunsom/packages/include/boost/range/rend.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/indirect_fun.hpp \
- /home/pblunsom/packages/include/boost/utility/result_of.hpp \
- /home/pblunsom/packages/include/boost/type.hpp \
- /home/pblunsom/packages/include/boost/preprocessor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/library.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/div.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mul.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/data.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/insert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/push_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/not_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/pop_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_z.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/pop_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/push_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/remove.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/replace.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/greater.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/less.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/greater_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/config/limits.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/expr_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/assert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/line.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/iterate.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/slot.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/detail/def.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/apply.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/is_unary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/expand.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/intercept.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/local.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/self.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/append.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/at.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/rest_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each_i.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/for.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/detail/for.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/filter.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/first_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each_product.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/to_tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/to_list.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/transform.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitnor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitxor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/nor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/or.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/xor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_r.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_a_default.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_defaults.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection/max.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection/min.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/filter.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/first_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/detail/split.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_i.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_product.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/insert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/rest_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/pop_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/pop_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/push_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/push_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/remove.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/replace.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/subseq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/to_array.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/to_tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/to_seq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/iter/forward1.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/lower1.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/detail/shared.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/upper1.hpp \
- /home/pblunsom/packages/include/boost/utility/detail/result_of_iterate.hpp \
- /home/pblunsom/packages/include/boost/pointee.hpp \
- /home/pblunsom/packages/include/boost/detail/is_incrementable.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/void_ptr_iterator.hpp \
- /home/pblunsom/packages/include/boost/random/uniform_real.hpp \
- /home/pblunsom/packages/include/boost/limits.hpp \
- /home/pblunsom/packages/include/boost/random/detail/config.hpp \
- /home/pblunsom/packages/include/boost/random/variate_generator.hpp \
- /home/pblunsom/packages/include/boost/random/uniform_01.hpp \
- /home/pblunsom/packages/include/boost/random/detail/pass_through_engine.hpp \
- /home/pblunsom/packages/include/boost/random/detail/ptr_helper.hpp \
- /home/pblunsom/packages/include/boost/random/detail/disable_warnings.hpp \
- /home/pblunsom/packages/include/boost/random/detail/enable_warnings.hpp \
- /home/pblunsom/packages/include/boost/random/detail/uniform_int_float.hpp \
- /home/pblunsom/packages/include/boost/random/mersenne_twister.hpp \
- /home/pblunsom/packages/include/boost/integer_traits.hpp \
- /home/pblunsom/packages/include/boost/cstdint.hpp \
- /home/pblunsom/packages/include/boost/random/linear_congruential.hpp \
- /home/pblunsom/packages/include/boost/random/detail/const_mod.hpp \
- /home/pblunsom/packages/include/boost/random/detail/seed.hpp pyp.hh \
- slice-sampler.h log_add.h mt19937ar.h corpus.hh \
- /home/pblunsom/packages/include/boost/shared_ptr.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/shared_ptr.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/memory.hpp \
- /home/pblunsom/packages/include/boost/throw_exception.hpp \
- /home/pblunsom/packages/include/boost/exception/detail/attribute_noreturn.hpp \
- /home/pblunsom/packages/include/boost/exception/exception.hpp \
- /home/pblunsom/packages/include/boost/current_function.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/shared_count.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/bad_weak_ptr.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_has_sync.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
- /home/pblunsom/packages/include/boost/detail/sp_typeinfo.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_convertible.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_pool.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_sync.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/yield_k.hpp \
- /home/pblunsom/packages/include/boost/memory_order.hpp workers.hh \
- /home/pblunsom/packages/include/boost/bind.hpp \
- /home/pblunsom/packages/include/boost/bind/bind.hpp \
- /home/pblunsom/packages/include/boost/ref.hpp \
- /home/pblunsom/packages/include/boost/mem_fn.hpp \
- /home/pblunsom/packages/include/boost/bind/mem_fn.hpp \
- /home/pblunsom/packages/include/boost/get_pointer.hpp \
- /home/pblunsom/packages/include/boost/bind/mem_fn_template.hpp \
- /home/pblunsom/packages/include/boost/bind/mem_fn_cc.hpp \
- /home/pblunsom/packages/include/boost/is_placeholder.hpp \
- /home/pblunsom/packages/include/boost/bind/arg.hpp \
- /home/pblunsom/packages/include/boost/visit_each.hpp \
- /home/pblunsom/packages/include/boost/bind/storage.hpp \
- /home/pblunsom/packages/include/boost/bind/bind_template.hpp \
- /home/pblunsom/packages/include/boost/bind/bind_cc.hpp \
- /home/pblunsom/packages/include/boost/bind/bind_mf_cc.hpp \
- /home/pblunsom/packages/include/boost/bind/bind_mf2_cc.hpp \
- /home/pblunsom/packages/include/boost/bind/placeholders.hpp \
- /home/pblunsom/packages/include/boost/function.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iterate.hpp \
- /home/pblunsom/packages/include/boost/function/detail/prologue.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/functional.hpp \
- /home/pblunsom/packages/include/boost/function/function_base.hpp \
- /home/pblunsom/packages/include/boost/integer.hpp \
- /home/pblunsom/packages/include/boost/integer_fwd.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_trivial_copy.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_trivial_destructor.hpp \
- /home/pblunsom/packages/include/boost/type_traits/composite_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_union.hpp \
- /home/pblunsom/packages/include/boost/type_traits/alignment_of.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_def.hpp \
- /home/pblunsom/packages/include/boost/mpl/size_t.hpp \
- /home/pblunsom/packages/include/boost/mpl/size_t_fwd.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/function_equal.hpp \
- /home/pblunsom/packages/include/boost/function/function_fwd.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/enum_params.hpp \
- /home/pblunsom/packages/include/boost/function/detail/function_iterate.hpp \
- /home/pblunsom/packages/include/boost/function/detail/maybe_include.hpp \
- /home/pblunsom/packages/include/boost/function/function_template.hpp \
- /home/pblunsom/packages/include/boost/detail/no_exceptions_support.hpp \
- /home/pblunsom/packages/include/boost/thread/thread.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/platform.hpp \
- /home/pblunsom/packages/include/boost/config/requires_threads.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/thread_data.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/config.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/platform.hpp \
- /home/pblunsom/packages/include/boost/config/auto_link.hpp \
- /home/pblunsom/packages/include/boost/thread/exceptions.hpp \
- /home/pblunsom/packages/include/boost/config/abi_prefix.hpp \
- /home/pblunsom/packages/include/boost/config/abi_suffix.hpp \
- /home/pblunsom/packages/include/boost/enable_shared_from_this.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/enable_shared_from_this.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/weak_ptr.hpp \
- /home/pblunsom/packages/include/boost/thread/mutex.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/mutex.hpp \
- /home/pblunsom/packages/include/boost/thread/locks.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/move.hpp \
- /home/pblunsom/packages/include/boost/thread/thread_time.hpp \
- /home/pblunsom/packages/include/boost/date_time/microsec_time_clock.hpp \
- /home/pblunsom/packages/include/boost/date_time/compiler_config.hpp \
- /home/pblunsom/packages/include/boost/date_time/locale_config.hpp \
- /home/pblunsom/packages/include/boost/date_time/c_time.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_clock.hpp \
- /home/pblunsom/packages/include/boost/date_time/filetime_functions.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_types.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/ptime.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_system.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_config.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/cmath.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_duration.hpp \
- /home/pblunsom/packages/include/boost/operators.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_defs.hpp \
- /home/pblunsom/packages/include/boost/date_time/special_defs.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_resolution_traits.hpp \
- /home/pblunsom/packages/include/boost/date_time/int_adapter.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/gregorian_types.hpp \
- /home/pblunsom/packages/include/boost/date_time/date.hpp \
- /home/pblunsom/packages/include/boost/date_time/year_month_day.hpp \
- /home/pblunsom/packages/include/boost/date_time/period.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_calendar.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_weekday.hpp \
- /home/pblunsom/packages/include/boost/date_time/constrained_value.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_base_of.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_base_and_derived.hpp \
- /home/pblunsom/packages/include/boost/date_time/date_defs.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_day_of_year.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian_calendar.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian_calendar.ipp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_ymd.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_day.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_year.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_month.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_duration.hpp \
- /home/pblunsom/packages/include/boost/date_time/date_duration.hpp \
- /home/pblunsom/packages/include/boost/date_time/date_duration_types.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_duration_types.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_date.hpp \
- /home/pblunsom/packages/include/boost/date_time/adjust_functors.hpp \
- /home/pblunsom/packages/include/boost/date_time/wrapping_int.hpp \
- /home/pblunsom/packages/include/boost/date_time/date_generators.hpp \
- /home/pblunsom/packages/include/boost/date_time/date_clock_device.hpp \
- /home/pblunsom/packages/include/boost/date_time/date_iterator.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_system_split.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_system_counted.hpp \
- /home/pblunsom/packages/include/boost/date_time/time.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/date_duration_operators.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_duration.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/time_period.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_iterator.hpp \
- /home/pblunsom/packages/include/boost/date_time/dst_rules.hpp \
- /home/pblunsom/packages/include/boost/thread/xtime.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/conversion.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/conversion.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/timespec.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/pthread_mutex_scoped_lock.hpp \
- /home/pblunsom/packages/include/boost/optional.hpp \
- /home/pblunsom/packages/include/boost/optional/optional.hpp \
- /home/pblunsom/packages/include/boost/type_traits/type_with_alignment.hpp \
- /home/pblunsom/packages/include/boost/detail/reference_content.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_nothrow_copy.hpp \
- /home/pblunsom/packages/include/boost/none.hpp \
- /home/pblunsom/packages/include/boost/none_t.hpp \
- /home/pblunsom/packages/include/boost/utility/compare_pointees.hpp \
- /home/pblunsom/packages/include/boost/optional/optional_fwd.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/condition_variable_fwd.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/thread.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/thread_heap_alloc.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/thread_heap_alloc.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/thread_interruption.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/thread_group.hpp \
- /home/pblunsom/packages/include/boost/thread/shared_mutex.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/shared_mutex.hpp \
- /home/pblunsom/packages/include/boost/thread/condition_variable.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/condition_variable.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/thread_data.hpp \
- /home/pblunsom/packages/include/boost/thread/future.hpp \
- /home/pblunsom/packages/include/boost/exception_ptr.hpp \
- /home/pblunsom/packages/include/boost/exception/detail/exception_ptr.hpp \
- /home/pblunsom/packages/include/boost/scoped_ptr.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/scoped_ptr.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_fundamental.hpp \
- /home/pblunsom/packages/include/boost/thread/condition.hpp
-train-contexts.o: train-contexts.cc \
- /home/pblunsom/packages/include/boost/program_options/parsers.hpp \
- /home/pblunsom/packages/include/boost/program_options/config.hpp \
- /home/pblunsom/packages/include/boost/config.hpp \
- /home/pblunsom/packages/include/boost/config/user.hpp \
- /home/pblunsom/packages/include/boost/config/select_compiler_config.hpp \
- /home/pblunsom/packages/include/boost/config/compiler/gcc.hpp \
- /home/pblunsom/packages/include/boost/config/select_stdlib_config.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/utility.hpp \
- /home/pblunsom/packages/include/boost/config/stdlib/libstdcpp3.hpp \
- /home/pblunsom/packages/include/boost/config/select_platform_config.hpp \
- /home/pblunsom/packages/include/boost/config/platform/linux.hpp \
- /home/pblunsom/packages/include/boost/config/posix_features.hpp \
- /home/pblunsom/packages/include/boost/config/suffix.hpp \
- /home/pblunsom/packages/include/boost/version.hpp \
- /home/pblunsom/packages/include/boost/config/auto_link.hpp \
- /home/pblunsom/packages/include/boost/program_options/option.hpp \
- /home/pblunsom/packages/include/boost/program_options/detail/cmdline.hpp \
- /home/pblunsom/packages/include/boost/program_options/errors.hpp \
- /home/pblunsom/packages/include/boost/program_options/cmdline.hpp \
- /home/pblunsom/packages/include/boost/program_options/options_description.hpp \
- /home/pblunsom/packages/include/boost/program_options/value_semantic.hpp \
- /home/pblunsom/packages/include/boost/any.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/broken_compiler_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/lambda_support.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/ttp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/gcc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/workaround.hpp \
- /home/pblunsom/packages/include/boost/detail/workaround.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/ctps.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_def.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/template_arity_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/int.hpp \
- /home/pblunsom/packages/include/boost/mpl/int_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/adl_barrier.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/adl.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/intel.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/nttp_decl.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/nttp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/integral_wrapper.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c_tag.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/static_constant.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/static_cast.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/config/config.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/template_arity_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/params.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/preprocessor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comma_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/iif.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bool.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/empty.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repeat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/error.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/auto_rec.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/eat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/inc.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/inc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/overload_resolution.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/config.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_def.hpp \
- /home/pblunsom/packages/include/boost/type_traits/integral_constant.hpp \
- /home/pblunsom/packages/include/boost/mpl/bool.hpp \
- /home/pblunsom/packages/include/boost/mpl/bool_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c_fwd.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/throw_exception.hpp \
- /home/pblunsom/packages/include/boost/exception/detail/attribute_noreturn.hpp \
- /home/pblunsom/packages/include/boost/exception/exception.hpp \
- /home/pblunsom/packages/include/boost/current_function.hpp \
- /home/pblunsom/packages/include/boost/static_assert.hpp \
- /home/pblunsom/packages/include/boost/function/function1.hpp \
- /home/pblunsom/packages/include/boost/function/detail/maybe_include.hpp \
- /home/pblunsom/packages/include/boost/function/function_template.hpp \
- /home/pblunsom/packages/include/boost/function/detail/prologue.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/functional.hpp \
- /home/pblunsom/packages/include/boost/function/function_base.hpp \
- /home/pblunsom/packages/include/boost/detail/sp_typeinfo.hpp \
- /home/pblunsom/packages/include/boost/assert.hpp \
- /home/pblunsom/packages/include/boost/integer.hpp \
- /home/pblunsom/packages/include/boost/integer_fwd.hpp \
- /home/pblunsom/packages/include/boost/limits.hpp \
- /home/pblunsom/packages/include/boost/cstdint.hpp \
- /home/pblunsom/packages/include/boost/integer_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_trivial_copy.hpp \
- /home/pblunsom/packages/include/boost/type_traits/intrinsics.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_same.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_volatile.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/cv_traits_impl.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_pod.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_void.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_scalar.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_arithmetic.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_integral.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_float.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_or.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_enum.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_member_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_member_function_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_cv.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_and.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_not.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_trivial_destructor.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_const.hpp \
- /home/pblunsom/packages/include/boost/type_traits/composite_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_array.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_union.hpp \
- /home/pblunsom/packages/include/boost/type_traits/ice.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/yes_no_type.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_eq.hpp \
- /home/pblunsom/packages/include/boost/ref.hpp \
- /home/pblunsom/packages/include/boost/utility/addressof.hpp \
- /home/pblunsom/packages/include/boost/mpl/if.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/value_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/integral.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/eti.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/lambda_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/void_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/lambda_arity_param.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/dtp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/enum.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/def_params_tail.hpp \
- /home/pblunsom/packages/include/boost/mpl/limits/arity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/and.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitand.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/identity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/identity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/empty.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/add.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/dec.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/while.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/expr_iif.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/adt.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/is_binary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/check.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/compl.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/detail/while.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/sub.hpp \
- /home/pblunsom/packages/include/boost/type_traits/alignment_of.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_def.hpp \
- /home/pblunsom/packages/include/boost/mpl/size_t.hpp \
- /home/pblunsom/packages/include/boost/mpl/size_t_fwd.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/utility/enable_if.hpp \
- /home/pblunsom/packages/include/boost/function_equal.hpp \
- /home/pblunsom/packages/include/boost/function/function_fwd.hpp \
- /home/pblunsom/packages/include/boost/mem_fn.hpp \
- /home/pblunsom/packages/include/boost/bind/mem_fn.hpp \
- /home/pblunsom/packages/include/boost/get_pointer.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/memory.hpp \
- /home/pblunsom/packages/include/boost/bind/mem_fn_template.hpp \
- /home/pblunsom/packages/include/boost/bind/mem_fn_cc.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/rem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/enum_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params.hpp \
- /home/pblunsom/packages/include/boost/detail/no_exceptions_support.hpp \
- /home/pblunsom/packages/include/boost/lexical_cast.hpp \
- /home/pblunsom/packages/include/boost/type_traits/make_unsigned.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_signed.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_unsigned.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_const.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_volatile.hpp \
- /home/pblunsom/packages/include/boost/call_traits.hpp \
- /home/pblunsom/packages/include/boost/detail/call_traits.hpp \
- /home/pblunsom/packages/include/boost/detail/lcast_precision.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_abstract.hpp \
- /home/pblunsom/packages/include/boost/program_options/detail/value_semantic.hpp \
- /home/pblunsom/packages/include/boost/function.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iterate.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/iterate.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/data.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/slot.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/detail/def.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/iter/forward1.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/lower1.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/detail/shared.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/upper1.hpp \
- /home/pblunsom/packages/include/boost/function/detail/function_iterate.hpp \
- /home/pblunsom/packages/include/boost/shared_ptr.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/shared_ptr.hpp \
- /home/pblunsom/packages/include/boost/checked_delete.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/shared_count.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/bad_weak_ptr.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_has_sync.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_convertible.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_pool.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_sync.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/yield_k.hpp \
- /home/pblunsom/packages/include/boost/memory_order.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/operator_bool.hpp \
- /home/pblunsom/packages/include/boost/program_options/positional_options.hpp \
- /home/pblunsom/packages/include/boost/program_options/detail/parsers.hpp \
- /home/pblunsom/packages/include/boost/program_options/detail/convert.hpp \
- /home/pblunsom/packages/include/boost/program_options/variables_map.hpp \
- /home/pblunsom/packages/include/boost/scoped_ptr.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/scoped_ptr.hpp \
- pyp-topics.hh \
- /home/pblunsom/packages/include/boost/ptr_container/ptr_vector.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/ptr_sequence_adapter.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/reversible_ptr_container.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/throw_exception.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/scoped_deleter.hpp \
- /home/pblunsom/packages/include/boost/scoped_array.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/scoped_array.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/static_move_ptr.hpp \
- /home/pblunsom/packages/include/boost/compressed_pair.hpp \
- /home/pblunsom/packages/include/boost/detail/compressed_pair.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_empty.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_convertible.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_class.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/default_deleter.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_bounds.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/is_convertible.hpp \
- /home/pblunsom/packages/include/boost/mpl/and.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/use_preprocessed.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/nested_type_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/include_preprocessed.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/compiler.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/stringize.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/and.hpp \
- /home/pblunsom/packages/include/boost/mpl/identity.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/move.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/exception.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/clone_allocator.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/nullable.hpp \
- /home/pblunsom/packages/include/boost/mpl/eval_if.hpp \
- /home/pblunsom/packages/include/boost/range/functions.hpp \
- /home/pblunsom/packages/include/boost/range/begin.hpp \
- /home/pblunsom/packages/include/boost/range/config.hpp \
- /home/pblunsom/packages/include/boost/range/iterator.hpp \
- /home/pblunsom/packages/include/boost/range/mutable_iterator.hpp \
- /home/pblunsom/packages/include/boost/range/detail/extract_optional_type.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_traits.hpp \
- /home/pblunsom/packages/include/boost/detail/iterator.hpp \
- /home/pblunsom/packages/include/boost/range/const_iterator.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_const.hpp \
- /home/pblunsom/packages/include/boost/range/end.hpp \
- /home/pblunsom/packages/include/boost/range/detail/implementation_help.hpp \
- /home/pblunsom/packages/include/boost/range/detail/common.hpp \
- /home/pblunsom/packages/include/boost/range/detail/sfinae.hpp \
- /home/pblunsom/packages/include/boost/range/size.hpp \
- /home/pblunsom/packages/include/boost/range/difference_type.hpp \
- /home/pblunsom/packages/include/boost/range/distance.hpp \
- /home/pblunsom/packages/include/boost/range/empty.hpp \
- /home/pblunsom/packages/include/boost/range/rbegin.hpp \
- /home/pblunsom/packages/include/boost/range/reverse_iterator.hpp \
- /home/pblunsom/packages/include/boost/iterator/reverse_iterator.hpp \
- /home/pblunsom/packages/include/boost/iterator.hpp \
- /home/pblunsom/packages/include/boost/utility.hpp \
- /home/pblunsom/packages/include/boost/utility/base_from_member.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat_from_to.hpp \
- /home/pblunsom/packages/include/boost/utility/binary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/deduce_d.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/seq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/transform.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mod.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/detail/div_base.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/less_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/not.hpp \
- /home/pblunsom/packages/include/boost/next_prior.hpp \
- /home/pblunsom/packages/include/boost/noncopyable.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_adaptor.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_categories.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/config_def.hpp \
- /home/pblunsom/packages/include/boost/mpl/placeholders.hpp \
- /home/pblunsom/packages/include/boost/mpl/arg.hpp \
- /home/pblunsom/packages/include/boost/mpl/arg_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_assert.hpp \
- /home/pblunsom/packages/include/boost/mpl/assert.hpp \
- /home/pblunsom/packages/include/boost/mpl/not.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/yes_no.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/arrays.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/pp_counter.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arity_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arg_typedef.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/arg.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/config_undef.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_facade.hpp \
- /home/pblunsom/packages/include/boost/iterator/interoperable.hpp \
- /home/pblunsom/packages/include/boost/mpl/or.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/or.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/facade_iterator_category.hpp \
- /home/pblunsom/packages/include/boost/detail/indirect_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_function.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/false_result.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/is_function_ptr_helper.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_pointer.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/enable_if.hpp \
- /home/pblunsom/packages/include/boost/implicit_cast.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_pointer.hpp \
- /home/pblunsom/packages/include/boost/mpl/always.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply_wrap.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/has_apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/has_xxx.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/type_wrapper.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/has_xxx.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc_typename.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/has_apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/msvc_never_true.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp \
- /home/pblunsom/packages/include/boost/mpl/lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/bind_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/next.hpp \
- /home/pblunsom/packages/include/boost/mpl/next_prior.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/common_name_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/protect.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/full_lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/quote.hpp \
- /home/pblunsom/packages/include/boost/mpl/void.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/has_type.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/bcc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/quote.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/template_arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply.hpp \
- /home/pblunsom/packages/include/boost/range/rend.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/indirect_fun.hpp \
- /home/pblunsom/packages/include/boost/utility/result_of.hpp \
- /home/pblunsom/packages/include/boost/type.hpp \
- /home/pblunsom/packages/include/boost/preprocessor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/library.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/div.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mul.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/insert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/push_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/not_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/pop_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_z.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/pop_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/push_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/remove.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/replace.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/greater.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/less.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/greater_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/config/limits.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/expr_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/assert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/line.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/apply.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/is_unary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/expand.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/intercept.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/local.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/self.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/append.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/at.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/rest_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each_i.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/for.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/detail/for.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/filter.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/first_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each_product.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/to_tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/to_list.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/transform.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitnor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitxor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/nor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/or.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/xor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_r.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_a_default.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_defaults.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection/max.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection/min.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/filter.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/first_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/detail/split.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_i.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_product.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/insert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/rest_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/pop_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/pop_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/push_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/push_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/remove.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/replace.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/subseq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/to_array.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/to_tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/to_seq.hpp \
- /home/pblunsom/packages/include/boost/utility/detail/result_of_iterate.hpp \
- /home/pblunsom/packages/include/boost/pointee.hpp \
- /home/pblunsom/packages/include/boost/detail/is_incrementable.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/void_ptr_iterator.hpp \
- /home/pblunsom/packages/include/boost/random/uniform_real.hpp \
- /home/pblunsom/packages/include/boost/random/detail/config.hpp \
- /home/pblunsom/packages/include/boost/random/variate_generator.hpp \
- /home/pblunsom/packages/include/boost/random/uniform_01.hpp \
- /home/pblunsom/packages/include/boost/random/detail/pass_through_engine.hpp \
- /home/pblunsom/packages/include/boost/random/detail/ptr_helper.hpp \
- /home/pblunsom/packages/include/boost/random/detail/disable_warnings.hpp \
- /home/pblunsom/packages/include/boost/random/detail/enable_warnings.hpp \
- /home/pblunsom/packages/include/boost/random/detail/uniform_int_float.hpp \
- /home/pblunsom/packages/include/boost/random/mersenne_twister.hpp \
- /home/pblunsom/packages/include/boost/random/linear_congruential.hpp \
- /home/pblunsom/packages/include/boost/random/detail/const_mod.hpp \
- /home/pblunsom/packages/include/boost/random/detail/seed.hpp pyp.hh \
- slice-sampler.h log_add.h mt19937ar.h corpus.hh workers.hh \
- /home/pblunsom/packages/include/boost/bind.hpp \
- /home/pblunsom/packages/include/boost/bind/bind.hpp \
- /home/pblunsom/packages/include/boost/is_placeholder.hpp \
- /home/pblunsom/packages/include/boost/bind/arg.hpp \
- /home/pblunsom/packages/include/boost/visit_each.hpp \
- /home/pblunsom/packages/include/boost/bind/storage.hpp \
- /home/pblunsom/packages/include/boost/bind/bind_template.hpp \
- /home/pblunsom/packages/include/boost/bind/bind_cc.hpp \
- /home/pblunsom/packages/include/boost/bind/bind_mf_cc.hpp \
- /home/pblunsom/packages/include/boost/bind/bind_mf2_cc.hpp \
- /home/pblunsom/packages/include/boost/bind/placeholders.hpp \
- /home/pblunsom/packages/include/boost/thread/thread.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/platform.hpp \
- /home/pblunsom/packages/include/boost/config/requires_threads.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/thread_data.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/config.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/platform.hpp \
- /home/pblunsom/packages/include/boost/thread/exceptions.hpp \
- /home/pblunsom/packages/include/boost/config/abi_prefix.hpp \
- /home/pblunsom/packages/include/boost/config/abi_suffix.hpp \
- /home/pblunsom/packages/include/boost/enable_shared_from_this.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/enable_shared_from_this.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/weak_ptr.hpp \
- /home/pblunsom/packages/include/boost/thread/mutex.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/mutex.hpp \
- /home/pblunsom/packages/include/boost/thread/locks.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/move.hpp \
- /home/pblunsom/packages/include/boost/thread/thread_time.hpp \
- /home/pblunsom/packages/include/boost/date_time/microsec_time_clock.hpp \
- /home/pblunsom/packages/include/boost/date_time/compiler_config.hpp \
- /home/pblunsom/packages/include/boost/date_time/locale_config.hpp \
- /home/pblunsom/packages/include/boost/date_time/c_time.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_clock.hpp \
- /home/pblunsom/packages/include/boost/date_time/filetime_functions.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_types.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/ptime.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_system.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_config.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/cmath.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_duration.hpp \
- /home/pblunsom/packages/include/boost/operators.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_defs.hpp \
- /home/pblunsom/packages/include/boost/date_time/special_defs.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_resolution_traits.hpp \
- /home/pblunsom/packages/include/boost/date_time/int_adapter.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/gregorian_types.hpp \
- /home/pblunsom/packages/include/boost/date_time/date.hpp \
- /home/pblunsom/packages/include/boost/date_time/year_month_day.hpp \
- /home/pblunsom/packages/include/boost/date_time/period.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_calendar.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_weekday.hpp \
- /home/pblunsom/packages/include/boost/date_time/constrained_value.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_base_of.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_base_and_derived.hpp \
- /home/pblunsom/packages/include/boost/date_time/date_defs.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_day_of_year.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian_calendar.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian_calendar.ipp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_ymd.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_day.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_year.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_month.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_duration.hpp \
- /home/pblunsom/packages/include/boost/date_time/date_duration.hpp \
- /home/pblunsom/packages/include/boost/date_time/date_duration_types.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_duration_types.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_date.hpp \
- /home/pblunsom/packages/include/boost/date_time/adjust_functors.hpp \
- /home/pblunsom/packages/include/boost/date_time/wrapping_int.hpp \
- /home/pblunsom/packages/include/boost/date_time/date_generators.hpp \
- /home/pblunsom/packages/include/boost/date_time/date_clock_device.hpp \
- /home/pblunsom/packages/include/boost/date_time/date_iterator.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_system_split.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_system_counted.hpp \
- /home/pblunsom/packages/include/boost/date_time/time.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/date_duration_operators.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_duration.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/time_period.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_iterator.hpp \
- /home/pblunsom/packages/include/boost/date_time/dst_rules.hpp \
- /home/pblunsom/packages/include/boost/thread/xtime.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/conversion.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/conversion.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/timespec.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/pthread_mutex_scoped_lock.hpp \
- /home/pblunsom/packages/include/boost/optional.hpp \
- /home/pblunsom/packages/include/boost/optional/optional.hpp \
- /home/pblunsom/packages/include/boost/type_traits/type_with_alignment.hpp \
- /home/pblunsom/packages/include/boost/detail/reference_content.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_nothrow_copy.hpp \
- /home/pblunsom/packages/include/boost/none.hpp \
- /home/pblunsom/packages/include/boost/none_t.hpp \
- /home/pblunsom/packages/include/boost/utility/compare_pointees.hpp \
- /home/pblunsom/packages/include/boost/optional/optional_fwd.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/condition_variable_fwd.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/thread.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/thread_heap_alloc.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/thread_heap_alloc.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/thread_interruption.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/thread_group.hpp \
- /home/pblunsom/packages/include/boost/thread/shared_mutex.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/shared_mutex.hpp \
- /home/pblunsom/packages/include/boost/thread/condition_variable.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/condition_variable.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/thread_data.hpp \
- /home/pblunsom/packages/include/boost/thread/future.hpp \
- /home/pblunsom/packages/include/boost/exception_ptr.hpp \
- /home/pblunsom/packages/include/boost/exception/detail/exception_ptr.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_fundamental.hpp \
- /home/pblunsom/packages/include/boost/thread/condition.hpp timing.h \
- clock_gettime_stub.c contexts_corpus.hh contexts_lexer.h \
- ../../../decoder/dict.h \
- /home/pblunsom/packages/include/boost/functional/hash.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/hash.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/hash_fwd.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/float_functions.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/limits.hpp \
- /home/pblunsom/packages/include/boost/integer/static_log2.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float_generic.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/extensions.hpp \
- /home/pblunsom/packages/include/boost/detail/container_fwd.hpp \
- ../../../decoder/wordid.h gzstream.hh
-train.o: train.cc \
- /home/pblunsom/packages/include/boost/program_options/parsers.hpp \
- /home/pblunsom/packages/include/boost/program_options/config.hpp \
- /home/pblunsom/packages/include/boost/config.hpp \
- /home/pblunsom/packages/include/boost/config/user.hpp \
- /home/pblunsom/packages/include/boost/config/select_compiler_config.hpp \
- /home/pblunsom/packages/include/boost/config/compiler/gcc.hpp \
- /home/pblunsom/packages/include/boost/config/select_stdlib_config.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/utility.hpp \
- /home/pblunsom/packages/include/boost/config/stdlib/libstdcpp3.hpp \
- /home/pblunsom/packages/include/boost/config/select_platform_config.hpp \
- /home/pblunsom/packages/include/boost/config/platform/linux.hpp \
- /home/pblunsom/packages/include/boost/config/posix_features.hpp \
- /home/pblunsom/packages/include/boost/config/suffix.hpp \
- /home/pblunsom/packages/include/boost/version.hpp \
- /home/pblunsom/packages/include/boost/config/auto_link.hpp \
- /home/pblunsom/packages/include/boost/program_options/option.hpp \
- /home/pblunsom/packages/include/boost/program_options/detail/cmdline.hpp \
- /home/pblunsom/packages/include/boost/program_options/errors.hpp \
- /home/pblunsom/packages/include/boost/program_options/cmdline.hpp \
- /home/pblunsom/packages/include/boost/program_options/options_description.hpp \
- /home/pblunsom/packages/include/boost/program_options/value_semantic.hpp \
- /home/pblunsom/packages/include/boost/any.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/broken_compiler_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/lambda_support.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/ttp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/gcc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/workaround.hpp \
- /home/pblunsom/packages/include/boost/detail/workaround.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/ctps.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_def.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/template_arity_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/int.hpp \
- /home/pblunsom/packages/include/boost/mpl/int_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/adl_barrier.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/adl.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/intel.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/nttp_decl.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/nttp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/integral_wrapper.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c_tag.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/static_constant.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/static_cast.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/config/config.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/template_arity_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/params.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/preprocessor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comma_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/iif.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bool.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/empty.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/comma.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repeat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/error.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/auto_rec.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/eat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/inc.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/inc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/overload_resolution.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/type_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/config.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_def.hpp \
- /home/pblunsom/packages/include/boost/type_traits/integral_constant.hpp \
- /home/pblunsom/packages/include/boost/mpl/bool.hpp \
- /home/pblunsom/packages/include/boost/mpl/bool_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c.hpp \
- /home/pblunsom/packages/include/boost/mpl/integral_c_fwd.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/bool_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/throw_exception.hpp \
- /home/pblunsom/packages/include/boost/exception/detail/attribute_noreturn.hpp \
- /home/pblunsom/packages/include/boost/exception/exception.hpp \
- /home/pblunsom/packages/include/boost/current_function.hpp \
- /home/pblunsom/packages/include/boost/static_assert.hpp \
- /home/pblunsom/packages/include/boost/function/function1.hpp \
- /home/pblunsom/packages/include/boost/function/detail/maybe_include.hpp \
- /home/pblunsom/packages/include/boost/function/function_template.hpp \
- /home/pblunsom/packages/include/boost/function/detail/prologue.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/functional.hpp \
- /home/pblunsom/packages/include/boost/function/function_base.hpp \
- /home/pblunsom/packages/include/boost/detail/sp_typeinfo.hpp \
- /home/pblunsom/packages/include/boost/assert.hpp \
- /home/pblunsom/packages/include/boost/integer.hpp \
- /home/pblunsom/packages/include/boost/integer_fwd.hpp \
- /home/pblunsom/packages/include/boost/limits.hpp \
- /home/pblunsom/packages/include/boost/cstdint.hpp \
- /home/pblunsom/packages/include/boost/integer_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_trivial_copy.hpp \
- /home/pblunsom/packages/include/boost/type_traits/intrinsics.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_same.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_volatile.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/cv_traits_impl.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_pod.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_void.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_scalar.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_arithmetic.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_integral.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_float.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_or.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_enum.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_member_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_member_function_pointer.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_cv.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_and.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_not.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_trivial_destructor.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_const.hpp \
- /home/pblunsom/packages/include/boost/type_traits/composite_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_array.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_union.hpp \
- /home/pblunsom/packages/include/boost/type_traits/ice.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/yes_no_type.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/ice_eq.hpp \
- /home/pblunsom/packages/include/boost/ref.hpp \
- /home/pblunsom/packages/include/boost/utility/addressof.hpp \
- /home/pblunsom/packages/include/boost/mpl/if.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/value_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/integral.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/eti.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/lambda_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/void_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/lambda_arity_param.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/dtp.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/enum.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessor/def_params_tail.hpp \
- /home/pblunsom/packages/include/boost/mpl/limits/arity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/and.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitand.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/identity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/identity.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/empty.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/add.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/dec.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/while.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/expr_iif.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/adt.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/is_binary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/check.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/compl.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/detail/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/detail/while.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/sub.hpp \
- /home/pblunsom/packages/include/boost/type_traits/alignment_of.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_def.hpp \
- /home/pblunsom/packages/include/boost/mpl/size_t.hpp \
- /home/pblunsom/packages/include/boost/mpl/size_t_fwd.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/size_t_trait_undef.hpp \
- /home/pblunsom/packages/include/boost/utility/enable_if.hpp \
- /home/pblunsom/packages/include/boost/function_equal.hpp \
- /home/pblunsom/packages/include/boost/function/function_fwd.hpp \
- /home/pblunsom/packages/include/boost/mem_fn.hpp \
- /home/pblunsom/packages/include/boost/bind/mem_fn.hpp \
- /home/pblunsom/packages/include/boost/get_pointer.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/memory.hpp \
- /home/pblunsom/packages/include/boost/bind/mem_fn_template.hpp \
- /home/pblunsom/packages/include/boost/bind/mem_fn_cc.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/rem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/enum_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params.hpp \
- /home/pblunsom/packages/include/boost/detail/no_exceptions_support.hpp \
- /home/pblunsom/packages/include/boost/lexical_cast.hpp \
- /home/pblunsom/packages/include/boost/type_traits/make_unsigned.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_signed.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_unsigned.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_const.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_volatile.hpp \
- /home/pblunsom/packages/include/boost/call_traits.hpp \
- /home/pblunsom/packages/include/boost/detail/call_traits.hpp \
- /home/pblunsom/packages/include/boost/detail/lcast_precision.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_abstract.hpp \
- /home/pblunsom/packages/include/boost/program_options/detail/value_semantic.hpp \
- /home/pblunsom/packages/include/boost/function.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iterate.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/iterate.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/data.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/slot.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/detail/def.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/iter/forward1.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/lower1.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot/detail/shared.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/detail/bounds/upper1.hpp \
- /home/pblunsom/packages/include/boost/function/detail/function_iterate.hpp \
- /home/pblunsom/packages/include/boost/shared_ptr.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/shared_ptr.hpp \
- /home/pblunsom/packages/include/boost/checked_delete.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/shared_count.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/bad_weak_ptr.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_has_sync.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/sp_convertible.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_pool.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/spinlock_sync.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/yield_k.hpp \
- /home/pblunsom/packages/include/boost/memory_order.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/detail/operator_bool.hpp \
- /home/pblunsom/packages/include/boost/program_options/positional_options.hpp \
- /home/pblunsom/packages/include/boost/program_options/detail/parsers.hpp \
- /home/pblunsom/packages/include/boost/program_options/detail/convert.hpp \
- /home/pblunsom/packages/include/boost/program_options/variables_map.hpp \
- /home/pblunsom/packages/include/boost/scoped_ptr.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/scoped_ptr.hpp \
- pyp-topics.hh \
- /home/pblunsom/packages/include/boost/ptr_container/ptr_vector.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/ptr_sequence_adapter.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/reversible_ptr_container.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/throw_exception.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/scoped_deleter.hpp \
- /home/pblunsom/packages/include/boost/scoped_array.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/scoped_array.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/static_move_ptr.hpp \
- /home/pblunsom/packages/include/boost/compressed_pair.hpp \
- /home/pblunsom/packages/include/boost/detail/compressed_pair.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_empty.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_convertible.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_reference.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_class.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/default_deleter.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_bounds.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/is_convertible.hpp \
- /home/pblunsom/packages/include/boost/mpl/and.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/use_preprocessed.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/nested_type_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/include_preprocessed.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/compiler.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/stringize.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/and.hpp \
- /home/pblunsom/packages/include/boost/mpl/identity.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/move.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/exception.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/clone_allocator.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/nullable.hpp \
- /home/pblunsom/packages/include/boost/mpl/eval_if.hpp \
- /home/pblunsom/packages/include/boost/range/functions.hpp \
- /home/pblunsom/packages/include/boost/range/begin.hpp \
- /home/pblunsom/packages/include/boost/range/config.hpp \
- /home/pblunsom/packages/include/boost/range/iterator.hpp \
- /home/pblunsom/packages/include/boost/range/mutable_iterator.hpp \
- /home/pblunsom/packages/include/boost/range/detail/extract_optional_type.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_traits.hpp \
- /home/pblunsom/packages/include/boost/detail/iterator.hpp \
- /home/pblunsom/packages/include/boost/range/const_iterator.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_const.hpp \
- /home/pblunsom/packages/include/boost/range/end.hpp \
- /home/pblunsom/packages/include/boost/range/detail/implementation_help.hpp \
- /home/pblunsom/packages/include/boost/range/detail/common.hpp \
- /home/pblunsom/packages/include/boost/range/detail/sfinae.hpp \
- /home/pblunsom/packages/include/boost/range/size.hpp \
- /home/pblunsom/packages/include/boost/range/difference_type.hpp \
- /home/pblunsom/packages/include/boost/range/distance.hpp \
- /home/pblunsom/packages/include/boost/range/empty.hpp \
- /home/pblunsom/packages/include/boost/range/rbegin.hpp \
- /home/pblunsom/packages/include/boost/range/reverse_iterator.hpp \
- /home/pblunsom/packages/include/boost/iterator/reverse_iterator.hpp \
- /home/pblunsom/packages/include/boost/iterator.hpp \
- /home/pblunsom/packages/include/boost/utility.hpp \
- /home/pblunsom/packages/include/boost/utility/base_from_member.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/repeat_from_to.hpp \
- /home/pblunsom/packages/include/boost/utility/binary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/deduce_d.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/fold_left.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/seq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/elem.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/transform.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mod.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/detail/div_base.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/less_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/not.hpp \
- /home/pblunsom/packages/include/boost/next_prior.hpp \
- /home/pblunsom/packages/include/boost/noncopyable.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_adaptor.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_categories.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/config_def.hpp \
- /home/pblunsom/packages/include/boost/mpl/placeholders.hpp \
- /home/pblunsom/packages/include/boost/mpl/arg.hpp \
- /home/pblunsom/packages/include/boost/mpl/arg_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/na_assert.hpp \
- /home/pblunsom/packages/include/boost/mpl/assert.hpp \
- /home/pblunsom/packages/include/boost/mpl/not.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/yes_no.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/arrays.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/pp_counter.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arity_spec.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/arg_typedef.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/arg.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/config_undef.hpp \
- /home/pblunsom/packages/include/boost/iterator/iterator_facade.hpp \
- /home/pblunsom/packages/include/boost/iterator/interoperable.hpp \
- /home/pblunsom/packages/include/boost/mpl/or.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/or.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/facade_iterator_category.hpp \
- /home/pblunsom/packages/include/boost/detail/indirect_traits.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_function.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/false_result.hpp \
- /home/pblunsom/packages/include/boost/type_traits/detail/is_function_ptr_helper.hpp \
- /home/pblunsom/packages/include/boost/type_traits/remove_pointer.hpp \
- /home/pblunsom/packages/include/boost/iterator/detail/enable_if.hpp \
- /home/pblunsom/packages/include/boost/implicit_cast.hpp \
- /home/pblunsom/packages/include/boost/type_traits/add_pointer.hpp \
- /home/pblunsom/packages/include/boost/mpl/always.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/apply_wrap.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/has_apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/has_xxx.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/type_wrapper.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/has_xxx.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/msvc_typename.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/has_apply.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/msvc_never_true.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp \
- /home/pblunsom/packages/include/boost/mpl/lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/bind_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp \
- /home/pblunsom/packages/include/boost/mpl/next.hpp \
- /home/pblunsom/packages/include/boost/mpl/next_prior.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/common_name_wknd.hpp \
- /home/pblunsom/packages/include/boost/mpl/protect.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/bind.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/full_lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/quote.hpp \
- /home/pblunsom/packages/include/boost/mpl/void.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/has_type.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/config/bcc.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/quote.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/template_arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp \
- /home/pblunsom/packages/include/boost/mpl/aux_/preprocessed/gcc/apply.hpp \
- /home/pblunsom/packages/include/boost/range/rend.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/indirect_fun.hpp \
- /home/pblunsom/packages/include/boost/utility/result_of.hpp \
- /home/pblunsom/packages/include/boost/type.hpp \
- /home/pblunsom/packages/include/boost/preprocessor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/library.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/div.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/arithmetic/mul.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/insert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/push_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/not_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/pop_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_z.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/pop_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/push_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/remove.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/replace.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/array/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/greater.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/less.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/comparison/greater_equal.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/config/limits.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/control/expr_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/assert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/debug/line.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/apply.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/detail/is_unary.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/expand.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/facilities/intercept.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/local.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/iteration/self.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/append.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/at.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/rest_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/cat.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each_i.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/for.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/detail/for.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/filter.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/first_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/for_each_product.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/to_tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/to_list.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/size.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/list/transform.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitnor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/bitxor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/nor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/or.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/logical/xor.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/punctuation/paren_if.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/deduce_r.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_a_default.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_params_with_defaults.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_shifted_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_binary_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/repetition/enum_trailing_params.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection/max.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/selection/min.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/enum.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/filter.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/first_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/detail/split.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/fold_right.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/reverse.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_i.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/for_each_product.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/insert.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/rest_n.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/pop_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/pop_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/push_back.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/push_front.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/remove.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/replace.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/subseq.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/to_array.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/seq/to_tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/slot.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple.hpp \
- /home/pblunsom/packages/include/boost/preprocessor/tuple/to_seq.hpp \
- /home/pblunsom/packages/include/boost/utility/detail/result_of_iterate.hpp \
- /home/pblunsom/packages/include/boost/pointee.hpp \
- /home/pblunsom/packages/include/boost/detail/is_incrementable.hpp \
- /home/pblunsom/packages/include/boost/ptr_container/detail/void_ptr_iterator.hpp \
- /home/pblunsom/packages/include/boost/random/uniform_real.hpp \
- /home/pblunsom/packages/include/boost/random/detail/config.hpp \
- /home/pblunsom/packages/include/boost/random/variate_generator.hpp \
- /home/pblunsom/packages/include/boost/random/uniform_01.hpp \
- /home/pblunsom/packages/include/boost/random/detail/pass_through_engine.hpp \
- /home/pblunsom/packages/include/boost/random/detail/ptr_helper.hpp \
- /home/pblunsom/packages/include/boost/random/detail/disable_warnings.hpp \
- /home/pblunsom/packages/include/boost/random/detail/enable_warnings.hpp \
- /home/pblunsom/packages/include/boost/random/detail/uniform_int_float.hpp \
- /home/pblunsom/packages/include/boost/random/mersenne_twister.hpp \
- /home/pblunsom/packages/include/boost/random/linear_congruential.hpp \
- /home/pblunsom/packages/include/boost/random/detail/const_mod.hpp \
- /home/pblunsom/packages/include/boost/random/detail/seed.hpp pyp.hh \
- slice-sampler.h log_add.h mt19937ar.h corpus.hh workers.hh \
- /home/pblunsom/packages/include/boost/bind.hpp \
- /home/pblunsom/packages/include/boost/bind/bind.hpp \
- /home/pblunsom/packages/include/boost/is_placeholder.hpp \
- /home/pblunsom/packages/include/boost/bind/arg.hpp \
- /home/pblunsom/packages/include/boost/visit_each.hpp \
- /home/pblunsom/packages/include/boost/bind/storage.hpp \
- /home/pblunsom/packages/include/boost/bind/bind_template.hpp \
- /home/pblunsom/packages/include/boost/bind/bind_cc.hpp \
- /home/pblunsom/packages/include/boost/bind/bind_mf_cc.hpp \
- /home/pblunsom/packages/include/boost/bind/bind_mf2_cc.hpp \
- /home/pblunsom/packages/include/boost/bind/placeholders.hpp \
- /home/pblunsom/packages/include/boost/thread/thread.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/platform.hpp \
- /home/pblunsom/packages/include/boost/config/requires_threads.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/thread_data.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/config.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/platform.hpp \
- /home/pblunsom/packages/include/boost/thread/exceptions.hpp \
- /home/pblunsom/packages/include/boost/config/abi_prefix.hpp \
- /home/pblunsom/packages/include/boost/config/abi_suffix.hpp \
- /home/pblunsom/packages/include/boost/enable_shared_from_this.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/enable_shared_from_this.hpp \
- /home/pblunsom/packages/include/boost/smart_ptr/weak_ptr.hpp \
- /home/pblunsom/packages/include/boost/thread/mutex.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/mutex.hpp \
- /home/pblunsom/packages/include/boost/thread/locks.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/move.hpp \
- /home/pblunsom/packages/include/boost/thread/thread_time.hpp \
- /home/pblunsom/packages/include/boost/date_time/microsec_time_clock.hpp \
- /home/pblunsom/packages/include/boost/date_time/compiler_config.hpp \
- /home/pblunsom/packages/include/boost/date_time/locale_config.hpp \
- /home/pblunsom/packages/include/boost/date_time/c_time.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_clock.hpp \
- /home/pblunsom/packages/include/boost/date_time/filetime_functions.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_types.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/ptime.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_system.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_config.hpp \
- /home/pblunsom/packages/include/boost/config/no_tr1/cmath.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_duration.hpp \
- /home/pblunsom/packages/include/boost/operators.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_defs.hpp \
- /home/pblunsom/packages/include/boost/date_time/special_defs.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_resolution_traits.hpp \
- /home/pblunsom/packages/include/boost/date_time/int_adapter.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/gregorian_types.hpp \
- /home/pblunsom/packages/include/boost/date_time/date.hpp \
- /home/pblunsom/packages/include/boost/date_time/year_month_day.hpp \
- /home/pblunsom/packages/include/boost/date_time/period.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_calendar.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_weekday.hpp \
- /home/pblunsom/packages/include/boost/date_time/constrained_value.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_base_of.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_base_and_derived.hpp \
- /home/pblunsom/packages/include/boost/date_time/date_defs.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_day_of_year.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian_calendar.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian_calendar.ipp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_ymd.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_day.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_year.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_month.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_duration.hpp \
- /home/pblunsom/packages/include/boost/date_time/date_duration.hpp \
- /home/pblunsom/packages/include/boost/date_time/date_duration_types.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_duration_types.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/greg_date.hpp \
- /home/pblunsom/packages/include/boost/date_time/adjust_functors.hpp \
- /home/pblunsom/packages/include/boost/date_time/wrapping_int.hpp \
- /home/pblunsom/packages/include/boost/date_time/date_generators.hpp \
- /home/pblunsom/packages/include/boost/date_time/date_clock_device.hpp \
- /home/pblunsom/packages/include/boost/date_time/date_iterator.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_system_split.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_system_counted.hpp \
- /home/pblunsom/packages/include/boost/date_time/time.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/date_duration_operators.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/posix_time_duration.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/time_period.hpp \
- /home/pblunsom/packages/include/boost/date_time/time_iterator.hpp \
- /home/pblunsom/packages/include/boost/date_time/dst_rules.hpp \
- /home/pblunsom/packages/include/boost/thread/xtime.hpp \
- /home/pblunsom/packages/include/boost/date_time/posix_time/conversion.hpp \
- /home/pblunsom/packages/include/boost/date_time/gregorian/conversion.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/timespec.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/pthread_mutex_scoped_lock.hpp \
- /home/pblunsom/packages/include/boost/optional.hpp \
- /home/pblunsom/packages/include/boost/optional/optional.hpp \
- /home/pblunsom/packages/include/boost/type_traits/type_with_alignment.hpp \
- /home/pblunsom/packages/include/boost/detail/reference_content.hpp \
- /home/pblunsom/packages/include/boost/type_traits/has_nothrow_copy.hpp \
- /home/pblunsom/packages/include/boost/none.hpp \
- /home/pblunsom/packages/include/boost/none_t.hpp \
- /home/pblunsom/packages/include/boost/utility/compare_pointees.hpp \
- /home/pblunsom/packages/include/boost/optional/optional_fwd.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/condition_variable_fwd.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/thread.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/thread_heap_alloc.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/thread_heap_alloc.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/thread_interruption.hpp \
- /home/pblunsom/packages/include/boost/thread/detail/thread_group.hpp \
- /home/pblunsom/packages/include/boost/thread/shared_mutex.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/shared_mutex.hpp \
- /home/pblunsom/packages/include/boost/thread/condition_variable.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/condition_variable.hpp \
- /home/pblunsom/packages/include/boost/thread/pthread/thread_data.hpp \
- /home/pblunsom/packages/include/boost/thread/future.hpp \
- /home/pblunsom/packages/include/boost/exception_ptr.hpp \
- /home/pblunsom/packages/include/boost/exception/detail/exception_ptr.hpp \
- /home/pblunsom/packages/include/boost/type_traits/is_fundamental.hpp \
- /home/pblunsom/packages/include/boost/thread/condition.hpp timing.h \
- clock_gettime_stub.c contexts_corpus.hh contexts_lexer.h \
- ../../../decoder/dict.h \
- /home/pblunsom/packages/include/boost/functional/hash.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/hash.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/hash_fwd.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/float_functions.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/limits.hpp \
- /home/pblunsom/packages/include/boost/integer/static_log2.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/detail/hash_float_generic.hpp \
- /home/pblunsom/packages/include/boost/functional/hash/extensions.hpp \
- /home/pblunsom/packages/include/boost/detail/container_fwd.hpp \
- ../../../decoder/wordid.h gzstream.hh
-clock_gettime_stub.o: clock_gettime_stub.c
-gammadist.o: gammadist.c gammadist.h mt19937ar.h
-mt19937ar.o: mt19937ar.c mt19937ar.h
diff --git a/gi/pyp-topics/src/mpi-corpus.hh b/gi/pyp-topics/src/mpi-corpus.hh
deleted file mode 100644
index f5c478a9..00000000
--- a/gi/pyp-topics/src/mpi-corpus.hh
+++ /dev/null
@@ -1,69 +0,0 @@
-#ifndef _MPI_CORPUS_HH
-#define _MPI_CORPUS_HH
-
-#include <vector>
-#include <string>
-#include <map>
-#include <tr1/unordered_map>
-
-#include <boost/ptr_container/ptr_vector.hpp>
-#include <boost/mpi/environment.hpp>
-#include <boost/mpi/communicator.hpp>
-
-#include "contexts_corpus.hh"
-
-
-////////////////////////////////////////////////////////////////
-// MPICorpus
-////////////////////////////////////////////////////////////////
-
-class MPICorpus : public ContextsCorpus {
-public:
- MPICorpus() : ContextsCorpus() {
- boost::mpi::communicator world;
- m_rank = world.rank();
- m_size = world.size();
- m_start = -1;
- m_end = -1;
- }
- virtual ~MPICorpus() {}
-
- virtual unsigned read_contexts(const std::string &filename,
- BackoffGenerator* backoff_gen=0,
-                                 bool filter_singletons=false,
- bool binary_contexts=false) {
-    unsigned result = ContextsCorpus::read_contexts(filename, backoff_gen, filter_singletons, binary_contexts);
-
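-    // Greedily partition the documents into m_size contiguous segments of
-    // roughly equal term mass: a segment boundary is recorded each time the
-    // running term count crosses the next multiple of segment_size.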
- if (m_rank == 0) std::cerr << "\tLoad balancing terms per mpi segment:" << std::endl;
-    float segment_size = num_terms() / (float)m_size;  // avoid integer division
- float term_threshold = segment_size;
- int seen_terms = 0;
- std::vector<int> end_points;
- for (int i=0; i < num_documents(); ++i) {
- seen_terms += m_documents.at(i).size();
- if (seen_terms >= term_threshold) {
- end_points.push_back(i+1);
- term_threshold += segment_size;
- if (m_rank == 0) std::cerr << "\t\t" << i+1 << ": " << seen_terms << " terms, " << 100*seen_terms / (float)num_terms() << "%" << std::endl;
- }
- }
- m_start = (m_rank == 0 ? 0 : end_points.at(m_rank-1));
- m_end = (m_rank == m_size-1 ? num_documents() : end_points.at(m_rank));
-
- return result;
- }
-
- void
- bounds(int* start, int* end) const {
- *start = m_start;
- *end = m_end;
- }
-
-
-
-protected:
- int m_rank, m_size;
- int m_start, m_end;
-};
-
-#endif // _MPI_CORPUS_HH
diff --git a/gi/pyp-topics/src/mpi-pyp-topics.cc b/gi/pyp-topics/src/mpi-pyp-topics.cc
deleted file mode 100644
index d6e22af6..00000000
--- a/gi/pyp-topics/src/mpi-pyp-topics.cc
+++ /dev/null
@@ -1,466 +0,0 @@
-#include <boost/mpi/communicator.hpp>
-
-#include "timing.h"
-#include "mpi-pyp-topics.hh"
-
-//#include <boost/date_time/posix_time/posix_time_types.hpp>
-void MPIPYPTopics::sample_corpus(const MPICorpus& corpus, int samples,
- int freq_cutoff_start, int freq_cutoff_end,
- int freq_cutoff_interval,
- int max_contexts_per_document) {
- Timer timer;
-
- //int documents = corpus.num_documents();
- /*
- m_mpi_start = 0;
- m_mpi_end = documents;
- if (m_size != 1) {
- assert(documents < std::numeric_limits<int>::max());
- m_mpi_start = (documents / m_size) * m_rank;
- if (m_rank == m_size-1) m_mpi_end = documents;
- else m_mpi_end = (documents / m_size)*(m_rank+1);
- }
- */
- corpus.bounds(&m_mpi_start, &m_mpi_end);
- int local_documents = m_mpi_end - m_mpi_start;
-
- if (!m_backoff.get()) {
- m_word_pyps.clear();
- m_word_pyps.push_back(MPIPYPs());
- }
-
-  if (m_am_root) std::cerr << "\n Training with " << m_word_pyps.size()-1 << " backoff level"
-    << (m_word_pyps.size()-1 == 1 ? ":" : "s:") << std::endl;
-
- for (int i=0; i<(int)m_word_pyps.size(); ++i) {
- m_word_pyps.at(i).reserve(m_num_topics);
- for (int j=0; j<m_num_topics; ++j)
- m_word_pyps.at(i).push_back(new MPIPYP<int>(0.5, 1.0));
- }
- if (m_am_root) std::cerr << std::endl;
-
- m_document_pyps.reserve(local_documents);
- //m_document_pyps.reserve(corpus.num_documents());
- //for (int j=0; j<corpus.num_documents(); ++j)
- for (int j=0; j<local_documents; ++j)
- m_document_pyps.push_back(new PYP<int>(0.5, 1.0));
-
- m_topic_p0 = 1.0/m_num_topics;
- m_term_p0 = 1.0/corpus.num_types();
- m_backoff_p0 = 1.0/corpus.num_documents();
-
-  if (m_am_root) std::cerr << " Documents: " << corpus.num_documents() << " ("
-    << local_documents << ")" << " Terms: " << corpus.num_types() << std::endl;
-
- int frequency_cutoff = freq_cutoff_start;
- if (m_am_root) std::cerr << " Context frequency cutoff set to " << frequency_cutoff << std::endl;
-
- timer.Reset();
- // Initialisation pass
- int document_id=0, topic_counter=0;
- for (int i=0; i<local_documents; ++i) {
- document_id = i+m_mpi_start;
-
- //for (Corpus::const_iterator corpusIt=corpus.begin();
- // corpusIt != corpus.end(); ++corpusIt, ++document_id) {
- m_corpus_topics.push_back(DocumentTopics(corpus.at(document_id).size(), 0));
-
- int term_index=0;
- for (Document::const_iterator docIt=corpus.at(document_id).begin();
- docIt != corpus.at(document_id).end(); ++docIt, ++term_index) {
- topic_counter++;
- Term term = *docIt;
-
- // sample a new_topic
- //int new_topic = (topic_counter % m_num_topics);
- int freq = corpus.context_count(term);
- int new_topic = -1;
- if (freq > frequency_cutoff
- && (!max_contexts_per_document || term_index < max_contexts_per_document)) {
- new_topic = sample(i, term);
- //new_topic = document_id % m_num_topics;
-
- // add the new topic to the PYPs
- increment(term, new_topic);
-
- if (m_use_topic_pyp) {
- F p0 = m_topic_pyp.prob(new_topic, m_topic_p0);
- int table_delta = m_document_pyps.at(i).increment(new_topic, p0);
- if (table_delta)
- m_topic_pyp.increment(new_topic, m_topic_p0, rnd);
- }
- else m_document_pyps.at(i).increment(new_topic, m_topic_p0);
- }
-
- m_corpus_topics.at(i).at(term_index) = new_topic;
- }
- }
-
-  // Synchronise the topic->word counts across the processes.
- synchronise();
-
- if (m_am_root) std::cerr << " Initialized in " << timer.Elapsed() << " seconds\n";
-
- int* randomDocIndices = new int[local_documents];
- for (int i = 0; i < local_documents; ++i)
- randomDocIndices[i] = i;
-
- // Sampling phase
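-  // The context-frequency cutoff is annealed: every freq_cutoff_interval
-  // samples it is lowered by one until it reaches freq_cutoff_end.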
- for (int curr_sample=0; curr_sample < samples; ++curr_sample) {
- if (freq_cutoff_interval > 0 && curr_sample != 1
- && curr_sample % freq_cutoff_interval == 1
- && frequency_cutoff > freq_cutoff_end) {
- frequency_cutoff--;
- if (m_am_root) std::cerr << "\n Context frequency cutoff set to " << frequency_cutoff << std::endl;
- }
-
- if (m_am_root) std::cerr << "\n -- Sample " << curr_sample << " "; std::cerr.flush();
-
- // Randomize the corpus indexing array
- int tmp;
- int processed_terms=0;
- for (int i = (local_documents-1); i > 0; --i) {
- //i+1 since j \in [0,i] but rnd() \in [0,1)
- int j = (int)(rnd() * (i+1));
- assert(j >= 0 && j <= i);
- tmp = randomDocIndices[i];
- randomDocIndices[i] = randomDocIndices[j];
- randomDocIndices[j] = tmp;
- }
-
- // for each document in the corpus
- for (int rand_doc=0; rand_doc<local_documents; ++rand_doc) {
- int doc_index = randomDocIndices[rand_doc];
- int document_id = doc_index + m_mpi_start;
- const Document& doc = corpus.at(document_id);
-
- // for each term in the document
- int term_index=0;
- Document::const_iterator docEnd = doc.end();
- for (Document::const_iterator docIt=doc.begin();
- docIt != docEnd; ++docIt, ++term_index) {
-
-        if (max_contexts_per_document && term_index >= max_contexts_per_document)  // match the initialisation pass
- break;
-
- Term term = *docIt;
- int freq = corpus.context_count(term);
-        if (freq <= frequency_cutoff)  // consistent with the initialisation pass
- continue;
-
- processed_terms++;
-
-        // remove the previous topic from the PYPs
-        int current_topic = m_corpus_topics.at(doc_index).at(term_index);
-        // a negative label means that the term hasn't been sampled yet
- if (current_topic >= 0) {
- decrement(term, current_topic);
-
- int table_delta = m_document_pyps.at(doc_index).decrement(current_topic);
- if (m_use_topic_pyp && table_delta < 0)
- m_topic_pyp.decrement(current_topic, rnd);
- }
-
- // sample a new_topic
- int new_topic = sample(doc_index, term);
-
- // add the new topic to the PYPs
- m_corpus_topics.at(doc_index).at(term_index) = new_topic;
- increment(term, new_topic);
-
- if (m_use_topic_pyp) {
- F p0 = m_topic_pyp.prob(new_topic, m_topic_p0);
- int table_delta = m_document_pyps.at(doc_index).increment(new_topic, p0);
- if (table_delta)
- m_topic_pyp.increment(new_topic, m_topic_p0, rnd);
- }
- else m_document_pyps.at(doc_index).increment(new_topic, m_topic_p0);
- }
- if (document_id && document_id % 10000 == 0) {
- if (m_am_root) std::cerr << "."; std::cerr.flush();
- }
- }
- std::cerr << "|"; std::cerr.flush();
-
-    // Synchronise the topic->word counts across the processes.
- synchronise();
-
- if (m_am_root) std::cerr << " ||| sampled " << processed_terms << " terms.";
-
- if (curr_sample != 0 && curr_sample % 10 == 0) {
- if (m_am_root) std::cerr << " ||| time=" << (timer.Elapsed() / 10.0) << " sec/sample" << std::endl;
- timer.Reset();
- if (m_am_root) std::cerr << " ... Resampling hyperparameters"; std::cerr.flush();
-
-      // resample the hyperparameters
- F log_p=0.0;
- for (std::vector<MPIPYPs>::iterator levelIt=m_word_pyps.begin();
- levelIt != m_word_pyps.end(); ++levelIt) {
- for (MPIPYPs::iterator pypIt=levelIt->begin();
- pypIt != levelIt->end(); ++pypIt) {
- pypIt->resample_prior(rnd);
- log_p += pypIt->log_restaurant_prob();
- }
- }
-
- for (PYPs::iterator pypIt=m_document_pyps.begin();
- pypIt != m_document_pyps.end(); ++pypIt) {
- pypIt->resample_prior(rnd);
- log_p += pypIt->log_restaurant_prob();
- }
-
- if (m_use_topic_pyp) {
- m_topic_pyp.resample_prior(rnd);
- log_p += m_topic_pyp.log_restaurant_prob();
- }
-
- std::cerr.precision(10);
- if (m_am_root) std::cerr << " ||| LLH=" << log_p << " ||| resampling time=" << timer.Elapsed() << " sec" << std::endl;
- timer.Reset();
-
- int k=0;
- if (m_am_root) std::cerr << "Topics distribution: ";
- std::cerr.precision(2);
- for (MPIPYPs::iterator pypIt=m_word_pyps.front().begin();
- pypIt != m_word_pyps.front().end(); ++pypIt, ++k) {
- if (m_am_root && k % 5 == 0) std::cerr << std::endl << '\t';
- if (m_am_root) std::cerr << "<" << k << ":" << pypIt->num_customers() << ","
- << pypIt->num_types() << "," << m_topic_pyp.prob(k, m_topic_p0) << "> ";
- }
- std::cerr.precision(4);
- if (m_am_root) std::cerr << std::endl;
- }
- }
- delete [] randomDocIndices;
-}
-
-void MPIPYPTopics::synchronise() {
-  // Synchronise the topic->word counts across the processes.
- //for (std::vector<MPIPYPs>::iterator levelIt=m_word_pyps.begin();
- // levelIt != m_word_pyps.end(); ++levelIt) {
-// std::vector<MPIPYPs>::iterator levelIt=m_word_pyps.begin();
-// {
-// for (MPIPYPs::iterator pypIt=levelIt->begin(); pypIt != levelIt->end(); ++pypIt) {
- for (size_t label=0; label < m_word_pyps.at(0).size(); ++label) {
- MPIPYP<int>& pyp = m_word_pyps.at(0).at(label);
-
- //if (!m_am_root) boost::mpi::communicator().barrier();
- //std::cerr << "Before Sync Process " << m_rank << ":";
- //pyp.debug_info(std::cerr); std::cerr << std::endl;
- //if (m_am_root) boost::mpi::communicator().barrier();
-
- MPIPYP<int>::dish_delta_type delta;
- pyp.synchronise(&delta);
-
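-    // Replay the net customer deltas contributed by the other processes as
-    // local increments/decrements so that every rank ends up with the same
-    // topic->word counts.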
- for (MPIPYP<int>::dish_delta_type::const_iterator it=delta.begin(); it != delta.end(); ++it) {
- int count = it->second;
- if (count > 0)
- for (int i=0; i < count; ++i) increment(it->first, label);
- if (count < 0)
- for (int i=0; i > count; --i) decrement(it->first, label);
- }
- pyp.reset_deltas();
-
- //if (!m_am_root) boost::mpi::communicator().barrier();
- //std::cerr << "After Sync Process " << m_rank << ":";
- //pyp.debug_info(std::cerr); std::cerr << std::endl;
- //if (m_am_root) boost::mpi::communicator().barrier();
- }
-// }
- // Synchronise the hierarchical topic pyp
- MPIPYP<int>::dish_delta_type topic_delta;
- m_topic_pyp.synchronise(&topic_delta);
- for (MPIPYP<int>::dish_delta_type::const_iterator it=topic_delta.begin(); it != topic_delta.end(); ++it) {
- int count = it->second;
- if (count > 0)
- for (int i=0; i < count; ++i)
- m_topic_pyp.increment(it->first, m_topic_p0, rnd);
- if (count < 0)
- for (int i=0; i > count; --i)
- m_topic_pyp.decrement(it->first, rnd);
- }
- m_topic_pyp.reset_deltas();
-}
-
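-// decrement/increment update the PYP for the given topic at this level and
-// then recurse on the backoff term up the backoff hierarchy, so counts stay
-// consistent across all levels.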
-void MPIPYPTopics::decrement(const Term& term, int topic, int level) {
- //std::cerr << "MPIPYPTopics::decrement(" << term << "," << topic << "," << level << ")" << std::endl;
- m_word_pyps.at(level).at(topic).decrement(term, rnd);
- if (m_backoff.get()) {
- Term backoff_term = (*m_backoff)[term];
- if (!m_backoff->is_null(backoff_term))
- decrement(backoff_term, topic, level+1);
- }
-}
-
-void MPIPYPTopics::increment(const Term& term, int topic, int level) {
- //std::cerr << "MPIPYPTopics::increment(" << term << "," << topic << "," << level << ")" << std::endl;
- m_word_pyps.at(level).at(topic).increment(term, word_pyps_p0(term, topic, level), rnd);
-
- if (m_backoff.get()) {
- Term backoff_term = (*m_backoff)[term];
- if (!m_backoff->is_null(backoff_term))
- increment(backoff_term, topic, level+1);
- }
-}
-
-int MPIPYPTopics::sample(const DocumentId& doc, const Term& term) {
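-  // Draw a topic from the unnormalised posterior p(w|k) * p(k|d) by
-  // accumulating the per-topic masses and then inverting the empirical CDF.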
- // First pass: collect probs
- F sum=0.0;
- std::vector<F> sums;
- for (int k=0; k<m_num_topics; ++k) {
- F p_w_k = prob(term, k);
-
- F topic_prob = m_topic_p0;
- if (m_use_topic_pyp) topic_prob = m_topic_pyp.prob(k, m_topic_p0);
-
- //F p_k_d = m_document_pyps[doc].prob(k, topic_prob);
- F p_k_d = m_document_pyps.at(doc).unnormalised_prob(k, topic_prob);
-
- sum += (p_w_k*p_k_d);
- sums.push_back(sum);
- }
- // Second pass: sample a topic
- F cutoff = rnd() * sum;
- for (int k=0; k<m_num_topics; ++k) {
- if (cutoff <= sums[k])
- return k;
- }
- std::cerr << cutoff << " " << sum << std::endl;
-  assert(false);
-  return -1;  // not reached: the cutoff always falls within the accumulated mass
-}
-
-MPIPYPTopics::F MPIPYPTopics::word_pyps_p0(const Term& term, int topic, int level) const {
- //for (int i=0; i<level+1; ++i) std::cerr << " ";
- //std::cerr << "MPIPYPTopics::word_pyps_p0(" << term << "," << topic << "," << level << ")" << std::endl;
-
- F p0 = m_term_p0;
- if (m_backoff.get()) {
- //static F fudge=m_backoff_p0; // TODO
-
- Term backoff_term = (*m_backoff)[term];
- if (!m_backoff->is_null(backoff_term)) {
- assert (level < m_backoff->order());
- //p0 = (1.0/(double)m_backoff->terms_at_level(level))*prob(backoff_term, topic, level+1);
- p0 = prob(backoff_term, topic, level+1);
- }
- else
- p0 = m_term_p0;
- }
- //for (int i=0; i<level+1; ++i) std::cerr << " ";
- //std::cerr << "MPIPYPTopics::word_pyps_p0(" << term << "," << topic << "," << level << ") = " << p0 << std::endl;
- return p0;
-}
-
-MPIPYPTopics::F MPIPYPTopics::prob(const Term& term, int topic, int level) const {
- //for (int i=0; i<level+1; ++i) std::cerr << " ";
- //std::cerr << "MPIPYPTopics::prob(" << term << "," << topic << "," << level << " " << factor << ")" << std::endl;
-
- F p0 = word_pyps_p0(term, topic, level);
- F p_w_k = m_word_pyps.at(level).at(topic).prob(term, p0);
-
- //for (int i=0; i<level+1; ++i) std::cerr << " ";
- //std::cerr << "MPIPYPTopics::prob(" << term << "," << topic << "," << level << ") = " << p_w_k << std::endl;
-
- return p_w_k;
-}
-
-int MPIPYPTopics::max_topic() const {
- if (!m_use_topic_pyp)
- return -1;
-
- F current_max=0.0;
- int current_topic=-1;
- for (int k=0; k<m_num_topics; ++k) {
- F prob = m_topic_pyp.prob(k, m_topic_p0);
- if (prob > current_max) {
- current_max = prob;
- current_topic = k;
- }
- }
- assert(current_topic >= 0);
- assert(current_max >= 0);
-  return current_topic;  // the topic index, not its probability
-}
-
-std::pair<int,MPIPYPTopics::F> MPIPYPTopics::max(const DocumentId& true_doc) const {
- //std::cerr << "MPIPYPTopics::max(" << doc << "," << term << ")" << std::endl;
- // collect probs
- F current_max=0.0;
- DocumentId local_doc = true_doc - m_mpi_start;
- int current_topic=-1;
- for (int k=0; k<m_num_topics; ++k) {
- //F p_w_k = prob(term, k);
-
- F topic_prob = m_topic_p0;
- if (m_use_topic_pyp)
- topic_prob = m_topic_pyp.prob(k, m_topic_p0);
-
- F prob = 0;
- if (local_doc < 0) prob = topic_prob;
- else prob = m_document_pyps.at(local_doc).prob(k, topic_prob);
-
- if (prob > current_max) {
- current_max = prob;
- current_topic = k;
- }
- }
- assert(current_topic >= 0);
- assert(current_max >= 0);
- return std::make_pair(current_topic, current_max);
-}
-
-std::pair<int,MPIPYPTopics::F> MPIPYPTopics::max(const DocumentId& true_doc, const Term& term) const {
- //std::cerr << "MPIPYPTopics::max(" << doc << "," << term << ")" << std::endl;
- // collect probs
- F current_max=0.0;
- DocumentId local_doc = true_doc - m_mpi_start;
- int current_topic=-1;
- for (int k=0; k<m_num_topics; ++k) {
- F p_w_k = prob(term, k);
-
- F topic_prob = m_topic_p0;
- if (m_use_topic_pyp)
- topic_prob = m_topic_pyp.prob(k, m_topic_p0);
-
- F p_k_d = 0;
- if (local_doc < 0) p_k_d = topic_prob;
- else p_k_d = m_document_pyps.at(local_doc).prob(k, topic_prob);
-
- F prob = (p_w_k*p_k_d);
- if (prob > current_max) {
- current_max = prob;
- current_topic = k;
- }
- }
- assert(current_topic >= 0);
- assert(current_max >= 0);
- return std::make_pair(current_topic, current_max);
-}
-
-std::ostream& MPIPYPTopics::print_document_topics(std::ostream& out) const {
- for (CorpusTopics::const_iterator corpusIt=m_corpus_topics.begin();
- corpusIt != m_corpus_topics.end(); ++corpusIt) {
- int term_index=0;
- for (DocumentTopics::const_iterator docIt=corpusIt->begin();
- docIt != corpusIt->end(); ++docIt, ++term_index) {
- if (term_index) out << " ";
- out << *docIt;
- }
- out << std::endl;
- }
- return out;
-}
-
-std::ostream& MPIPYPTopics::print_topic_terms(std::ostream& out) const {
- for (PYPs::const_iterator pypsIt=m_word_pyps.front().begin();
- pypsIt != m_word_pyps.front().end(); ++pypsIt) {
- int term_index=0;
- for (PYP<int>::const_iterator termIt=pypsIt->begin();
- termIt != pypsIt->end(); ++termIt, ++term_index) {
- if (term_index) out << " ";
- out << termIt->first << ":" << termIt->second;
- }
- out << std::endl;
- }
- return out;
-}
diff --git a/gi/pyp-topics/src/mpi-pyp-topics.hh b/gi/pyp-topics/src/mpi-pyp-topics.hh
deleted file mode 100644
index d96bc4e5..00000000
--- a/gi/pyp-topics/src/mpi-pyp-topics.hh
+++ /dev/null
@@ -1,106 +0,0 @@
-#ifndef MPI_PYP_TOPICS_HH
-#define MPI_PYP_TOPICS_HH
-
-#include <vector>
-#include <iostream>
-
-#include <boost/ptr_container/ptr_vector.hpp>
-#include <boost/random/uniform_real.hpp>
-#include <boost/random/variate_generator.hpp>
-#include <boost/random/mersenne_twister.hpp>
-#include <boost/random/inversive_congruential.hpp>
-#include <boost/random/linear_congruential.hpp>
-#include <boost/random/lagged_fibonacci.hpp>
-#include <boost/mpi/environment.hpp>
-#include <boost/mpi/communicator.hpp>
-
-
-#include "mpi-pyp.hh"
-#include "mpi-corpus.hh"
-
-class MPIPYPTopics {
-public:
- typedef std::vector<int> DocumentTopics;
- typedef std::vector<DocumentTopics> CorpusTopics;
- typedef double F;
-
-public:
- MPIPYPTopics(int num_topics, bool use_topic_pyp=false, unsigned long seed = 0)
- : m_num_topics(num_topics), m_word_pyps(1),
- m_topic_pyp(0.5,1.0), m_use_topic_pyp(use_topic_pyp),
- m_seed(seed),
- uni_dist(0,1), rng(seed == 0 ? (unsigned long)this : seed),
- rnd(rng, uni_dist), m_mpi_start(-1), m_mpi_end(-1) {
- boost::mpi::communicator m_world;
- m_rank = m_world.rank();
- m_size = m_world.size();
- m_am_root = (m_rank == 0);
- }
-
- void sample_corpus(const MPICorpus& corpus, int samples,
- int freq_cutoff_start=0, int freq_cutoff_end=0,
- int freq_cutoff_interval=0,
- int max_contexts_per_document=0);
-
- int sample(const DocumentId& doc, const Term& term);
- std::pair<int,F> max(const DocumentId& doc, const Term& term) const;
- std::pair<int,F> max(const DocumentId& doc) const;
- int max_topic() const;
-
- void set_backoff(const std::string& filename) {
- m_backoff.reset(new TermBackoff);
- m_backoff->read(filename);
- m_word_pyps.clear();
- m_word_pyps.resize(m_backoff->order(), MPIPYPs());
- }
- void set_backoff(TermBackoffPtr backoff) {
- m_backoff = backoff;
- m_word_pyps.clear();
- m_word_pyps.resize(m_backoff->order(), MPIPYPs());
- }
-
- F prob(const Term& term, int topic, int level=0) const;
- void decrement(const Term& term, int topic, int level=0);
- void increment(const Term& term, int topic, int level=0);
-
- std::ostream& print_document_topics(std::ostream& out) const;
- std::ostream& print_topic_terms(std::ostream& out) const;
-
- void synchronise();
-
-private:
- F word_pyps_p0(const Term& term, int topic, int level) const;
-
- int m_num_topics;
- F m_term_p0, m_topic_p0, m_backoff_p0;
-
- CorpusTopics m_corpus_topics;
- typedef boost::ptr_vector< PYP<int> > PYPs;
- typedef boost::ptr_vector< MPIPYP<int> > MPIPYPs;
- PYPs m_document_pyps;
- std::vector<MPIPYPs> m_word_pyps;
- MPIPYP<int> m_topic_pyp;
- bool m_use_topic_pyp;
-
- unsigned long m_seed;
-
- //typedef boost::mt19937 base_generator_type;
- //typedef boost::hellekalek1995 base_generator_type;
- typedef boost::lagged_fibonacci607 base_generator_type;
- typedef boost::uniform_real<> uni_dist_type;
- typedef boost::variate_generator<base_generator_type&, uni_dist_type> gen_type;
-
- uni_dist_type uni_dist;
- base_generator_type rng; //this gets the seed
- gen_type rnd; //instantiate: rnd(rng, uni_dist)
- //call: rnd() generates uniform on [0,1)
-
- TermBackoffPtr m_backoff;
-
- boost::mpi::communicator m_world;
- bool m_am_root;
- int m_rank, m_size;
- int m_mpi_start, m_mpi_end;
-};
-
-#endif // MPI_PYP_TOPICS_HH
diff --git a/gi/pyp-topics/src/mpi-pyp.hh b/gi/pyp-topics/src/mpi-pyp.hh
deleted file mode 100644
index c2341b9e..00000000
--- a/gi/pyp-topics/src/mpi-pyp.hh
+++ /dev/null
@@ -1,447 +0,0 @@
-#ifndef _mpipyp_hh
-#define _mpipyp_hh
-
-#include <math.h>
-#include <map>
-#include <tr1/unordered_map>
-//#include <google/sparse_hash_map>
-
-#include <boost/random/uniform_real.hpp>
-#include <boost/random/variate_generator.hpp>
-#include <boost/random/mersenne_twister.hpp>
-#include <boost/tuple/tuple.hpp>
-#include <boost/serialization/map.hpp>
-#include <boost/mpi.hpp>
-#include <boost/mpi/environment.hpp>
-#include <boost/mpi/communicator.hpp>
-#include <boost/mpi/operations.hpp>
-
-
-#include "pyp.hh"
-
-//
-// Pitman-Yor process with customer and table tracking
-//
-
-template <typename Dish, typename Hash=std::tr1::hash<Dish> >
-class MPIPYP : public PYP<Dish, Hash> {
-public:
- typedef std::map<Dish, int> dish_delta_type;
-
- MPIPYP(double a, double b, Hash hash=Hash());
-
- template < typename Uniform01 >
- int increment(Dish d, double p0, Uniform01& rnd);
- template < typename Uniform01 >
- int decrement(Dish d, Uniform01& rnd);
-
- void clear();
- void reset_deltas();
-
- void synchronise(dish_delta_type* result);
-
-private:
- typedef std::map<Dish, typename PYP<Dish,Hash>::TableCounter> table_delta_type;
-
- dish_delta_type m_count_delta;
- table_delta_type m_table_delta;
-};
-
-template <typename Dish, typename Hash>
-MPIPYP<Dish,Hash>::MPIPYP(double a, double b, Hash h)
-: PYP<Dish,Hash>(a, b, 0, h) {}
-
-template <typename Dish, typename Hash>
- template <typename Uniform01>
-int
-MPIPYP<Dish,Hash>::increment(Dish dish, double p0, Uniform01& rnd) {
- //std::cerr << "-----INCREMENT DISH " << dish << std::endl;
- int delta = 0;
- int table_joined=-1;
- typename PYP<Dish,Hash>::TableCounter &tc = PYP<Dish,Hash>::_dish_tables[dish];
-
- // seated on a new or existing table?
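-  // Pitman-Yor CRP seating rule: an existing table is chosen with mass
-  // proportional to (c - a*t), a new table with mass (b + a*T) * p0.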
- int c = PYP<Dish,Hash>::count(dish);
- int t = PYP<Dish,Hash>::num_tables(dish);
- int T = PYP<Dish,Hash>::num_tables();
- double& a = PYP<Dish,Hash>::_a;
- double& b = PYP<Dish,Hash>::_b;
- double pshare = (c > 0) ? (c - a*t) : 0.0;
- double pnew = (b + a*T) * p0;
- if (pshare < 0.0) {
- std::cerr << pshare << " " << c << " " << a << " " << t << std::endl;
- assert(false);
- }
-
- if (rnd() < pnew / (pshare + pnew)) {
- // assign to a new table
- tc.tables += 1;
- tc.table_histogram[1] += 1;
- PYP<Dish,Hash>::_total_tables += 1;
- delta = 1;
- table_joined = 1;
- }
- else {
- // randomly assign to an existing table
- // remove constant denominator from inner loop
- double r = rnd() * (c - a*t);
- for (std::map<int,int>::iterator
- hit = tc.table_histogram.begin();
- hit != tc.table_histogram.end(); ++hit) {
- r -= ((hit->first - a) * hit->second);
- if (r <= 0) {
- tc.table_histogram[hit->first+1] += 1;
- hit->second -= 1;
- table_joined = hit->first+1;
- if (hit->second == 0)
- tc.table_histogram.erase(hit);
- break;
- }
- }
- if (r > 0) {
- std::cerr << r << " " << c << " " << a << " " << t << std::endl;
- assert(false);
- }
- delta = 0;
- }
-
- std::tr1::unordered_map<Dish,int,Hash>::operator[](dish) += 1;
- //google::sparse_hash_map<Dish,int,Hash>::operator[](dish) += 1;
- PYP<Dish,Hash>::_total_customers += 1;
-
- // MPI Delta handling
- // track the customer entering
- typename dish_delta_type::iterator customer_it;
- bool customer_insert_result;
- boost::tie(customer_it, customer_insert_result)
- = m_count_delta.insert(std::make_pair(dish,0));
-
- customer_it->second += 1;
- if (customer_it->second == 0)
- m_count_delta.erase(customer_it);
-
- // increment the histogram bar for the table joined
- /*
- typename PYP<Dish,Hash>::TableCounter &delta_tc = m_table_delta[dish];
-
- std::map<int,int> &histogram = delta_tc.table_histogram;
- assert (table_joined > 0);
-
- typename std::map<int,int>::iterator table_it; bool table_insert_result;
- boost::tie(table_it, table_insert_result) = histogram.insert(std::make_pair(table_joined,0));
- table_it->second += 1;
- if (delta == 0) {
- // decrement the histogram bar for the table left
- typename std::map<int,int>::iterator left_table_it;
- boost::tie(left_table_it, table_insert_result)
- = histogram.insert(std::make_pair(table_joined-1,0));
- left_table_it->second -= 1;
- if (left_table_it->second == 0) histogram.erase(left_table_it);
- }
- else delta_tc.tables += 1;
-
- if (table_it->second == 0) histogram.erase(table_it);
-
- //std::cerr << "Added (" << delta << ") " << dish << " to table " << table_joined << "\n";
- //std::cerr << "Dish " << dish << " has " << count(dish) << " customers, and is sitting at " << PYP<Dish,Hash>::num_tables(dish) << " tables.\n";
- //for (std::map<int,int>::const_iterator
- // hit = delta_tc.table_histogram.begin();
- // hit != delta_tc.table_histogram.end(); ++hit) {
- // std::cerr << " " << hit->second << " tables with " << hit->first << " customers." << std::endl;
- //}
- //std::cerr << "Added (" << delta << ") " << dish << " to table " << table_joined << "\n";
- //std::cerr << "Dish " << dish << " has " << count(dish) << " customers, and is sitting at " << PYP<Dish,Hash>::num_tables(dish) << " tables.\n";
- int x_num_customers=0, x_num_table=0;
- for (std::map<int,int>::const_iterator
- hit = delta_tc.table_histogram.begin();
- hit != delta_tc.table_histogram.end(); ++hit) {
- x_num_table += hit->second;
- x_num_customers += (hit->second*hit->first);
- }
- int tmp_c = PYP<Dish,Hash>::count(dish);
- int tmp_t = PYP<Dish,Hash>::num_tables(dish);
- assert (x_num_customers <= tmp_c);
- assert (x_num_table <= tmp_t);
-
- if (delta_tc.table_histogram.empty()) {
- assert (delta_tc.tables == 0);
- m_table_delta.erase(dish);
- }
- */
-
- //PYP<Dish,Hash>::debug_info(std::cerr);
- //std::cerr << " Dish " << dish << " has count " << PYP<Dish,Hash>::count(dish) << " tables " << PYP<Dish,Hash>::num_tables(dish) << std::endl;
-
- return delta;
-}
-
-template <typename Dish, typename Hash>
- template <typename Uniform01>
-int
-MPIPYP<Dish,Hash>::decrement(Dish dish, Uniform01& rnd)
-{
- //std::cerr << "-----DECREMENT DISH " << dish << std::endl;
- typename std::tr1::unordered_map<Dish, int>::iterator dcit = find(dish);
- //typename google::sparse_hash_map<Dish, int>::iterator dcit = find(dish);
- if (dcit == PYP<Dish,Hash>::end()) {
- std::cerr << dish << std::endl;
- assert(false);
- }
-
- int delta = 0, table_left=-1;
-
- typename std::tr1::unordered_map<Dish, typename PYP<Dish,Hash>::TableCounter>::iterator dtit
- = PYP<Dish,Hash>::_dish_tables.find(dish);
- //typename google::sparse_hash_map<Dish, TableCounter>::iterator dtit = _dish_tables.find(dish);
- if (dtit == PYP<Dish,Hash>::_dish_tables.end()) {
- std::cerr << dish << std::endl;
- assert(false);
- }
- typename PYP<Dish,Hash>::TableCounter &tc = dtit->second;
-
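-  // Choose the table this customer leaves, with probability proportional
-  // to the number of customers currently seated at it.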
- double r = rnd() * PYP<Dish,Hash>::count(dish);
- for (std::map<int,int>::iterator hit = tc.table_histogram.begin();
- hit != tc.table_histogram.end(); ++hit) {
- r -= (hit->first * hit->second);
- if (r <= 0) {
- table_left = hit->first;
- if (hit->first > 1) {
- tc.table_histogram[hit->first-1] += 1;
- }
- else {
- delta = -1;
- tc.tables -= 1;
- PYP<Dish,Hash>::_total_tables -= 1;
- }
-
- hit->second -= 1;
- if (hit->second == 0) tc.table_histogram.erase(hit);
- break;
- }
- }
- if (r > 0) {
- std::cerr << r << " " << PYP<Dish,Hash>::count(dish) << " " << PYP<Dish,Hash>::_a << " "
- << PYP<Dish,Hash>::num_tables(dish) << std::endl;
- assert(false);
- }
-
- // remove the customer
- dcit->second -= 1;
- PYP<Dish,Hash>::_total_customers -= 1;
- assert(dcit->second >= 0);
- if (dcit->second == 0) {
- PYP<Dish,Hash>::erase(dcit);
- PYP<Dish,Hash>::_dish_tables.erase(dtit);
- }
-
- // MPI Delta processing
- typename dish_delta_type::iterator it;
- bool insert_result;
- boost::tie(it, insert_result) = m_count_delta.insert(std::make_pair(dish,0));
- it->second -= 1;
- if (it->second == 0) m_count_delta.erase(it);
-
- assert (table_left > 0);
- typename PYP<Dish,Hash>::TableCounter& delta_tc = m_table_delta[dish];
- if (table_left > 1) {
- std::map<int,int>::iterator tit;
- boost::tie(tit, insert_result) = delta_tc.table_histogram.insert(std::make_pair(table_left-1,0));
- tit->second += 1;
- if (tit->second == 0) delta_tc.table_histogram.erase(tit);
- }
- else delta_tc.tables -= 1;
-
- std::map<int,int>::iterator tit;
- boost::tie(tit, insert_result) = delta_tc.table_histogram.insert(std::make_pair(table_left,0));
- tit->second -= 1;
- if (tit->second == 0) delta_tc.table_histogram.erase(tit);
-
- // std::cerr << "Dish " << dish << " has " << count(dish) << " customers, and is sitting at " << PYP<Dish,Hash>::num_tables(dish) << " tables.\n";
- // for (std::map<int,int>::const_iterator
- // hit = delta_tc.table_histogram.begin();
- // hit != delta_tc.table_histogram.end(); ++hit) {
- // std::cerr << " " << hit->second << " tables with " << hit->first << " customers." << std::endl;
- // }
- int x_num_customers=0, x_num_table=0;
- for (std::map<int,int>::const_iterator
- hit = delta_tc.table_histogram.begin();
- hit != delta_tc.table_histogram.end(); ++hit) {
- x_num_table += hit->second;
- x_num_customers += (hit->second*hit->first);
- }
- int tmp_c = PYP<Dish,Hash>::count(dish);
- int tmp_t = PYP<Dish,Hash>::num_tables(dish);
- assert (x_num_customers <= tmp_c);
- assert (x_num_table <= tmp_t);
-
- if (delta_tc.table_histogram.empty()) {
- // std::cerr << " DELETING " << dish << std::endl;
- assert (delta_tc.tables == 0);
- m_table_delta.erase(dish);
- }
-
- //PYP<Dish,Hash>::debug_info(std::cerr);
- //std::cerr << " Dish " << dish << " has count " << PYP<Dish,Hash>::count(dish) << " tables " << PYP<Dish,Hash>::num_tables(dish) << std::endl;
- return delta;
-}
-
-template <typename Dish, typename Hash>
-void
-MPIPYP<Dish,Hash>::clear() {
- PYP<Dish,Hash>::clear();
- reset_deltas();
-}
-
-template <typename Dish, typename Hash>
-void
-MPIPYP<Dish,Hash>::reset_deltas() {
- m_count_delta.clear();
- m_table_delta.clear();
-}
-
-template <typename Dish>
-struct sum_maps {
- typedef std::map<Dish,int> map_type;
- map_type& operator() (map_type& l, map_type const & r) const {
- for (typename map_type::const_iterator it=r.begin(); it != r.end(); it++)
- l[it->first] += it->second;
- return l;
- }
-};
-
-template <typename Dish>
-struct subtract_maps {
- typedef std::map<Dish,int> map_type;
- map_type& operator() (map_type& l, map_type const & r) const {
- for (typename map_type::const_iterator it=r.begin(); it != r.end(); it++)
- l[it->first] -= it->second;
- return l;
- }
-};
-
-// Needed Boost definitions
-namespace boost {
- namespace mpi {
- template <>
- struct is_commutative< sum_maps<int>, std::map<int,int> > : mpl::true_ {};
- }
-
- namespace serialization {
- template<class Archive>
- void serialize(Archive & ar, PYP<int>::TableCounter& t, const unsigned int version) {
- ar & t.table_histogram;
- ar & t.tables;
- }
-
- } // namespace serialization
-} // namespace boost
-
-template <typename A, typename B, typename C>
-struct triple {
- triple() {}
- triple(const A& a, const B& b, const C& c) : first(a), second(b), third(c) {}
- A first;
- B second;
- C third;
-
- template<class Archive>
- void serialize(Archive &ar, const unsigned int version){
- ar & first;
- ar & second;
- ar & third;
- }
-};
-
-BOOST_IS_BITWISE_SERIALIZABLE(MPIPYP<int>::dish_delta_type)
-BOOST_CLASS_TRACKING(MPIPYP<int>::dish_delta_type,track_never)
-
-template <typename Dish, typename Hash>
-void
-MPIPYP<Dish,Hash>::synchronise(dish_delta_type* result) {
- boost::mpi::communicator world;
- //int rank = world.rank(), size = world.size();
-
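-  // All-reduce the per-dish customer deltas across ranks, then subtract
-  // this process's own contribution so that *result holds only the changes
-  // made by the other processes.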
- boost::mpi::all_reduce(world, m_count_delta, *result, sum_maps<Dish>());
- subtract_maps<Dish>()(*result, m_count_delta);
-
-/*
- // communicate the customer count deltas
- dish_delta_type global_dish_delta;
- boost::mpi::all_reduce(world, m_count_delta, global_dish_delta, sum_maps<Dish>());
-
- // update this restaurant
- for (typename dish_delta_type::const_iterator it=global_dish_delta.begin();
- it != global_dish_delta.end(); ++it) {
- int global_delta = it->second - m_count_delta[it->first];
- if (global_delta == 0) continue;
- typename std::tr1::unordered_map<Dish,int,Hash>::iterator dit; bool inserted;
- boost::tie(dit, inserted)
- = std::tr1::unordered_map<Dish,int,Hash>::insert(std::make_pair(it->first, 0));
- dit->second += global_delta;
- assert(dit->second >= 0);
- if (dit->second == 0) {
- std::tr1::unordered_map<Dish,int,Hash>::erase(dit);
- }
-
- PYP<Dish,Hash>::_total_customers += (it->second - m_count_delta[it->first]);
- int tmp = PYP<Dish,Hash>::_total_customers;
- assert(tmp >= 0);
- //std::cerr << "Process " << rank << " adding " << (it->second - m_count_delta[it->first]) << " of customer " << it->first << std::endl;
- }
-*/
-/*
- // communicate the table count deltas
- for (int process = 0; process < size; ++process) {
- typename std::vector< triple<Dish, int, int> > message;
- if (rank == process) {
- // broadcast deltas
- for (typename table_delta_type::const_iterator dish_it=m_table_delta.begin();
- dish_it != m_table_delta.end(); ++dish_it) {
- //assert (dish_it->second.tables > 0);
- for (std::map<int,int>::const_iterator it=dish_it->second.table_histogram.begin();
- it != dish_it->second.table_histogram.end(); ++it) {
- triple<Dish, int, int> m(dish_it->first, it->first, it->second);
- message.push_back(m);
- }
- // append a special message with the total table delta for this dish
- triple<Dish, int, int> m(dish_it->first, -1, dish_it->second.tables);
- message.push_back(m);
- }
- boost::mpi::broadcast(world, message, process);
- }
- else {
- // receive deltas
- boost::mpi::broadcast(world, message, process);
- for (typename std::vector< triple<Dish, int, int> >::const_iterator it=message.begin(); it != message.end(); ++it) {
- typename PYP<Dish,Hash>::TableCounter& tc = PYP<Dish,Hash>::_dish_tables[it->first];
- if (it->second >= 0) {
- std::map<int,int>::iterator tit; bool inserted;
- boost::tie(tit, inserted) = tc.table_histogram.insert(std::make_pair(it->second, 0));
- tit->second += it->third;
- if (tit->second < 0) {
- std::cerr << tit->first << " " << tit->second << " " << it->first << " " << it->second << " " << it->third << std::endl;
- assert(tit->second >= 0);
- }
- if (tit->second == 0) {
- tc.table_histogram.erase(tit);
- }
- }
- else {
- tc.tables += it->third;
- PYP<Dish,Hash>::_total_tables += it->third;
- assert(tc.tables >= 0);
- if (tc.tables == 0) assert(tc.table_histogram.empty());
- if (tc.table_histogram.empty()) {
- assert (tc.tables == 0);
- PYP<Dish,Hash>::_dish_tables.erase(it->first);
- }
- }
- }
- }
- }
-*/
-
-// reset_deltas();
-}
-
-#endif
diff --git a/gi/pyp-topics/src/mpi-train-contexts.cc b/gi/pyp-topics/src/mpi-train-contexts.cc
deleted file mode 100644
index e05e0eac..00000000
--- a/gi/pyp-topics/src/mpi-train-contexts.cc
+++ /dev/null
@@ -1,201 +0,0 @@
-// STL
-#include <iostream>
-#include <fstream>
-#include <algorithm>
-#include <iterator>
-
-// Boost
-#include <boost/program_options/parsers.hpp>
-#include <boost/program_options/variables_map.hpp>
-#include <boost/scoped_ptr.hpp>
-#include <boost/mpi/environment.hpp>
-#include <boost/mpi/communicator.hpp>
-#include <boost/lexical_cast.hpp>
-
-// Local
-#include "mpi-pyp-topics.hh"
-#include "corpus.hh"
-#include "mpi-corpus.hh"
-#include "gzstream.hh"
-
-static const char *REVISION = "$Rev: 170 $";
-
-// Namespaces
-using namespace boost;
-using namespace boost::program_options;
-using namespace std;
-
-int main(int argc, char **argv)
-{
- mpi::environment env(argc, argv);
- mpi::communicator world;
- int rank = world.rank();
- bool am_root = (rank==0);
-  if (am_root) cout << "Pitman-Yor topic models: Copyright 2010 Phil Blunsom\n";
- if (am_root) std::cout << "I am process " << world.rank() << " of " << world.size() << "." << std::endl;
- if (am_root) cout << REVISION << '\n' <<endl;
-
- ////////////////////////////////////////////////////////////////////////////////////////////
- // Command line processing
- variables_map vm;
-
- // Command line processing
- {
- options_description cmdline_specific("Command line specific options");
- cmdline_specific.add_options()
- ("help,h", "print help message")
- ("config,c", value<string>(), "config file specifying additional command line options")
- ;
- options_description config_options("Allowed options");
- config_options.add_options()
- ("help,h", "print help message")
- ("data,d", value<string>(), "file containing the documents and context terms")
- ("topics,t", value<int>()->default_value(50), "number of topics")
- ("document-topics-out,o", value<string>(), "file to write the document topics to")
-      ("default-topics-out", value<string>(), "file to write the default term topic assignments to")
- ("topic-words-out,w", value<string>(), "file to write the topic word distribution to")
- ("samples,s", value<int>()->default_value(10), "number of sampling passes through the data")
- ("backoff-type", value<string>(), "backoff type: none|simple")
-// ("filter-singleton-contexts", "filter singleton contexts")
- ("hierarchical-topics", "Use a backoff hierarchical PYP as the P0 for the document topics distribution.")
- ("binary-counts,b", "Use binary rather than integer counts for contexts.")
- ("freq-cutoff-start", value<int>()->default_value(0), "initial frequency cutoff.")
- ("freq-cutoff-end", value<int>()->default_value(0), "final frequency cutoff.")
- ("freq-cutoff-interval", value<int>()->default_value(0), "number of iterations between frequency decrement.")
- ("max-contexts-per-document", value<int>()->default_value(0), "Only sample the n most frequent contexts for a document.")
- ;
-
- cmdline_specific.add(config_options);
-
- store(parse_command_line(argc, argv, cmdline_specific), vm);
- notify(vm);
-
- if (vm.count("config") > 0) {
- ifstream config(vm["config"].as<string>().c_str());
- store(parse_config_file(config, config_options), vm);
- }
-
- if (vm.count("help")) {
- cout << cmdline_specific << "\n";
- return 1;
- }
- }
- ////////////////////////////////////////////////////////////////////////////////////////////
-
- if (!vm.count("data")) {
- cerr << "Please specify a file containing the data." << endl;
- return 1;
- }
-
- // seed the random number generator: 0 = automatic, specify value otherwise
- unsigned long seed = 0;
- MPIPYPTopics model(vm["topics"].as<int>(), vm.count("hierarchical-topics"), seed);
-
- // read the data
- BackoffGenerator* backoff_gen=0;
- if (vm.count("backoff-type")) {
- if (vm["backoff-type"].as<std::string>() == "none") {
- backoff_gen = 0;
- }
- else if (vm["backoff-type"].as<std::string>() == "simple") {
- backoff_gen = new SimpleBackoffGenerator();
- }
- else {
- cerr << "Backoff type (--backoff-type) must be one of none|simple." <<endl;
- return(1);
- }
- }
-
- //ContextsCorpus contexts_corpus;
- MPICorpus contexts_corpus;
- contexts_corpus.read_contexts(vm["data"].as<string>(), backoff_gen, /*vm.count("filter-singleton-contexts")*/ false, vm.count("binary-counts"));
- int mpi_start = 0, mpi_end = 0;
- contexts_corpus.bounds(&mpi_start, &mpi_end);
- std::cerr << "\tProcess " << rank << " has documents " << mpi_start << " -> " << mpi_end << "." << std::endl;
-
- model.set_backoff(contexts_corpus.backoff_index());
-
- if (backoff_gen)
- delete backoff_gen;
-
- // train the sampler
- model.sample_corpus(contexts_corpus, vm["samples"].as<int>(),
- vm["freq-cutoff-start"].as<int>(),
- vm["freq-cutoff-end"].as<int>(),
- vm["freq-cutoff-interval"].as<int>(),
- vm["max-contexts-per-document"].as<int>());
-
- if (vm.count("document-topics-out")) {
- std::ofstream documents_out((vm["document-topics-out"].as<string>() + ".pyp-process-" + boost::lexical_cast<std::string>(rank)).c_str());
- //int documents = contexts_corpus.num_documents();
- /*
- int mpi_start = 0, mpi_end = documents;
- if (world.size() != 1) {
- mpi_start = (documents / world.size()) * rank;
- if (rank == world.size()-1) mpi_end = documents;
- else mpi_end = (documents / world.size())*(rank+1);
- }
- */
-
- map<int,int> all_terms;
- for (int document_id=mpi_start; document_id<mpi_end; ++document_id) {
- assert (document_id < contexts_corpus.num_documents());
- const Document& doc = contexts_corpus.at(document_id);
- vector<int> unique_terms;
- for (Document::const_iterator docIt=doc.begin(); docIt != doc.end(); ++docIt) {
- if (unique_terms.empty() || *docIt != unique_terms.back())
- unique_terms.push_back(*docIt);
- // increment this term's frequency
- pair<map<int,int>::iterator,bool> insert_result = all_terms.insert(make_pair(*docIt,1));
- if (!insert_result.second)
- all_terms[*docIt] = all_terms[*docIt] + 1;
- }
- documents_out << contexts_corpus.key(document_id) << '\t';
- documents_out << model.max(document_id).first << " " << doc.size() << " ||| ";
- for (std::vector<int>::const_iterator termIt=unique_terms.begin(); termIt != unique_terms.end(); ++termIt) {
- if (termIt != unique_terms.begin())
- documents_out << " ||| ";
- vector<std::string> strings = contexts_corpus.context2string(*termIt);
- copy(strings.begin(), strings.end(),ostream_iterator<std::string>(documents_out, " "));
- std::pair<int,MPIPYPTopics::F> maxinfo = model.max(document_id, *termIt);
- documents_out << "||| C=" << maxinfo.first << " P=" << maxinfo.second;
- }
- documents_out <<endl;
- }
- documents_out.close();
- world.barrier();
-
- if (am_root) {
- ogzstream root_documents_out(vm["document-topics-out"].as<string>().c_str());
- for (int p=0; p < world.size(); ++p) {
- std::string rank_p_prefix((vm["document-topics-out"].as<string>() + ".pyp-process-" + boost::lexical_cast<std::string>(p)).c_str());
- std::ifstream rank_p_trees_istream(rank_p_prefix.c_str(), std::ios_base::binary);
- root_documents_out << rank_p_trees_istream.rdbuf();
- rank_p_trees_istream.close();
- remove((rank_p_prefix).c_str());
- }
- root_documents_out.close();
- }
-
- if (am_root && vm.count("default-topics-out")) {
- ofstream default_topics(vm["default-topics-out"].as<string>().c_str());
- default_topics << model.max_topic() <<endl;
- for (std::map<int,int>::const_iterator termIt=all_terms.begin(); termIt != all_terms.end(); ++termIt) {
- vector<std::string> strings = contexts_corpus.context2string(termIt->first);
- default_topics << model.max(-1, termIt->first).first << " ||| " << termIt->second << " ||| ";
- copy(strings.begin(), strings.end(),ostream_iterator<std::string>(default_topics, " "));
- default_topics <<endl;
- }
- }
- }
-
- if (am_root && vm.count("topic-words-out")) {
- ogzstream topics_out(vm["topic-words-out"].as<string>().c_str());
- model.print_topic_terms(topics_out);
- topics_out.close();
- }
-
- cout <<endl;
-
- return 0;
-}
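
Note on the output-gathering step in the deleted driver above: every rank writes its own partial document-topics file, all ranks synchronise at the barrier, and the root concatenates the parts and deletes them. A minimal sketch of that pattern follows; it is not from the original sources (it substitutes a plain std::ofstream for the ogzstream used above, and the helper name is invented).

#include <cstdio>
#include <fstream>
#include <sstream>
#include <string>

// Concatenate "<out>.pyp-process-<p>" for p = 0..world_size-1 into <out>,
// removing each partial file once it has been copied.
void merge_rank_outputs(const std::string& out, int world_size) {
  std::ofstream merged(out.c_str());
  for (int p = 0; p < world_size; ++p) {
    std::ostringstream name;
    name << out << ".pyp-process-" << p;
    std::ifstream part(name.str().c_str(), std::ios_base::binary);
    merged << part.rdbuf();            // stream the whole partial file
    part.close();
    std::remove(name.str().c_str());   // clean up the per-rank temporary
  }
}
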
diff --git a/gi/pyp-topics/src/mt19937ar.c b/gi/pyp-topics/src/mt19937ar.c
deleted file mode 100644
index 6551ea39..00000000
--- a/gi/pyp-topics/src/mt19937ar.c
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- A C-program for MT19937, with initialization improved 2002/1/26.
- Coded by Takuji Nishimura and Makoto Matsumoto.
-
- Before using, initialize the state by using mt_init_genrand(seed)
- or mt_init_by_array(init_key, key_length).
-
- Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- 3. The names of its contributors may not be used to endorse or promote
- products derived from this software without specific prior written
- permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
- Any feedback is very welcome.
- http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
- email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
-*/
-
-#include "mt19937ar.h" /* XXX MJ 17th March 2006 */
-
-/* Period parameters */
-#define N 624
-#define M 397
-#define MATRIX_A 0x9908b0dfUL /* constant vector a */
-#define UPPER_MASK 0x80000000UL /* most significant w-r bits */
-#define LOWER_MASK 0x7fffffffUL /* least significant r bits */
-
-static unsigned long mt[N]; /* the array for the state vector */
-static int mti=N+1; /* mti==N+1 means mt[N] is not initialized */
-
-/* initializes mt[N] with a seed */
-void mt_init_genrand(unsigned long s)
-{
- mt[0]= s & 0xffffffffUL;
- for (mti=1; mti<N; mti++) {
- mt[mti] =
- (1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
- /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
- /* In the previous versions, MSBs of the seed affect */
- /* only MSBs of the array mt[]. */
- /* 2002/01/09 modified by Makoto Matsumoto */
- mt[mti] &= 0xffffffffUL;
- /* for >32 bit machines */
- }
-}
-
-/* initialize by an array with array-length */
-/* init_key is the array for initializing keys */
-/* key_length is its length */
-/* slight change for C++, 2004/2/26 */
-void mt_init_by_array(unsigned long init_key[], int key_length)
-{
- int i, j, k;
- mt_init_genrand(19650218UL);
- i=1; j=0;
- k = (N>key_length ? N : key_length);
- for (; k; k--) {
- mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525UL))
- + init_key[j] + j; /* non linear */
- mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
- i++; j++;
- if (i>=N) { mt[0] = mt[N-1]; i=1; }
- if (j>=key_length) j=0;
- }
- for (k=N-1; k; k--) {
- mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941UL))
- - i; /* non linear */
- mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
- i++;
- if (i>=N) { mt[0] = mt[N-1]; i=1; }
- }
-
- mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */
-}
-
-/* generates a random number on [0,0xffffffff]-interval */
-unsigned long mt_genrand_int32(void)
-{
- unsigned long y;
- static unsigned long mag01[2]={0x0UL, MATRIX_A};
- /* mag01[x] = x * MATRIX_A for x=0,1 */
-
- if (mti >= N) { /* generate N words at one time */
- int kk;
-
- if (mti == N+1) /* if mt_init_genrand() has not been called, */
- mt_init_genrand(5489UL); /* a default initial seed is used */
-
- for (kk=0;kk<N-M;kk++) {
- y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
- mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & 0x1UL];
- }
- for (;kk<N-1;kk++) {
- y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
- mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & 0x1UL];
- }
- y = (mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK);
- mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1UL];
-
- mti = 0;
- }
-
- y = mt[mti++];
-
- /* Tempering */
- y ^= (y >> 11);
- y ^= (y << 7) & 0x9d2c5680UL;
- y ^= (y << 15) & 0xefc60000UL;
- y ^= (y >> 18);
-
- return y;
-}
-
-/* generates a random number on [0,0x7fffffff]-interval */
-long mt_genrand_int31(void)
-{
- return (long)( mt_genrand_int32()>>1);
-}
-
-/* generates a random number on [0,1]-real-interval */
-double mt_genrand_real1(void)
-{
- return mt_genrand_int32()*(1.0/4294967295.0);
- /* divided by 2^32-1 */
-}
-
-/* generates a random number on [0,1)-real-interval */
-double mt_genrand_real2(void)
-{
- return mt_genrand_int32()*(1.0/4294967296.0);
- /* divided by 2^32 */
-}
-
-/* generates a random number on (0,1)-real-interval */
-double mt_genrand_real3(void)
-{
- return (((double) mt_genrand_int32()) + 0.5)*(1.0/4294967296.0);
- /* divided by 2^32 */
-}
-
-/* generates a random number on [0,1) with 53-bit resolution*/
-double mt_genrand_res53(void)
-{
- unsigned long a=mt_genrand_int32()>>5, b=mt_genrand_int32()>>6;
- return(a*67108864.0+b)*(1.0/9007199254740992.0);
-}
-/* These real versions are due to Isaku Wada, 2002/01/09 added */
-
-/*
-#include <stdio.h>
-
-int main(void)
-{
- int i;
- unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4;
- mt_init_by_array(init, length);
- printf("1000 outputs of genrand_int32()\n");
- for (i=0; i<1000; i++) {
- printf("%10lu ", mt_genrand_int32());
- if (i%5==4) printf("\n");
- }
- printf("\n1000 outputs of genrand_real2()\n");
- for (i=0; i<1000; i++) {
- printf("%10.8f ", mt_genrand_real2());
- if (i%5==4) printf("\n");
- }
- return 0;
-}
-*/
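
For orientation, the public entry points of the generator above are the mt_* functions: seed once with mt_init_genrand() or mt_init_by_array(), then draw integers or doubles. A minimal, hypothetical driver (not part of the deleted file) would look like:

#include <stdio.h>
#include "mt19937ar.h"

int main(void) {
  mt_init_genrand(5489UL);               /* the same default seed used internally */
  unsigned long u = mt_genrand_int32();  /* uniform on [0, 0xffffffff] */
  double x = mt_genrand_res53();         /* uniform on [0, 1) with 53-bit resolution */
  printf("%10lu %.17f\n", u, x);
  return 0;
}
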
diff --git a/gi/pyp-topics/src/mt19937ar.h b/gi/pyp-topics/src/mt19937ar.h
deleted file mode 100644
index caab4045..00000000
--- a/gi/pyp-topics/src/mt19937ar.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* mt19937ar.h
- *
- * Mark Johnson, 17th March 2006
- */
-
-#ifndef MT19937AR_H
-#define MT19937AR_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- /* initializes mt[N] with a seed */
- void mt_init_genrand(unsigned long s);
-
- /* initialize by an array with array-length */
- /* init_key is the array for initializing keys */
- /* key_length is its length */
- /* slight change for C++, 2004/2/26 */
- void mt_init_by_array(unsigned long init_key[], int key_length);
-
- /* generates a random number on [0,0xffffffff]-interval */
- unsigned long mt_genrand_int32(void);
-
- /* generates a random number on [0,0x7fffffff]-interval */
- long mt_genrand_int31(void);
-
- /* generates a random number on [0,1]-real-interval */
- double mt_genrand_real1(void);
-
- /* generates a random number on [0,1)-real-interval */
- double mt_genrand_real2(void);
-
- /* generates a random number on (0,1)-real-interval */
- double mt_genrand_real3(void);
-
- /* generates a random number on [0,1) with 53-bit resolution*/
- double mt_genrand_res53(void);
-
-#ifdef __cplusplus
-};
-#endif
-
-#endif /* MT19937AR_H */
diff --git a/gi/pyp-topics/src/pyp-topics.cc b/gi/pyp-topics/src/pyp-topics.cc
deleted file mode 100644
index 4de52fd7..00000000
--- a/gi/pyp-topics/src/pyp-topics.cc
+++ /dev/null
@@ -1,499 +0,0 @@
-#include "timing.h"
-#include "pyp-topics.hh"
-#include "contexts_corpus.hh"
-
-//Dict const *dict;
-
-//#include <boost/date_time/posix_time/posix_time_types.hpp>
-void PYPTopics::sample_corpus(const Corpus& corpus, int samples,
- int freq_cutoff_start, int freq_cutoff_end,
- int freq_cutoff_interval,
- int max_contexts_per_document,
- F temp_start, F temp_end) {
- Timer timer;
- //dict = &((ContextsCorpus*) &corpus)->dict();
-
- if (!m_backoff.get()) {
- m_word_pyps.clear();
- m_word_pyps.push_back(PYPs());
- }
-
- std::cerr << "\n Training with " << m_word_pyps.size()-1 << " backoff level"
- << (m_word_pyps.size()==2 ? ":" : "s:") << std::endl;
-
-
- for (int i=0; i<(int)m_word_pyps.size(); ++i)
- {
- m_word_pyps.at(i).reserve(m_num_topics);
- for (int j=0; j<m_num_topics; ++j)
- m_word_pyps.at(i).push_back(new PYP<int>(0.01, 1.0, m_seed));
- }
- std::cerr << std::endl;
-
- m_document_pyps.reserve(corpus.num_documents());
- for (int j=0; j<corpus.num_documents(); ++j)
- m_document_pyps.push_back(new PYP<int>(0.01, 1.0, m_seed));
-
- m_topic_p0 = 1.0/m_num_topics;
- m_term_p0 = m_backoff.get()
- ? 1.0/(F)m_backoff->terms_at_level(m_word_pyps.size()-1)
- : 1.0/(F)corpus.num_types(); // guard against a null backoff index
- m_backoff_p0 = 1.0/corpus.num_documents();
-
- std::cerr << " Documents: " << corpus.num_documents() << " Terms: "
- << corpus.num_types() << std::endl;
-
- int frequency_cutoff = freq_cutoff_start;
- std::cerr << " Context frequency cutoff set to " << frequency_cutoff << std::endl;
-
- timer.Reset();
- // Initialisation pass
- int document_id=0, topic_counter=0;
- for (Corpus::const_iterator corpusIt=corpus.begin();
- corpusIt != corpus.end(); ++corpusIt, ++document_id) {
- m_corpus_topics.push_back(DocumentTopics(corpusIt->size(), 0));
-
- int term_index=0;
- for (Document::const_iterator docIt=corpusIt->begin();
- docIt != corpusIt->end(); ++docIt, ++term_index) {
- topic_counter++;
- Term term = *docIt;
-
- // sample a new_topic
- //int new_topic = (topic_counter % m_num_topics);
- int freq = corpus.context_count(term);
- int new_topic = -1;
- if (freq > frequency_cutoff
- && (!max_contexts_per_document || term_index < max_contexts_per_document)) {
- //new_topic = sample(document_id, term);
- //new_topic = document_id % m_num_topics;
- new_topic = (int) (rnd() * m_num_topics);
-
- // add the new topic to the PYPs
- increment(term, new_topic);
-
- if (m_use_topic_pyp) {
- F p0 = m_topic_pyp.prob(new_topic, m_topic_p0);
- int table_delta = m_document_pyps[document_id].increment(new_topic, p0);
- if (table_delta)
- m_topic_pyp.increment(new_topic, m_topic_p0);
- }
- else m_document_pyps[document_id].increment(new_topic, m_topic_p0);
- }
-
- m_corpus_topics[document_id][term_index] = new_topic;
- }
- }
- std::cerr << " Initialized in " << timer.Elapsed() << " seconds\n";
-
- int* randomDocIndices = new int[corpus.num_documents()];
- for (int i = 0; i < corpus.num_documents(); ++i)
- randomDocIndices[i] = i;
-
- if (num_jobs < max_threads)
- num_jobs = max_threads;
- int job_incr = (int) ( (float)m_document_pyps.size() / float(num_jobs) );
-
- // Sampling phase
- for (int curr_sample=0; curr_sample < samples; ++curr_sample) {
- if (freq_cutoff_interval > 0 && curr_sample != 1
- && curr_sample % freq_cutoff_interval == 1
- && frequency_cutoff > freq_cutoff_end) {
- frequency_cutoff--;
- std::cerr << "\n Context frequency cutoff set to " << frequency_cutoff << std::endl;
- }
-
- F temp = 1.0 / (temp_start - curr_sample*(temp_start-temp_end)/samples);
- std::cerr << "\n -- Sample " << curr_sample << " (T=" << temp << ") "; std::cerr.flush();
-
- // Randomize the corpus indexing array
- int tmp;
- int processed_terms=0;
- /*
- for (int i = corpus.num_documents()-1; i > 0; --i)
- {
- //i+1 since j \in [0,i] but rnd() \in [0,1)
- int j = (int)(rnd() * (i+1));
- assert(j >= 0 && j <= i);
- tmp = randomDocIndices[i];
- randomDocIndices[i] = randomDocIndices[j];
- randomDocIndices[j] = tmp;
- }
- */
-
- // for each document in the corpus
- int document_id;
- for (int i=0; i<corpus.num_documents(); ++i) {
- document_id = randomDocIndices[i];
-
- // for each term in the document
- int term_index=0;
- Document::const_iterator docEnd = corpus.at(document_id).end();
- for (Document::const_iterator docIt=corpus.at(document_id).begin();
- docIt != docEnd; ++docIt, ++term_index) {
- if (max_contexts_per_document && term_index >= max_contexts_per_document)
- break;
-
- Term term = *docIt;
-
- int freq = corpus.context_count(term);
- if (freq < frequency_cutoff)
- continue;
-
- processed_terms++;
-
- // remove the previous topic from the PYPs
- int current_topic = m_corpus_topics[document_id][term_index];
- // a negative label means the term hasn't been sampled yet
- if (current_topic >= 0) {
- decrement(term, current_topic);
-
- int table_delta = m_document_pyps[document_id].decrement(current_topic);
- if (m_use_topic_pyp && table_delta < 0)
- m_topic_pyp.decrement(current_topic);
- }
-
- // sample a new_topic
- int new_topic = sample(document_id, term, temp);
- //std::cerr << "TERM: " << dict->Convert(term) << " (" << term << ") " << " Old Topic: "
- // << current_topic << " New Topic: " << new_topic << "\n" << std::endl;
-
- // add the new topic to the PYPs
- m_corpus_topics[document_id][term_index] = new_topic;
- increment(term, new_topic);
-
- if (m_use_topic_pyp) {
- F p0 = m_topic_pyp.prob(new_topic, m_topic_p0);
- int table_delta = m_document_pyps[document_id].increment(new_topic, p0);
- if (table_delta)
- m_topic_pyp.increment(new_topic, m_topic_p0);
- }
- else m_document_pyps[document_id].increment(new_topic, m_topic_p0);
- }
- if (document_id && document_id % 10000 == 0) {
- std::cerr << "."; std::cerr.flush();
- }
- }
- std::cerr << " ||| LLH= " << log_likelihood();
-
- if (curr_sample != 0 && curr_sample % 10 == 0) {
- //if (true) {
- std::cerr << " ||| time=" << (timer.Elapsed() / 10.0) << " sec/sample" << std::endl;
- timer.Reset();
- std::cerr << " ... Resampling hyperparameters (";
-
- // resample the hyperparameters
- F log_p=0.0;
- if (max_threads == 1)
- {
- std::cerr << "1 thread)" << std::endl; std::cerr.flush();
- log_p += hresample_topics();
- log_p += hresample_docs(0, m_document_pyps.size());
- }
- else
- { //parallelize
- std::cerr << max_threads << " threads, " << num_jobs << " jobs)" << std::endl; std::cerr.flush();
-
- WorkerPool<JobReturnsF, F> pool(max_threads);
- int i=0, sz = m_document_pyps.size();
- //documents...
- while (i <= sz - 2*job_incr)
- {
- JobReturnsF job = boost::bind(&PYPTopics::hresample_docs, this, i, i+job_incr);
- pool.addJob(job);
- i += job_incr;
- }
- // do all remaining documents
- JobReturnsF job = boost::bind(&PYPTopics::hresample_docs, this, i,sz);
- pool.addJob(job);
-
- //topics...
- JobReturnsF topics_job = boost::bind(&PYPTopics::hresample_topics, this);
- pool.addJob(topics_job);
-
- log_p += pool.get_result(); //blocks
-
- }
-
- if (m_use_topic_pyp) {
- m_topic_pyp.resample_prior(rnd);
- log_p += m_topic_pyp.log_restaurant_prob();
- }
-
- std::cerr.precision(10);
- std::cerr << " ||| LLH=" << log_likelihood() << " ||| resampling time=" << timer.Elapsed() << " sec" << std::endl;
- timer.Reset();
-
- int k=0;
- std::cerr << "Topics distribution: ";
- std::cerr.precision(2);
- for (PYPs::iterator pypIt=m_word_pyps.front().begin();
- pypIt != m_word_pyps.front().end(); ++pypIt, ++k) {
- if (k % 5 == 0) std::cerr << std::endl << '\t';
- std::cerr << "<" << k << ":" << pypIt->num_customers() << ","
- << pypIt->num_types() << "," << m_topic_pyp.prob(k, m_topic_p0) << "> ";
- }
- std::cerr.precision(10);
- std::cerr << std::endl;
- }
- }
- delete [] randomDocIndices;
-}
-
-PYPTopics::F PYPTopics::hresample_docs(int start, int end)
-{
- int resample_counter=0;
- F log_p = 0.0;
- assert(start >= 0);
- assert(end >= 0);
- assert(start <= end);
- for (int i=start; i < end; ++i)
- {
- m_document_pyps[i].resample_prior(rnd);
- log_p += m_document_pyps[i].log_restaurant_prob();
- if (resample_counter++ % 5000 == 0) {
- std::cerr << "."; std::cerr.flush();
- }
- }
- return log_p;
-}
-
-PYPTopics::F PYPTopics::hresample_topics()
-{
- F log_p = 0.0;
- for (std::vector<PYPs>::iterator levelIt=m_word_pyps.begin();
- levelIt != m_word_pyps.end(); ++levelIt) {
- for (PYPs::iterator pypIt=levelIt->begin();
- pypIt != levelIt->end(); ++pypIt) {
-
- pypIt->resample_prior(rnd);
- log_p += pypIt->log_restaurant_prob();
- }
- std::cerr << log_p << std::endl;
- }
- return log_p;
-}
-
-PYPTopics::F PYPTopics::log_likelihood() const
-{
- F log_p = 0.0;
-
- // LLH of topic term distribution
- size_t i=0;
- for (std::vector<PYPs>::const_iterator levelIt=m_word_pyps.begin();
- levelIt != m_word_pyps.end(); ++levelIt, ++i) {
- for (PYPs::const_iterator pypIt=levelIt->begin();
- pypIt != levelIt->end(); ++pypIt) { // i indexes the backoff level only
- log_p += pypIt->log_restaurant_prob();
-
- if (i == m_word_pyps.size()-1)
- log_p += (pypIt->num_tables() * -log(m_backoff->terms_at_level(i)));
- else
- log_p += (pypIt->num_tables() * log(m_term_p0));
- }
- }
- std::cerr << " TERM LLH: " << log_p << " "; //std::endl;
-
- // LLH of document topic distribution
- for (size_t i=0; i < m_document_pyps.size(); ++i) {
- log_p += m_document_pyps[i].log_restaurant_prob();
- if (!m_use_topic_pyp) log_p += (m_document_pyps[i].num_tables() * log(m_topic_p0));
- }
- if (m_use_topic_pyp) {
- log_p += m_topic_pyp.log_restaurant_prob();
- log_p += (m_topic_pyp.num_tables() * log(m_topic_p0));
- }
-
- return log_p;
-}
-
-void PYPTopics::decrement(const Term& term, int topic, int level) {
- //std::cerr << "PYPTopics::decrement(" << term << "," << topic << "," << level << ")" << std::endl;
- int table_delta = m_word_pyps.at(level).at(topic).decrement(term);
- if (table_delta && m_backoff.get()) {
- Term backoff_term = (*m_backoff)[term];
- if (!m_backoff->is_null(backoff_term))
- decrement(backoff_term, topic, level+1);
- }
-}
-
-void PYPTopics::increment(const Term& term, int topic, int level) {
- //std::cerr << "PYPTopics::increment(" << term << "," << topic << "," << level << ")" << std::endl;
- int table_delta = m_word_pyps.at(level).at(topic).increment(term, word_pyps_p0(term, topic, level));
-
- if (table_delta && m_backoff.get()) {
- Term backoff_term = (*m_backoff)[term];
- if (!m_backoff->is_null(backoff_term))
- increment(backoff_term, topic, level+1);
- }
-}
-
-int PYPTopics::sample(const DocumentId& doc, const Term& term, F inv_temp) {
- // First pass: collect probs
- F sum=0.0;
- std::vector<F> sums;
- for (int k=0; k<m_num_topics; ++k) {
- F p_w_k = prob(term, k);
-
- F topic_prob = m_topic_p0;
- if (m_use_topic_pyp) topic_prob = m_topic_pyp.prob(k, m_topic_p0);
-
- //F p_k_d = m_document_pyps[doc].prob(k, topic_prob);
- F p_k_d = m_document_pyps[doc].unnormalised_prob(k, topic_prob);
-
- F prob = p_w_k*p_k_d;
- /*
- if (prob < 0.0) { std::cerr << "\n\n" << prob << " " << p_w_k << " " << p_k_d << std::endl; assert(false); }
- if (prob > 1.0) { std::cerr << "\n\n" << prob << " " << p_w_k << " " << p_k_d << std::endl; assert(false); }
- assert (pow(prob, inv_temp) >= 0.0);
- assert (pow(prob, inv_temp) <= 1.0);
- */
- sum += pow(prob, inv_temp);
- sums.push_back(sum);
- }
- // Second pass: sample a topic
- F cutoff = rnd() * sum;
- for (int k=0; k<m_num_topics; ++k) {
- if (cutoff <= sums[k])
- return k;
- }
- assert(false);
- return -1; // not reached
-}
-
-PYPTopics::F PYPTopics::word_pyps_p0(const Term& term, int topic, int level) const {
- //for (int i=0; i<level+1; ++i) std::cerr << " ";
- //std::cerr << "PYPTopics::word_pyps_p0(" << term << "," << topic << "," << level << ")" << std::endl;
-
- F p0 = m_term_p0;
- if (m_backoff.get()) {
- //static F fudge=m_backoff_p0; // TODO
-
- Term backoff_term = (*m_backoff)[term];
- //std::cerr << "T: " << term << " BO: " << backoff_term << std::endl;
- if (!m_backoff->is_null(backoff_term)) {
- assert (level < m_backoff->order());
- //p0 = (1.0/(F)m_backoff->terms_at_level(level))*prob(backoff_term, topic, level+1);
- //p0 = m_term_p0*prob(backoff_term, topic, level+1); // dead store: overwritten below
- p0 = prob(backoff_term, topic, level+1);
- }
- else
- p0 = (1.0/(F) m_backoff->terms_at_level(level));
- //p0 = m_term_p0;
- }
- //for (int i=0; i<level+1; ++i) std::cerr << " ";
- //std::cerr << "PYPTopics::word_pyps_p0(" << term << "," << topic << "," << level << ") = " << p0 << std::endl;
- return p0;
-}
-
-PYPTopics::F PYPTopics::prob(const Term& term, int topic, int level) const {
- //for (int i=0; i<level+1; ++i) std::cerr << " ";
- //std::cerr << "PYPTopics::prob(" << dict->Convert(term) << "," << topic << "," << level << ")" << std::endl;
-
- F p0 = word_pyps_p0(term, topic, level);
- F p_w_k = m_word_pyps.at(level).at(topic).prob(term, p0);
-
- /*
- for (int i=0; i<level+1; ++i) std::cerr << " ";
- std::cerr << "PYPTopics::prob(" << dict->Convert(term) << "," << topic << "," << level << ") = " << p_w_k << std::endl;
- for (int i=0; i<level+1; ++i) std::cerr << " ";
- m_word_pyps.at(level).at(topic).debug_info(std::cerr);
- */
- return p_w_k;
-}
-
-int PYPTopics::max_topic() const {
- if (!m_use_topic_pyp)
- return -1;
-
- F current_max=0.0;
- int current_topic=-1;
- for (int k=0; k<m_num_topics; ++k) {
- F prob = m_topic_pyp.prob(k, m_topic_p0);
- if (prob > current_max) {
- current_max = prob;
- current_topic = k;
- }
- }
- assert(current_topic >= 0);
- return current_topic;
-}
-
-std::pair<int,PYPTopics::F> PYPTopics::max(const DocumentId& doc) const {
- //std::cerr << "PYPTopics::max(" << doc << "," << term << ")" << std::endl;
- // collect probs
- F current_max=0.0;
- int current_topic=-1;
- for (int k=0; k<m_num_topics; ++k) {
- //F p_w_k = prob(term, k);
-
- F topic_prob = m_topic_p0;
- if (m_use_topic_pyp)
- topic_prob = m_topic_pyp.prob(k, m_topic_p0);
-
- F prob = 0;
- if (doc < 0) prob = topic_prob;
- else prob = m_document_pyps[doc].prob(k, topic_prob);
-
- if (prob > current_max) {
- current_max = prob;
- current_topic = k;
- }
- }
- assert(current_topic >= 0);
- assert(current_max >= 0);
- return std::make_pair(current_topic, current_max);
-}
-
-std::pair<int,PYPTopics::F> PYPTopics::max(const DocumentId& doc, const Term& term) const {
- //std::cerr << "PYPTopics::max(" << doc << "," << term << ")" << std::endl;
- // collect probs
- F current_max=0.0;
- int current_topic=-1;
- for (int k=0; k<m_num_topics; ++k) {
- F p_w_k = prob(term, k);
-
- F topic_prob = m_topic_p0;
- if (m_use_topic_pyp)
- topic_prob = m_topic_pyp.prob(k, m_topic_p0);
-
- F p_k_d = 0;
- if (doc < 0) p_k_d = topic_prob;
- else p_k_d = m_document_pyps[doc].prob(k, topic_prob);
-
- F prob = (p_w_k*p_k_d);
- if (prob > current_max) {
- current_max = prob;
- current_topic = k;
- }
- }
- assert(current_topic >= 0);
- assert(current_max >= 0);
- return std::make_pair(current_topic,current_max);
-}
-
-std::ostream& PYPTopics::print_document_topics(std::ostream& out) const {
- for (CorpusTopics::const_iterator corpusIt=m_corpus_topics.begin();
- corpusIt != m_corpus_topics.end(); ++corpusIt) {
- int term_index=0;
- for (DocumentTopics::const_iterator docIt=corpusIt->begin();
- docIt != corpusIt->end(); ++docIt, ++term_index) {
- if (term_index) out << " ";
- out << *docIt;
- }
- out << std::endl;
- }
- return out;
-}
-
-std::ostream& PYPTopics::print_topic_terms(std::ostream& out) const {
- for (PYPs::const_iterator pypsIt=m_word_pyps.front().begin();
- pypsIt != m_word_pyps.front().end(); ++pypsIt) {
- int term_index=0;
- for (PYP<int>::const_iterator termIt=pypsIt->begin();
- termIt != pypsIt->end(); ++termIt, ++term_index) {
- if (term_index) out << " ";
- out << termIt->first << ":" << termIt->second;
- }
- out << std::endl;
- }
- return out;
-}
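
For reference, the conditional that PYPTopics::sample() draws from — on my reading of the code above — is the annealed product of the topic-word and document-topic terms:

$$P(z_{d,i} = k \mid w, \text{rest}) \;\propto\; \bigl(P(w \mid k)\, P(k \mid d)\bigr)^{1/T},$$

where $P(w \mid k)$ is the (possibly backed-off) PYP word probability, $P(k \mid d)$ the document PYP probability (the sampler uses the unnormalised form, which is harmless since the missing factor is constant in $k$), and the inverse temperature at pass $s$ of $S$ is $1/T = 1/\bigl(T_{\text{start}} - s\,(T_{\text{start}} - T_{\text{end}})/S\bigr)$, matching the temp computation in sample_corpus().
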
diff --git a/gi/pyp-topics/src/pyp-topics.hh b/gi/pyp-topics/src/pyp-topics.hh
deleted file mode 100644
index 3a910540..00000000
--- a/gi/pyp-topics/src/pyp-topics.hh
+++ /dev/null
@@ -1,98 +0,0 @@
-#ifndef PYP_TOPICS_HH
-#define PYP_TOPICS_HH
-
-#include <vector>
-#include <iostream>
-#include <boost/ptr_container/ptr_vector.hpp>
-
-#include <boost/random/uniform_real.hpp>
-#include <boost/random/variate_generator.hpp>
-#include <boost/random/mersenne_twister.hpp>
-
-#include "pyp.hh"
-#include "corpus.hh"
-#include "workers.hh"
-
-class PYPTopics {
-public:
- typedef std::vector<int> DocumentTopics;
- typedef std::vector<DocumentTopics> CorpusTopics;
- typedef long double F;
-
-public:
- PYPTopics(int num_topics, bool use_topic_pyp=false, unsigned long seed = 0,
- int max_threads = 1, int num_jobs = 1)
- : m_num_topics(num_topics), m_word_pyps(1),
- m_topic_pyp(0.5,1.0,seed), m_use_topic_pyp(use_topic_pyp),
- m_seed(seed),
- uni_dist(0,1), rng(seed == 0 ? (unsigned long)this : seed),
- rnd(rng, uni_dist), max_threads(max_threads), num_jobs(num_jobs) {}
-
- void sample_corpus(const Corpus& corpus, int samples,
- int freq_cutoff_start=0, int freq_cutoff_end=0,
- int freq_cutoff_interval=0,
- int max_contexts_per_document=0,
- F temp_start=1.0, F temp_end=1.0);
-
- int sample(const DocumentId& doc, const Term& term, F inv_temp=1.0);
- std::pair<int,F> max(const DocumentId& doc, const Term& term) const;
- std::pair<int,F> max(const DocumentId& doc) const;
- int max_topic() const;
-
- void set_backoff(const std::string& filename) {
- m_backoff.reset(new TermBackoff);
- m_backoff->read(filename);
- m_word_pyps.clear();
- m_word_pyps.resize(m_backoff->order(), PYPs());
- }
- void set_backoff(TermBackoffPtr backoff) {
- m_backoff = backoff;
- m_word_pyps.clear();
- m_word_pyps.resize(m_backoff->order(), PYPs());
- }
-
- F prob(const Term& term, int topic, int level=0) const;
- void decrement(const Term& term, int topic, int level=0);
- void increment(const Term& term, int topic, int level=0);
-
- F log_likelihood() const;
-
- std::ostream& print_document_topics(std::ostream& out) const;
- std::ostream& print_topic_terms(std::ostream& out) const;
-
-private:
- F word_pyps_p0(const Term& term, int topic, int level) const;
-
- int m_num_topics;
- F m_term_p0, m_topic_p0, m_backoff_p0;
-
- CorpusTopics m_corpus_topics;
- typedef boost::ptr_vector< PYP<int> > PYPs;
- PYPs m_document_pyps;
- std::vector<PYPs> m_word_pyps;
- PYP<int> m_topic_pyp;
- bool m_use_topic_pyp;
-
- unsigned long m_seed;
-
- typedef boost::mt19937 base_generator_type;
- typedef boost::uniform_real<> uni_dist_type;
- typedef boost::variate_generator<base_generator_type&, uni_dist_type> gen_type;
-
- uni_dist_type uni_dist;
- base_generator_type rng; //this gets the seed
- gen_type rnd; //instantiate: rnd(rng, uni_dist)
- //call: rnd() generates uniform on [0,1)
-
- typedef boost::function<F()> JobReturnsF;
-
- F hresample_docs(int start, int end); //does i in [start, end)
-
- F hresample_topics();
-
- int max_threads;
- int num_jobs;
- TermBackoffPtr m_backoff;
-};
-
-#endif // PYP_TOPICS_HH
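
A minimal driver sketch for the interface above (assumptions: corpus loading as in train-contexts.cc later in this diff; the wrapper function name is invented):

#include <iostream>
#include <string>
#include "pyp-topics.hh"
#include "contexts_corpus.hh"

// Train a PYPTopics model on a contexts file and print the topic assignments.
void train(const std::string& data_file, int topics, int samples) {
  PYPTopics model(topics, /*use_topic_pyp=*/true, /*seed=*/0);
  ContextsCorpus corpus;
  corpus.read_contexts(data_file, /*backoff_gen=*/0, /*filter_singletons=*/false);
  model.set_backoff(corpus.backoff_index());
  model.sample_corpus(corpus, samples);   // cutoff and annealing arguments default
  model.print_document_topics(std::cout);
}
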
diff --git a/gi/pyp-topics/src/pyp.hh b/gi/pyp-topics/src/pyp.hh
deleted file mode 100644
index b1cb62be..00000000
--- a/gi/pyp-topics/src/pyp.hh
+++ /dev/null
@@ -1,566 +0,0 @@
-#ifndef _pyp_hh
-#define _pyp_hh
-
-#include "slice-sampler.h"
-#include <math.h>
-#include <map>
-#include <tr1/unordered_map>
-//#include <google/sparse_hash_map>
-
-#include <boost/random/uniform_real.hpp>
-#include <boost/random/variate_generator.hpp>
-#include <boost/random/mersenne_twister.hpp>
-
-#include "log_add.h"
-#include "mt19937ar.h"
-
-//
-// Pitman-Yor process with customer and table tracking
-//
-
-template <typename Dish, typename Hash=std::tr1::hash<Dish> >
-class PYP : protected std::tr1::unordered_map<Dish, int, Hash>
-//class PYP : protected google::sparse_hash_map<Dish, int, Hash>
-{
-public:
- using std::tr1::unordered_map<Dish,int,Hash>::const_iterator;
- using std::tr1::unordered_map<Dish,int,Hash>::iterator;
- using std::tr1::unordered_map<Dish,int,Hash>::begin;
- using std::tr1::unordered_map<Dish,int,Hash>::end;
-// using google::sparse_hash_map<Dish,int>::const_iterator;
-// using google::sparse_hash_map<Dish,int>::iterator;
-// using google::sparse_hash_map<Dish,int>::begin;
-// using google::sparse_hash_map<Dish,int>::end;
-
- PYP(double a, double b, unsigned long seed = 0, Hash hash=Hash());
-
- virtual int increment(Dish d, double p0);
- virtual int decrement(Dish d);
-
- // lookup functions
- int count(Dish d) const;
- double prob(Dish dish, double p0) const;
- double prob(Dish dish, double dcd, double dca,
- double dtd, double dta, double p0) const;
- double unnormalised_prob(Dish dish, double p0) const;
-
- int num_customers() const { return _total_customers; }
- int num_types() const { return std::tr1::unordered_map<Dish,int,Hash>::size(); }
- //int num_types() const { return google::sparse_hash_map<Dish,int>::size(); }
- bool empty() const { return _total_customers == 0; }
-
- double log_prob(Dish dish, double log_p0) const;
- // nb. d* are NOT logs
- double log_prob(Dish dish, double dcd, double dca,
- double dtd, double dta, double log_p0) const;
-
- int num_tables(Dish dish) const;
- int num_tables() const;
-
- double a() const { return _a; }
- void set_a(double a) { _a = a; }
-
- double b() const { return _b; }
- void set_b(double b) { _b = b; }
-
- virtual void clear();
- std::ostream& debug_info(std::ostream& os) const;
-
- double log_restaurant_prob() const;
- double log_prior() const;
- static double log_prior_a(double a, double beta_a, double beta_b);
- static double log_prior_b(double b, double gamma_c, double gamma_s);
-
- template <typename Uniform01>
- void resample_prior(Uniform01& rnd);
- template <typename Uniform01>
- void resample_prior_a(Uniform01& rnd);
- template <typename Uniform01>
- void resample_prior_b(Uniform01& rnd);
-
-protected:
- double _a, _b; // parameters of the Pitman-Yor distribution
- double _a_beta_a, _a_beta_b; // parameters of Beta prior on a
- double _b_gamma_s, _b_gamma_c; // parameters of Gamma prior on b
-
- struct TableCounter {
- TableCounter() : tables(0) {};
- int tables;
- std::map<int, int> table_histogram; // num customers at table -> number tables
- };
- typedef std::tr1::unordered_map<Dish, TableCounter, Hash> DishTableType;
- //typedef google::sparse_hash_map<Dish, TableCounter, Hash> DishTableType;
- DishTableType _dish_tables;
- int _total_customers, _total_tables;
-
- typedef boost::mt19937 base_generator_type;
- typedef boost::uniform_real<> uni_dist_type;
- typedef boost::variate_generator<base_generator_type&, uni_dist_type> gen_type;
-
-// uni_dist_type uni_dist;
-// base_generator_type rng; //this gets the seed
-// gen_type rnd; //instantiate: rnd(rng, uni_dist)
- //call: rnd() generates uniform on [0,1)
-
- // Function objects for calculating the parts of the log_prob for
- // the parameters a and b
- struct resample_a_type {
- int n, m; double b, a_beta_a, a_beta_b;
- const DishTableType& dish_tables;
- resample_a_type(int n, int m, double b, double a_beta_a,
- double a_beta_b, const DishTableType& dish_tables)
- : n(n), m(m), b(b), a_beta_a(a_beta_a), a_beta_b(a_beta_b), dish_tables(dish_tables) {}
-
- double operator() (double proposed_a) const {
- double log_prior = log_prior_a(proposed_a, a_beta_a, a_beta_b);
- double log_prob = 0.0;
- double lgamma1a = lgamma(1.0 - proposed_a);
- for (typename DishTableType::const_iterator dish_it=dish_tables.begin(); dish_it != dish_tables.end(); ++dish_it)
- for (std::map<int, int>::const_iterator table_it=dish_it->second.table_histogram.begin();
- table_it !=dish_it->second.table_histogram.end(); ++table_it)
- log_prob += (table_it->second * (lgamma(table_it->first - proposed_a) - lgamma1a));
-
- log_prob += (proposed_a == 0.0 ? (m-1.0)*log(b)
- : ((m-1.0)*log(proposed_a) + lgamma((m-1.0) + b/proposed_a) - lgamma(b/proposed_a)));
- assert(std::isfinite(log_prob));
- return log_prob + log_prior;
- }
- };
-
- struct resample_b_type {
- int n, m; double a, b_gamma_c, b_gamma_s;
- resample_b_type(int n, int m, double a, double b_gamma_c, double b_gamma_s)
- : n(n), m(m), a(a), b_gamma_c(b_gamma_c), b_gamma_s(b_gamma_s) {}
-
- double operator() (double proposed_b) const {
- double log_prior = log_prior_b(proposed_b, b_gamma_c, b_gamma_s);
- double log_prob = 0.0;
- log_prob += (a == 0.0 ? (m-1.0)*log(proposed_b)
- : ((m-1.0)*log(a) + lgamma((m-1.0) + proposed_b/a) - lgamma(proposed_b/a)));
- log_prob += (lgamma(1.0+proposed_b) - lgamma(n+proposed_b));
- return log_prob + log_prior;
- }
- };
-
- /* lbetadist() returns the log probability density of x under a Beta(alpha,beta)
- * distribution. - copied from Mark Johnson's gammadist.c
- */
- static long double lbetadist(long double x, long double alpha, long double beta);
-
- /* lgammadist() returns the log probability density of x under a Gamma(alpha,beta)
- * distribution - copied from Mark Johnson's gammadist.c
- */
- static long double lgammadist(long double x, long double alpha, long double beta);
-
-};
-
-template <typename Dish, typename Hash>
-PYP<Dish,Hash>::PYP(double a, double b, unsigned long seed, Hash)
-: std::tr1::unordered_map<Dish, int, Hash>(10), _a(a), _b(b),
-//: google::sparse_hash_map<Dish, int, Hash>(10), _a(a), _b(b),
- _a_beta_a(1), _a_beta_b(1), _b_gamma_s(1), _b_gamma_c(1),
- //_a_beta_a(1), _a_beta_b(1), _b_gamma_s(10), _b_gamma_c(0.1),
- _total_customers(0), _total_tables(0)//,
- //uni_dist(0,1), rng(seed == 0 ? (unsigned long)this : seed), rnd(rng, uni_dist)
-{
-// std::cerr << "\t##PYP<Dish,Hash>::PYP(a=" << _a << ",b=" << _b << ")" << std::endl;
- //set_deleted_key(-std::numeric_limits<Dish>::max());
-}
-
-template <typename Dish, typename Hash>
-double
-PYP<Dish,Hash>::prob(Dish dish, double p0) const
-{
- int c = count(dish), t = num_tables(dish);
- double r = num_tables() * _a + _b;
- //std::cerr << "\t\t\t\tPYP<Dish,Hash>::prob(" << dish << "," << p0 << ") c=" << c << " r=" << r << std::endl;
- if (c > 0)
- return (c - _a * t + r * p0) / (num_customers() + _b);
- else
- return r * p0 / (num_customers() + _b);
-}
-
-template <typename Dish, typename Hash>
-double
-PYP<Dish,Hash>::unnormalised_prob(Dish dish, double p0) const
-{
- int c = count(dish), t = num_tables(dish);
- double r = num_tables() * _a + _b;
- if (c > 0) return (c - _a * t + r * p0);
- else return r * p0;
-}
-
-template <typename Dish, typename Hash>
-double
-PYP<Dish,Hash>::prob(Dish dish, double dcd, double dca,
- double dtd, double dta, double p0)
-const
-{
- int c = count(dish) + dcd, t = num_tables(dish) + dtd;
- double r = (num_tables() + dta) * _a + _b;
- if (c > 0)
- return (c - _a * t + r * p0) / (num_customers() + dca + _b);
- else
- return r * p0 / (num_customers() + dca + _b);
-}
-
-template <typename Dish, typename Hash>
-double
-PYP<Dish,Hash>::log_prob(Dish dish, double log_p0) const
-{
- using std::log;
- int c = count(dish), t = num_tables(dish);
- double r = log(num_tables() * _a + _b);
- if (c > 0)
- return Log<double>::add(log(c - _a * t), r + log_p0)
- - log(num_customers() + _b);
- else
- return r + log_p0 - log(num_customers() + _b);
-}
-
-template <typename Dish, typename Hash>
-double
-PYP<Dish,Hash>::log_prob(Dish dish, double dcd, double dca,
- double dtd, double dta, double log_p0)
-const
-{
- using std::log;
- int c = count(dish) + dcd, t = num_tables(dish) + dtd;
- double r = log((num_tables() + dta) * _a + _b);
- if (c > 0)
- return Log<double>::add(log(c - _a * t), r + log_p0)
- - log(num_customers() + dca + _b);
- else
- return r + log_p0 - log(num_customers() + dca + _b);
-}
-
-template <typename Dish, typename Hash>
-int
-PYP<Dish,Hash>::increment(Dish dish, double p0) {
- int delta = 0;
- TableCounter &tc = _dish_tables[dish];
-
- // seated on a new or existing table?
- int c = count(dish), t = num_tables(dish), T = num_tables();
- double pshare = (c > 0) ? (c - _a*t) : 0.0;
- double pnew = (_b + _a*T) * p0;
- assert (pshare >= 0.0);
- //assert (pnew > 0.0);
-
- //if (rnd() < pnew / (pshare + pnew)) {
- if (mt_genrand_res53() < pnew / (pshare + pnew)) {
- // assign to a new table
- tc.tables += 1;
- tc.table_histogram[1] += 1;
- _total_tables += 1;
- delta = 1;
- }
- else {
- // randomly assign to an existing table
- // remove constant denominator from inner loop
- //double r = rnd() * (c - _a*t);
- double r = mt_genrand_res53() * (c - _a*t);
- for (std::map<int,int>::iterator
- hit = tc.table_histogram.begin();
- hit != tc.table_histogram.end(); ++hit) {
- r -= ((hit->first - _a) * hit->second);
- if (r <= 0) {
- tc.table_histogram[hit->first+1] += 1;
- hit->second -= 1;
- if (hit->second == 0)
- tc.table_histogram.erase(hit);
- break;
- }
- }
- if (r > 0) {
- std::cerr << r << " " << c << " " << _a << " " << t << std::endl;
- assert(false);
- }
- delta = 0;
- }
-
- std::tr1::unordered_map<Dish,int,Hash>::operator[](dish) += 1;
- //google::sparse_hash_map<Dish,int,Hash>::operator[](dish) += 1;
- _total_customers += 1;
-
- return delta;
-}
-
-template <typename Dish, typename Hash>
-int
-PYP<Dish,Hash>::count(Dish dish) const
-{
- typename std::tr1::unordered_map<Dish, int, Hash>::const_iterator
- //typename google::sparse_hash_map<Dish, int>::const_iterator
- dcit = find(dish);
- if (dcit != end())
- return dcit->second;
- else
- return 0;
-}
-
-template <typename Dish, typename Hash>
-int
-PYP<Dish,Hash>::decrement(Dish dish)
-{
- typename std::tr1::unordered_map<Dish, int, Hash>::iterator dcit = find(dish);
- //typename google::sparse_hash_map<Dish, int>::iterator dcit = find(dish);
- if (dcit == end()) {
- std::cerr << dish << std::endl;
- assert(false);
- }
-
- int delta = 0;
-
- typename std::tr1::unordered_map<Dish, TableCounter, Hash>::iterator dtit = _dish_tables.find(dish);
- //typename google::sparse_hash_map<Dish, TableCounter>::iterator dtit = _dish_tables.find(dish);
- if (dtit == _dish_tables.end()) {
- std::cerr << dish << std::endl;
- assert(false);
- }
- TableCounter &tc = dtit->second;
-
- //std::cerr << "\tdecrement for " << dish << "\n";
- //std::cerr << "\tBEFORE histogram: " << tc.table_histogram << " ";
- //std::cerr << "count: " << count(dish) << " ";
- //std::cerr << "tables: " << tc.tables << "\n";
-
- //double r = rnd() * count(dish);
- double r = mt_genrand_res53() * count(dish);
- for (std::map<int,int>::iterator hit = tc.table_histogram.begin();
- hit != tc.table_histogram.end(); ++hit)
- {
- //r -= (hit->first - _a) * hit->second;
- r -= (hit->first) * hit->second;
- if (r <= 0)
- {
- if (hit->first > 1)
- tc.table_histogram[hit->first-1] += 1;
- else
- {
- delta = -1;
- tc.tables -= 1;
- _total_tables -= 1;
- }
-
- hit->second -= 1;
- if (hit->second == 0) tc.table_histogram.erase(hit);
- break;
- }
- }
- if (r > 0) {
- std::cerr << r << " " << count(dish) << " " << _a << " " << num_tables(dish) << std::endl;
- assert(false);
- }
-
- // remove the customer
- dcit->second -= 1;
- _total_customers -= 1;
- assert(dcit->second >= 0);
- if (dcit->second == 0) {
- erase(dcit);
- _dish_tables.erase(dtit);
- //std::cerr << "\tAFTER histogram: Empty\n";
- }
- else {
- //std::cerr << "\tAFTER histogram: " << _dish_tables[dish].table_histogram << " ";
- //std::cerr << "count: " << count(dish) << " ";
- //std::cerr << "tables: " << _dish_tables[dish].tables << "\n";
- }
-
- return delta;
-}
-
-template <typename Dish, typename Hash>
-int
-PYP<Dish,Hash>::num_tables(Dish dish) const
-{
- typename std::tr1::unordered_map<Dish, TableCounter, Hash>::const_iterator
- //typename google::sparse_hash_map<Dish, TableCounter, Hash>::const_iterator
- dtit = _dish_tables.find(dish);
-
- //assert(dtit != _dish_tables.end());
- if (dtit == _dish_tables.end())
- return 0;
-
- return dtit->second.tables;
-}
-
-template <typename Dish, typename Hash>
-int
-PYP<Dish,Hash>::num_tables() const
-{
- return _total_tables;
-}
-
-template <typename Dish, typename Hash>
-std::ostream&
-PYP<Dish,Hash>::debug_info(std::ostream& os) const
-{
- int hists = 0, tables = 0;
- for (typename std::tr1::unordered_map<Dish, TableCounter, Hash>::const_iterator
- //for (typename google::sparse_hash_map<Dish, TableCounter, Hash>::const_iterator
- dtit = _dish_tables.begin(); dtit != _dish_tables.end(); ++dtit)
- {
- hists += dtit->second.table_histogram.size();
- tables += dtit->second.tables;
-
-// if (dtit->second.tables <= 0)
-// std::cerr << dtit->first << " " << count(dtit->first) << std::endl;
- assert(dtit->second.tables > 0);
- assert(!dtit->second.table_histogram.empty());
-
-// os << "Dish " << dtit->first << " has " << count(dtit->first) << " customers, and is sitting at " << dtit->second.tables << " tables.\n";
- for (std::map<int,int>::const_iterator
- hit = dtit->second.table_histogram.begin();
- hit != dtit->second.table_histogram.end(); ++hit) {
-// os << " " << hit->second << " tables with " << hit->first << " customers." << std::endl;
- assert(hit->second > 0);
- }
- }
-
- os << "restaurant has "
- << _total_customers << " customers; "
- << _total_tables << " tables; "
- << tables << " tables'; "
- << num_types() << " dishes; "
- << _dish_tables.size() << " dishes'; and "
- << hists << " histogram entries\n";
-
- return os;
-}
-
-template <typename Dish, typename Hash>
-void
-PYP<Dish,Hash>::clear()
-{
- this->std::tr1::unordered_map<Dish,int,Hash>::clear();
- //this->google::sparse_hash_map<Dish,int,Hash>::clear();
- _dish_tables.clear();
- _total_tables = _total_customers = 0;
-}
-
-// log_restaurant_prob returns the log probability of the PYP table configuration.
-// Excludes Hierarchical P0 term which must be calculated separately.
-template <typename Dish, typename Hash>
-double
-PYP<Dish,Hash>::log_restaurant_prob() const {
- if (_total_customers < 1)
- return (double)0.0;
-
- double log_prob = 0.0;
- double lgamma1a = lgamma(1.0-_a);
-
- //std::cerr << "-------------------\n" << std::endl;
- for (typename DishTableType::const_iterator dish_it=_dish_tables.begin();
- dish_it != _dish_tables.end(); ++dish_it) {
- for (std::map<int, int>::const_iterator table_it=dish_it->second.table_histogram.begin();
- table_it !=dish_it->second.table_histogram.end(); ++table_it) {
- log_prob += (table_it->second * (lgamma(table_it->first - _a) - lgamma1a));
- //std::cerr << "|" << dish_it->first->parent << " --> " << dish_it->first->rhs << " " << table_it->first << " " << table_it->second << " " << log_prob;
- }
- }
- //std::cerr << std::endl;
-
- log_prob += (_a == (double)0.0 ? (_total_tables-1.0)*log(_b) : (_total_tables-1.0)*log(_a) + lgamma((_total_tables-1.0) + _b/_a) - lgamma(_b/_a));
- //std::cerr << "\t\t" << log_prob << std::endl;
- log_prob += (lgamma(1.0 + _b) - lgamma(_total_customers + _b));
-
- //std::cerr << _total_customers << " " << _total_tables << " " << log_prob << " " << log_prior() << std::endl;
- //std::cerr << _a << " " << _b << std::endl;
- if (!std::isfinite(log_prob)) {
- assert(false);
- }
- //return log_prob;
- if (log_prob > 0.0)
- std::cerr << log_prob << std::endl;
- return log_prob;// + log_prior();
-}
-
-template <typename Dish, typename Hash>
-double
-PYP<Dish,Hash>::log_prior() const {
- double prior = 0.0;
- if (_a_beta_a > 0.0 && _a_beta_b > 0.0 && _a > 0.0)
- prior += log_prior_a(_a, _a_beta_a, _a_beta_b);
- if (_b_gamma_s > 0.0 && _b_gamma_c > 0.0)
- prior += log_prior_b(_b, _b_gamma_c, _b_gamma_s);
-
- return prior;
-}
-
-template <typename Dish, typename Hash>
-double
-PYP<Dish,Hash>::log_prior_a(double a, double beta_a, double beta_b) {
- return lbetadist(a, beta_a, beta_b);
-}
-
-template <typename Dish, typename Hash>
-double
-PYP<Dish,Hash>::log_prior_b(double b, double gamma_c, double gamma_s) {
- return lgammadist(b, gamma_c, gamma_s);
-}
-
-template <typename Dish, typename Hash>
-long double PYP<Dish,Hash>::lbetadist(long double x, long double alpha, long double beta) {
- assert(x > 0);
- assert(x < 1);
- assert(alpha > 0);
- assert(beta > 0);
- return (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta);
-//boost::math::lgamma
-}
-
-template <typename Dish, typename Hash>
-long double PYP<Dish,Hash>::lgammadist(long double x, long double alpha, long double beta) {
- assert(alpha > 0);
- assert(beta > 0);
- return (alpha-1)*log(x) - alpha*log(beta) - x/beta - lgamma(alpha);
-}
-
-
-template <typename Dish, typename Hash>
- template <typename Uniform01>
-void
-PYP<Dish,Hash>::resample_prior(Uniform01& rnd) {
- for (int num_its=5; num_its >= 0; --num_its) {
- resample_prior_b(rnd);
- resample_prior_a(rnd);
- }
- resample_prior_b(rnd);
-}
-
-template <typename Dish, typename Hash>
- template <typename Uniform01>
-void
-PYP<Dish,Hash>::resample_prior_b(Uniform01& rnd) {
- if (_total_tables == 0)
- return;
-
- //int niterations = 10; // number of resampling iterations
- int niterations = 5; // number of resampling iterations
- //std::cerr << "\n## resample_prior_b(), initial a = " << _a << ", b = " << _b << std::endl;
- resample_b_type b_log_prob(_total_customers, _total_tables, _a, _b_gamma_c, _b_gamma_s);
- _b = slice_sampler1d(b_log_prob, _b, rnd, (double) 0.0, std::numeric_limits<double>::infinity(),
- //_b = slice_sampler1d(b_log_prob, _b, mt_genrand_res53, (double) 0.0, std::numeric_limits<double>::infinity(),
- (double) 0.0, niterations, 100*niterations);
- //std::cerr << "\n## resample_prior_b(), final a = " << _a << ", b = " << _b << std::endl;
-}
-
-template <typename Dish, typename Hash>
- template <typename Uniform01>
-void
-PYP<Dish,Hash>::resample_prior_a(Uniform01& rnd) {
- if (_total_tables == 0)
- return;
-
- //int niterations = 10;
- int niterations = 5;
- //std::cerr << "\n## Initial a = " << _a << ", b = " << _b << std::endl;
- resample_a_type a_log_prob(_total_customers, _total_tables, _b, _a_beta_a, _a_beta_b, _dish_tables);
- _a = slice_sampler1d(a_log_prob, _a, rnd, std::numeric_limits<double>::min(),
- //_a = slice_sampler1d(a_log_prob, _a, mt_genrand_res53, std::numeric_limits<double>::min(),
- (double) 1.0, (double) 0.0, niterations, 100*niterations);
-}
-
-#endif
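
On my reading of prob() above, the predictive probability this class implements is the standard Pitman-Yor Chinese-restaurant form, with $c_d$ customers and $t_d$ tables for the dish, $n$ total customers, $T$ total tables, and base probability $p_0$:

$$P(\text{dish}=d) \;=\; \begin{cases} \dfrac{c_d - a\,t_d + (a\,T + b)\,p_0}{n + b}, & c_d > 0,\\[6pt] \dfrac{(a\,T + b)\,p_0}{n + b}, & c_d = 0, \end{cases}$$

and unnormalised_prob() returns the same numerator without the $n + b$ denominator.
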
diff --git a/gi/pyp-topics/src/slice-sampler.h b/gi/pyp-topics/src/slice-sampler.h
deleted file mode 100644
index 3108a0f7..00000000
--- a/gi/pyp-topics/src/slice-sampler.h
+++ /dev/null
@@ -1,192 +0,0 @@
-//! slice-sampler.h is an MCMC slice sampler
-//!
-//! Mark Johnson, 1st August 2008
-
-#ifndef SLICE_SAMPLER_H
-#define SLICE_SAMPLER_H
-
-#include <algorithm>
-#include <cassert>
-#include <cmath>
-#include <iostream>
-#include <limits>
-
-//! slice_sampler_rfc_type{} returns the value of a user-specified
-//! function if the argument is within range, or -infinity otherwise
-//
-template <typename F, typename Fn, typename U>
-struct slice_sampler_rfc_type {
- F min_x, max_x;
- const Fn& f;
- U max_nfeval, nfeval;
- slice_sampler_rfc_type(F min_x, F max_x, const Fn& f, U max_nfeval)
- : min_x(min_x), max_x(max_x), f(f), max_nfeval(max_nfeval), nfeval(0) { }
-
- F operator() (F x) {
- if (min_x < x && x < max_x) {
- assert(++nfeval <= max_nfeval);
- F fx = f(x);
- assert(std::isfinite(fx));
- return fx;
- }
- else
- return -std::numeric_limits<F>::infinity();
- }
-}; // slice_sampler_rfc_type{}
-
-//! slice_sampler1d() implements the univariate "range doubling" slice sampler
-//! described in Neal (2003) "Slice Sampling", The Annals of Statistics 31(3), 705-767.
-//
-template <typename F, typename LogF, typename Uniform01>
-F slice_sampler1d(const LogF& logF0, //!< log of function to sample
- F x, //!< starting point
- Uniform01& u01, //!< uniform [0,1) random number generator
- F min_x = -std::numeric_limits<F>::infinity(), //!< minimum value of support
- F max_x = std::numeric_limits<F>::infinity(), //!< maximum value of support
- F w = 0.0, //!< guess at initial width
- unsigned nsamples=1, //!< number of samples to draw
- unsigned max_nfeval=200) //!< max number of function evaluations
-{
- typedef unsigned U;
- slice_sampler_rfc_type<F,LogF,U> logF(min_x, max_x, logF0, max_nfeval);
-
- assert(std::isfinite(x));
-
- if (w <= 0.0) { // set w to a default width
- if (min_x > -std::numeric_limits<F>::infinity() && max_x < std::numeric_limits<F>::infinity())
- w = (max_x - min_x)/4;
- else
- w = std::max(((x < 0.0) ? -x : x)/4, (F) 0.1);
- }
- assert(std::isfinite(w));
-
- F logFx = logF(x);
- for (U sample = 0; sample < nsamples; ++sample) {
- F logY = logFx + log(u01()+1e-100); //! slice logFx at this value
- assert(std::isfinite(logY));
-
- F xl = x - w*u01(); //! lower bound on slice interval
- F logFxl = logF(xl);
- F xr = xl + w; //! upper bound on slice interval
- F logFxr = logF(xr);
-
- while (logY < logFxl || logY < logFxr) // doubling procedure
- if (u01() < 0.5)
- logFxl = logF(xl -= xr - xl);
- else
- logFxr = logF(xr += xr - xl);
-
- F xl1 = xl;
- F xr1 = xr;
- while (true) { // shrinking procedure
- F x1 = xl1 + u01()*(xr1 - xl1);
- if (logY < logF(x1)) {
- F xl2 = xl; // acceptance procedure
- F xr2 = xr;
- bool d = false;
- while (xr2 - xl2 > 1.1*w) {
- F xm = (xl2 + xr2)/2;
- if ((x < xm && x1 >= xm) || (x >= xm && x1 < xm))
- d = true;
- if (x1 < xm)
- xr2 = xm;
- else
- xl2 = xm;
- if (d && logY >= logF(xl2) && logY >= logF(xr2))
- goto unacceptable;
- }
- x = x1;
- goto acceptable;
- }
- goto acceptable;
- unacceptable:
- if (x1 < x) // rest of shrinking procedure
- xl1 = x1;
- else
- xr1 = x1;
- }
- acceptable:
- w = (4*w + (xr1 - xl1))/5; // update width estimate
- }
- return x;
-}
-
-/*
-//! slice_sampler1d() implements a 1-d MCMC slice sampler.
-//! It should be correct for unimodal distributions, but
-//! not for multimodal ones.
-//
-template <typename F, typename LogP, typename Uniform01>
-F slice_sampler1d(const LogP& logP, //!< log of distribution to sample
- F x, //!< initial sample
- Uniform01& u01, //!< uniform random number generator
- F min_x = -std::numeric_limits<F>::infinity(), //!< minimum value of support
- F max_x = std::numeric_limits<F>::infinity(), //!< maximum value of support
- F w = 0.0, //!< guess at initial width
- unsigned nsamples=1, //!< number of samples to draw
- unsigned max_nfeval=200) //!< max number of function evaluations
-{
- typedef unsigned U;
- assert(std::isfinite(x));
- if (w <= 0.0) {
- if (min_x > -std::numeric_limits<F>::infinity() && max_x < std::numeric_limits<F>::infinity())
- w = (max_x - min_x)/4;
- else
- w = std::max(((x < 0.0) ? -x : x)/4, 0.1);
- }
- // TRACE4(x, min_x, max_x, w);
- F logPx = logP(x);
- assert(std::isfinite(logPx));
- U nfeval = 1;
- for (U sample = 0; sample < nsamples; ++sample) {
- F x0 = x;
- F logU = logPx + log(u01()+1e-100);
- assert(std::isfinite(logU));
- F r = u01();
- F xl = std::max(min_x, x - r*w);
- F xr = std::min(max_x, x + (1-r)*w);
- // TRACE3(x, logPx, logU);
- while (xl > min_x && logP(xl) > logU) {
- xl -= w;
- w *= 2;
- ++nfeval;
- if (nfeval >= max_nfeval)
- std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << std::endl;
- assert(nfeval < max_nfeval);
- }
- xl = std::max(xl, min_x);
- while (xr < max_x && logP(xr) > logU) {
- xr += w;
- w *= 2;
- ++nfeval;
- if (nfeval >= max_nfeval)
- std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xr = " << xr << std::endl;
- assert(nfeval < max_nfeval);
- }
- xr = std::min(xr, max_x);
- while (true) {
- r = u01();
- x = r*xl + (1-r)*xr;
- assert(std::isfinite(x));
- logPx = logP(x);
- // TRACE4(logPx, x, xl, xr);
- assert(std::isfinite(logPx));
- ++nfeval;
- if (nfeval >= max_nfeval)
- std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << ", xr = " << xr << ", x = " << x << std::endl;
- assert(nfeval < max_nfeval);
- if (logPx > logU)
- break;
- else if (x > x0)
- xr = x;
- else
- xl = x;
- }
- // w = (4*w + (xr-xl))/5; // gradually adjust w
- }
- // TRACE2(logPx, x);
- return x;
-} // slice_sampler1d()
-*/
-
-#endif // SLICE_SAMPLER_H
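
A usage sketch for slice_sampler1d() above (hypothetical driver, not from the deleted sources): sampling from an unnormalised Gamma(2,1) log-density on (0, inf), with a Boost variate_generator playing the role of the u01 argument, as the resampling code in pyp.hh does.

#include <cmath>
#include <limits>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real.hpp>
#include <boost/random/variate_generator.hpp>
#include "slice-sampler.h"

struct LogGamma21 {           // log p(x) up to a constant: (2-1)*log(x) - x
  double operator()(double x) const { return std::log(x) - x; }
};

int main() {
  boost::mt19937 rng(42u);
  boost::uniform_real<> dist(0, 1);
  boost::variate_generator<boost::mt19937&, boost::uniform_real<> > u01(rng, dist);

  double x = 1.0;             // starting point
  for (int i = 0; i < 10; ++i)
    x = slice_sampler1d(LogGamma21(), x, u01,
                        std::numeric_limits<double>::min(),   // support is (0, inf)
                        std::numeric_limits<double>::infinity());
  return 0;
}
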
diff --git a/gi/pyp-topics/src/timing.h b/gi/pyp-topics/src/timing.h
deleted file mode 100644
index 08360b0f..00000000
--- a/gi/pyp-topics/src/timing.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#ifndef TIMING_H
-#define TIMING_H
-
-#ifdef __CYGWIN__
-# ifndef _POSIX_MONOTONIC_CLOCK
-# define _POSIX_MONOTONIC_CLOCK
-// this modifies <time.h>
-# endif
-// in case someone included <time.h> before we got here (this is lifted from <time.h>)
-# ifndef CLOCK_MONOTONIC
-# define CLOCK_MONOTONIC (clockid_t)4
-# endif
-#endif
-
-
-#include <time.h>
-#include <sys/time.h>
-#include "clock_gettime_stub.c"
-
-struct Timer {
- Timer() { Reset(); }
- void Reset()
- {
- clock_gettime(CLOCK_MONOTONIC, &start_t);
- }
- double Elapsed() const {
- timespec end_t;
- clock_gettime(CLOCK_MONOTONIC, &end_t);
- const double elapsed = (end_t.tv_sec - start_t.tv_sec)
- + (end_t.tv_nsec - start_t.tv_nsec) / 1000000000.0;
- return elapsed;
- }
- private:
- timespec start_t;
-};
-
-#endif
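
The Timer above is used throughout pyp-topics.cc earlier in this diff; the pattern is simply:

#include <iostream>
#include "timing.h"

void timed_phase() {
  Timer timer;               // Reset() runs in the constructor
  // ... work to be measured ...
  std::cerr << "elapsed: " << timer.Elapsed() << " sec" << std::endl;
  timer.Reset();             // restart the clock for the next phase
}
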
diff --git a/gi/pyp-topics/src/train-contexts.cc b/gi/pyp-topics/src/train-contexts.cc
deleted file mode 100644
index 9463f9fc..00000000
--- a/gi/pyp-topics/src/train-contexts.cc
+++ /dev/null
@@ -1,174 +0,0 @@
-// STL
-#include <iostream>
-#include <fstream>
-#include <algorithm>
-#include <iterator>
-
-// Boost
-#include <boost/program_options/parsers.hpp>
-#include <boost/program_options/variables_map.hpp>
-#include <boost/scoped_ptr.hpp>
-
-// Local
-#include "pyp-topics.hh"
-#include "corpus.hh"
-#include "contexts_corpus.hh"
-#include "gzstream.hh"
-
-static const char *REVISION = "$Rev$";
-
-// Namespaces
-using namespace boost;
-using namespace boost::program_options;
-using namespace std;
-
-int main(int argc, char **argv)
-{
- cout << "Pitman Yor topic models: Copyright 2010 Phil Blunsom\n";
- cout << REVISION << '\n' <<endl;
-
- ////////////////////////////////////////////////////////////////////////////////////////////
- // Command line processing
- variables_map vm;
-
- // Command line processing
- {
- options_description cmdline_specific("Command line specific options");
- cmdline_specific.add_options()
- ("help,h", "print help message")
- ("config,c", value<string>(), "config file specifying additional command line options")
- ;
- options_description config_options("Allowed options");
- config_options.add_options()
- ("data,d", value<string>(), "file containing the documents and context terms")
- ("topics,t", value<int>()->default_value(50), "number of topics")
- ("document-topics-out,o", value<string>(), "file to write the document topics to")
- ("default-topics-out", value<string>(), "file to write default term topic assignments.")
- ("topic-words-out,w", value<string>(), "file to write the topic word distribution to")
- ("samples,s", value<int>()->default_value(10), "number of sampling passes through the data")
- ("backoff-type", value<string>(), "backoff type: none|simple")
-// ("filter-singleton-contexts", "filter singleton contexts")
- ("hierarchical-topics", "Use a backoff hierarchical PYP as the P0 for the document topics distribution.")
- ("freq-cutoff-start", value<int>()->default_value(0), "initial frequency cutoff.")
- ("freq-cutoff-end", value<int>()->default_value(0), "final frequency cutoff.")
- ("freq-cutoff-interval", value<int>()->default_value(0), "number of iterations between frequency decrement.")
- ("max-threads", value<int>()->default_value(1), "maximum number of simultaneous threads allowed")
- ("max-contexts-per-document", value<int>()->default_value(0), "Only sample the n most frequent contexts for a document.")
- ("num-jobs", value<int>()->default_value(1), "allows finer control over parallelization")
- ("temp-start", value<double>()->default_value(1.0), "starting annealing temperature.")
- ("temp-end", value<double>()->default_value(1.0), "end annealing temperature.")
- ;
-
- cmdline_specific.add(config_options);
-
- store(parse_command_line(argc, argv, cmdline_specific), vm);
- notify(vm);
-
- if (vm.count("config") > 0) {
- ifstream config(vm["config"].as<string>().c_str());
- store(parse_config_file(config, config_options), vm);
- }
-
- if (vm.count("help")) {
- cout << cmdline_specific << "\n";
- return 1;
- }
- }
- ////////////////////////////////////////////////////////////////////////////////////////////
-
- if (!vm.count("data")) {
- cerr << "Please specify a file containing the data." << endl;
- return 1;
- }
- assert(vm["max-threads"].as<int>() > 0);
- assert(vm["num-jobs"].as<int>() > -1);
- // seed the random number generator: 0 = automatic, specify value otherwise
- unsigned long seed = 0;
- PYPTopics model(vm["topics"].as<int>(), vm.count("hierarchical-topics"), seed, vm["max-threads"].as<int>(), vm["num-jobs"].as<int>());
-
- // read the data
- BackoffGenerator* backoff_gen=0;
- if (vm.count("backoff-type")) {
- if (vm["backoff-type"].as<std::string>() == "none") {
- backoff_gen = 0;
- }
- else if (vm["backoff-type"].as<std::string>() == "simple") {
- backoff_gen = new SimpleBackoffGenerator();
- }
- else {
- cerr << "Backoff type (--backoff-type) must be one of none|simple." <<endl;
- return(1);
- }
- }
-
- ContextsCorpus contexts_corpus;
- contexts_corpus.read_contexts(vm["data"].as<string>(), backoff_gen, /*vm.count("filter-singleton-contexts")*/ false);
- model.set_backoff(contexts_corpus.backoff_index());
-
- if (backoff_gen)
- delete backoff_gen;
-
- // train the sampler
- model.sample_corpus(contexts_corpus, vm["samples"].as<int>(),
- vm["freq-cutoff-start"].as<int>(),
- vm["freq-cutoff-end"].as<int>(),
- vm["freq-cutoff-interval"].as<int>(),
- vm["max-contexts-per-document"].as<int>(),
- vm["temp-start"].as<double>(), vm["temp-end"].as<double>());
-
- if (vm.count("document-topics-out")) {
- ogzstream documents_out(vm["document-topics-out"].as<string>().c_str());
-
- int document_id=0;
- map<int,int> all_terms;
- for (Corpus::const_iterator corpusIt=contexts_corpus.begin();
- corpusIt != contexts_corpus.end(); ++corpusIt, ++document_id) {
- vector<int> unique_terms;
- for (Document::const_iterator docIt=corpusIt->begin();
- docIt != corpusIt->end(); ++docIt) {
- if (unique_terms.empty() || *docIt != unique_terms.back())
- unique_terms.push_back(*docIt);
-        // increment this term's frequency
- pair<map<int,int>::iterator,bool> insert_result = all_terms.insert(make_pair(*docIt,1));
- if (!insert_result.second)
- all_terms[*docIt] = all_terms[*docIt] + 1;
- //insert_result.first++;
- }
- documents_out << contexts_corpus.key(document_id) << '\t';
- documents_out << model.max(document_id).first << " " << corpusIt->size() << " ||| ";
- for (std::vector<int>::const_iterator termIt=unique_terms.begin();
- termIt != unique_terms.end(); ++termIt) {
- if (termIt != unique_terms.begin())
- documents_out << " ||| ";
- vector<std::string> strings = contexts_corpus.context2string(*termIt);
- copy(strings.begin(), strings.end(),ostream_iterator<std::string>(documents_out, " "));
- std::pair<int,PYPTopics::F> maxinfo = model.max(document_id, *termIt);
- documents_out << "||| C=" << maxinfo.first << " P=" << maxinfo.second;
-
- }
- documents_out <<endl;
- }
- documents_out.close();
-
- if (vm.count("default-topics-out")) {
- ofstream default_topics(vm["default-topics-out"].as<string>().c_str());
- default_topics << model.max_topic() <<endl;
- for (std::map<int,int>::const_iterator termIt=all_terms.begin(); termIt != all_terms.end(); ++termIt) {
- vector<std::string> strings = contexts_corpus.context2string(termIt->first);
- default_topics << model.max(-1, termIt->first).first << " ||| " << termIt->second << " ||| ";
- copy(strings.begin(), strings.end(),ostream_iterator<std::string>(default_topics, " "));
- default_topics <<endl;
- }
- }
- }
-
- if (vm.count("topic-words-out")) {
- ogzstream topics_out(vm["topic-words-out"].as<string>().c_str());
- model.print_topic_terms(topics_out);
- topics_out.close();
- }
-
- cout <<endl;
-
- return 0;
-}
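The option handling above follows a pattern worth noting: options are stored from the command line first and then from the optional --config file, and because program_options keeps the first explicitly-set value stored, command-line settings override the file. A stripped-down sketch of that pattern (option names are illustrative):

    #include <boost/program_options.hpp>
    #include <fstream>
    #include <iostream>
    namespace po = boost::program_options;

    int main(int argc, char** argv) {
      po::options_description opts("Allowed options");
      opts.add_options()
        ("config,c", po::value<std::string>(), "config file")
        ("topics,t", po::value<int>()->default_value(50), "number of topics");
      po::variables_map vm;
      po::store(po::parse_command_line(argc, argv, opts), vm);
      if (vm.count("config")) {
        std::ifstream cfg(vm["config"].as<std::string>().c_str());
        po::store(po::parse_config_file(cfg, opts), vm);  // stored second, so CLI wins
      }
      po::notify(vm);
      std::cout << "topics = " << vm["topics"].as<int>() << std::endl;
      return 0;
    }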
diff --git a/gi/pyp-topics/src/train.cc b/gi/pyp-topics/src/train.cc
deleted file mode 100644
index db7ca46e..00000000
--- a/gi/pyp-topics/src/train.cc
+++ /dev/null
@@ -1,135 +0,0 @@
-// STL
-#include <iostream>
-#include <fstream>
-
-// Boost
-#include <boost/program_options/parsers.hpp>
-#include <boost/program_options/variables_map.hpp>
-#include <boost/scoped_ptr.hpp>
-
-// Local
-#include "pyp-topics.hh"
-#include "corpus.hh"
-#include "contexts_corpus.hh"
-#include "gzstream.hh"
-
-static const char *REVISION = "$Rev$";
-
-// Namespaces
-using namespace boost;
-using namespace boost::program_options;
-using namespace std;
-
-int main(int argc, char **argv)
-{
- std::cout << "Pitman Yor topic models: Copyright 2010 Phil Blunsom\n";
- std::cout << REVISION << '\n' << std::endl;
-
- ////////////////////////////////////////////////////////////////////////////////////////////
- // Command line processing
- variables_map vm;
-
- options_description cmdline_specific("Command line specific options");
- cmdline_specific.add_options()
- ("help,h", "print help message")
- ("config,c", value<string>(), "config file specifying additional command line options")
- ;
- options_description generic("Allowed options");
- generic.add_options()
- ("documents,d", value<string>(), "file containing the documents")
- ("topics,t", value<int>()->default_value(50), "number of topics")
- ("document-topics-out,o", value<string>(), "file to write the document topics to")
- ("topic-words-out,w", value<string>(), "file to write the topic word distribution to")
- ("samples,s", value<int>()->default_value(10), "number of sampling passes through the data")
- ("test-corpus", value<string>(), "file containing the test data")
- ("backoff-paths", value<string>(), "file containing the term backoff paths")
- ;
- options_description config_options, cmdline_options;
- config_options.add(generic);
- cmdline_options.add(generic).add(cmdline_specific);
-
- store(parse_command_line(argc, argv, cmdline_options), vm);
- if (vm.count("config") > 0) {
- ifstream config(vm["config"].as<string>().c_str());
- store(parse_config_file(config, cmdline_options), vm);
- }
- notify(vm);
- ////////////////////////////////////////////////////////////////////////////////////////////
-
- if (vm.count("documents") == 0) {
- cerr << "Please specify a file containing the documents." << endl;
- cout << cmdline_options << "\n";
- return 1;
- }
-
- if (vm.count("help")) {
- cout << cmdline_options << "\n";
- return 1;
- }
-
- // seed the random number generator: 0 = automatic, specify value otherwise
- unsigned long seed = 0;
- PYPTopics model(vm["topics"].as<int>(), false, seed);
-
- // read the data
- Corpus corpus;
- corpus.read(vm["documents"].as<string>());
-
- // read the backoff dictionary
- if (vm.count("backoff-paths"))
- model.set_backoff(vm["backoff-paths"].as<string>());
-
- // train the sampler
- model.sample_corpus(corpus, vm["samples"].as<int>());
-
- if (vm.count("document-topics-out")) {
- ogzstream documents_out(vm["document-topics-out"].as<string>().c_str());
- //model.print_document_topics(documents_out);
-
- int document_id=0;
- for (Corpus::const_iterator corpusIt=corpus.begin();
- corpusIt != corpus.end(); ++corpusIt, ++document_id) {
- std::vector<int> unique_terms;
- for (Document::const_iterator docIt=corpusIt->begin();
- docIt != corpusIt->end(); ++docIt) {
- if (unique_terms.empty() || *docIt != unique_terms.back())
- unique_terms.push_back(*docIt);
- }
- documents_out << unique_terms.size();
- for (std::vector<int>::const_iterator termIt=unique_terms.begin();
- termIt != unique_terms.end(); ++termIt)
- documents_out << " " << *termIt << ":" << model.max(document_id, *termIt).first;
- documents_out << std::endl;
- }
- documents_out.close();
- }
-
- if (vm.count("topic-words-out")) {
- ogzstream topics_out(vm["topic-words-out"].as<string>().c_str());
- model.print_topic_terms(topics_out);
- topics_out.close();
- }
-
- if (vm.count("test-corpus")) {
- TestCorpus test_corpus;
- test_corpus.read(vm["test-corpus"].as<string>());
- ogzstream topics_out((vm["test-corpus"].as<string>() + ".topics.gz").c_str());
-
- for (TestCorpus::const_iterator corpusIt=test_corpus.begin();
- corpusIt != test_corpus.end(); ++corpusIt) {
- int index=0;
- for (DocumentTerms::const_iterator instanceIt=corpusIt->begin();
- instanceIt != corpusIt->end(); ++instanceIt, ++index) {
- int topic = model.max(instanceIt->doc, instanceIt->term).first;
- if (index != 0) topics_out << " ";
- topics_out << topic;
- }
- topics_out << std::endl;
- }
- topics_out.close();
- }
- std::cout << std::endl;
-
- return 0;
-}
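Both drivers compress each document's term list by skipping only adjacent repeats (the unique_terms loops), which assumes a document's terms arrive grouped; it is the same operation as std::unique. A sketch:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> terms;
      terms.push_back(3); terms.push_back(3);
      terms.push_back(5);
      terms.push_back(7); terms.push_back(7);
      // drop adjacent duplicates in place
      terms.erase(std::unique(terms.begin(), terms.end()), terms.end());
      assert(terms.size() == 3);  // {3, 5, 7}
      return 0;
    }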
diff --git a/gi/pyp-topics/src/utility.h b/gi/pyp-topics/src/utility.h
deleted file mode 100644
index 405a5b0a..00000000
--- a/gi/pyp-topics/src/utility.h
+++ /dev/null
@@ -1,962 +0,0 @@
-// utility.h
-//
-// (c) Mark Johnson, 24th January 2005
-//
-// modified 6th May 2002 to ensure write/read consistency, fixed 18th July 2002
-// modified 14th July 2002 to include insert() (generic inserter)
-// modified 26th September 2003 to use mapped_type instead of data_type
-// 25th August 2004 added istream >> const char*
-// 24th January 2005 added insert_newkey()
-//
-// Defines:
-// loop macros foreach, cforeach
-// dfind (default find function)
-// afind (find function that asserts key exists)
-// insert_newkey (inserts a new key into a map)
-// insert (generic inserter into standard data structures)
-// disjoint (set operation)
-// first_lessthan and second_lessthan (compares elements of pairs)
-//
-// Simplified interfaces to STL routines:
-//
-// includes (simplified interface)
-// set_intersection (simplified interface)
-// inserter (simplified interface)
-// max_element (simplified interface)
-// min_element (simplified interface)
-// hash functions for pairs, vectors, lists, slists and maps
-// input and output for pairs and vectors
-// resource_usage (interface improved)
-
-
-#ifndef UTILITY_H
-#define UTILITY_H
-
-#include <algorithm>
-// #include <boost/smart_ptr.hpp> // Comment out this line if boost is not used
-#include <cassert>
-#include <cmath>
-#include <cctype>
-#include <cstdio>
-#include <unordered_map>
-#include <unordered_set>
-#include <ext/slist>
-#include <iostream>
-#include <iterator>
-#include <list>
-#include <map>
-#include <set>
-#include <string>
-#include <utility>
-#include <vector>
-#include <memory>
-
-#if (__GNUC__ > 3) || (__GNUC__ >= 3 && __GNUC_MINOR__ >= 1)
-#define EXT_NAMESPACE __gnu_cxx
-#else
-#define EXT_NAMESPACE std
-#endif
-
-namespace ext = EXT_NAMESPACE;
-
-inline float power(float x, float y) { return powf(x, y); }
-inline double power(double x, double y) { return pow(x, y); }
-inline long double power(long double x, long double y) { return powl(x, y); }
-
-typedef unsigned U;
-typedef long double F; // slower than double, but underflows less
-
-///////////////////////////////////////////////////////////////////////////
-// //
-// Looping constructs //
-// //
-///////////////////////////////////////////////////////////////////////////
-
-// foreach is a simple loop construct
-//
-// STORE should be an STL container
-// TYPE is the typename of STORE
-// VAR will be defined as a local variable of type TYPE::iterator
-//
-#define foreach(TYPE, VAR, STORE) \
- for (TYPE::iterator VAR = (STORE).begin(); VAR != (STORE).end(); ++VAR)
-
-// cforeach is just like foreach, except that VAR is a const_iterator
-//
-// STORE should be an STL container
-// TYPE is the typename of STORE
-// VAR will be defined as a local variable of type TYPE::const_iterator
-//
-#define cforeach(TYPE, VAR, STORE) \
- for (TYPE::const_iterator VAR = (STORE).begin(); VAR != (STORE).end(); ++VAR)
-
-
-///////////////////////////////////////////////////////////////////////////
-// //
-// Map searching //
-// //
-// dfind(map, key) returns the key's value in map, or map's default //
-// value if no such key exists (the default value is not inserted) //
-// //
-// afind(map, key) returns a reference to the key's value in map, and //
-// asserts that this value exists //
-// //
-///////////////////////////////////////////////////////////////////////////
-
-// dfind(Map, Key) returns the value Map associates with Key, or the
-// Map's default value if no such Key exists
-//
-template <class Map, class Key>
-inline typename Map::mapped_type dfind(Map& m, const Key& k)
-{
- typename Map::iterator i = m.find(k);
- if (i == m.end())
- return typename Map::mapped_type();
- else
- return i->second;
-}
-
-template <class Map, class Key>
-inline const typename Map::mapped_type dfind(const Map& m, const Key& k)
-{
- typename Map::const_iterator i = m.find(k);
- if (i == m.end())
- return typename Map::mapped_type();
- else
- return i->second;
-}
-
-
-// afind(map, key) returns a reference to the value associated
-// with key in map. It uses assert to check that the key's value
-// is defined.
-//
-template <class Map, class Key>
-inline typename Map::mapped_type& afind(Map& m, const Key& k)
-{
- typename Map::iterator i = m.find(k);
- assert(i != m.end());
- return i->second;
-}
-
-template <class Map, class Key>
-inline const typename Map::mapped_type& afind(const Map& m, const Key& k)
-{
- typename Map::const_iterator i = m.find(k);
- assert(i != m.end());
- return i->second;
-}
-
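A sketch of how these differ from std::map::operator[], which inserts a default-constructed value on a miss:

    #include <map>
    #include <string>
    // assumes dfind/afind from utility.h (above) are in scope

    int main() {
      std::map<std::string, int> counts;
      counts["a"] = 2;
      int missing = dfind(counts, std::string("b"));   // 0; "b" is NOT inserted
      int& present = afind(counts, std::string("a"));  // asserts the key exists
      return missing + present - 2;                    // 0
    }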
-//! insert_newkey(map, key, value) checks that map does not contain
-//! key, and binds key to value.
-//
-template <class Map, class Key, class Value>
-inline typename Map::value_type&
-insert_newkey(Map& m, const Key& k,const Value& v)
-{
- std::pair<typename Map::iterator, bool> itb
-    = m.insert(typename Map::value_type(k, v));
- assert(itb.second);
- return *(itb.first);
-} // insert_newkey()
-
-
-///////////////////////////////////////////////////////////////////////////
-// //
-// Insert operations //
-// //
-///////////////////////////////////////////////////////////////////////////
-
-
-template <typename T>
-void insert(std::list<T>& xs, const T& x) {
- xs.push_back(x);
-}
-
-template <typename T>
-void insert(std::set<T>& xs, const T& x) {
- xs.insert(x);
-}
-
-template <typename T>
-void insert(std::vector<T>& xs, const T& x) {
- xs.push_back(x);
-}
-
-
-///////////////////////////////////////////////////////////////////////////
-// //
-// Additional versions of standard algorithms //
-// //
-///////////////////////////////////////////////////////////////////////////
-
-template <typename Set1, typename Set2>
-inline bool includes(const Set1& set1, const Set2& set2)
-{
- return std::includes(set1.begin(), set1.end(), set2.begin(), set2.end());
-}
-
-template <typename Set1, typename Set2, typename Compare>
-inline bool includes(const Set1& set1, const Set2& set2, Compare comp)
-{
- return std::includes(set1.begin(), set1.end(), set2.begin(), set2.end(), comp);
-}
-
-
-template <typename InputIter1, typename InputIter2>
-bool disjoint(InputIter1 first1, InputIter1 last1,
- InputIter2 first2, InputIter2 last2)
-{
- while (first1 != last1 && first2 != last2)
- if (*first1 < *first2)
- ++first1;
- else if (*first2 < *first1)
- ++first2;
- else // *first1 == *first2
- return false;
- return true;
-}
-
-template <typename InputIter1, typename InputIter2, typename Compare>
-bool disjoint(InputIter1 first1, InputIter1 last1,
- InputIter2 first2, InputIter2 last2, Compare comp)
-{
- while (first1 != last1 && first2 != last2)
- if (comp(*first1, *first2))
- ++first1;
- else if (comp(*first2, *first1))
- ++first2;
- else // *first1 == *first2
- return false;
- return true;
-}
-
-template <typename Set1, typename Set2>
-inline bool disjoint(const Set1& set1, const Set2& set2)
-{
- return disjoint(set1.begin(), set1.end(), set2.begin(), set2.end());
-}
-
-template <typename Set1, typename Set2, typename Compare>
-inline bool disjoint(const Set1& set1, const Set2& set2, Compare comp)
-{
- return disjoint(set1.begin(), set1.end(), set2.begin(), set2.end(), comp);
-}
-
-
-template <typename Set1, typename Set2, typename OutputIterator>
-inline OutputIterator set_intersection(const Set1& set1, const Set2& set2,
- OutputIterator result)
-{
- return set_intersection(set1.begin(), set1.end(), set2.begin(), set2.end(), result);
-}
-
-template <typename Set1, typename Set2, typename OutputIterator, typename Compare>
-inline OutputIterator set_intersection(const Set1& set1, const Set2& set2,
- OutputIterator result, Compare comp)
-{
- return set_intersection(set1.begin(), set1.end(), set2.begin(), set2.end(), result, comp);
-}
-
-
-template <typename Container>
-inline std::insert_iterator<Container> inserter(Container& container)
-{
- return std::inserter(container, container.begin());
-}
-
-// max_element
-//
-template <class Es> inline typename Es::iterator max_element(Es& es)
-{
- return std::max_element(es.begin(), es.end());
-}
-
-template <class Es> inline typename Es::const_iterator max_element(const Es& es)
-{
- return std::max_element(es.begin(), es.end());
-}
-
-template <class Es, class BinaryPredicate>
-inline typename Es::iterator max_element(Es& es, BinaryPredicate comp)
-{
- return std::max_element(es.begin(), es.end(), comp);
-}
-
-template <class Es, class BinaryPredicate>
-inline typename Es::const_iterator max_element(const Es& es, BinaryPredicate comp)
-{
- return std::max_element(es.begin(), es.end(), comp);
-}
-
-// min_element
-//
-template <class Es> inline typename Es::iterator min_element(Es& es)
-{
- return std::min_element(es.begin(), es.end());
-}
-
-template <class Es> inline typename Es::const_iterator min_element(const Es& es)
-{
- return std::min_element(es.begin(), es.end());
-}
-
-template <class Es, class BinaryPredicate>
-inline typename Es::iterator min_element(Es& es, BinaryPredicate comp)
-{
- return std::min_element(es.begin(), es.end(), comp);
-}
-
-template <class Es, class BinaryPredicate>
-inline typename Es::const_iterator min_element(const Es& es, BinaryPredicate comp)
-{
- return std::min_element(es.begin(), es.end(), comp);
-}
-
-// first_lessthan and second_lessthan
-//
-struct first_lessthan {
- template <typename T1, typename T2>
- bool operator() (const T1& e1, const T2& e2) {
- return e1.first < e2.first;
- }
-};
-
-struct second_lessthan {
- template <typename T1, typename T2>
- bool operator() (const T1& e1, const T2& e2) {
- return e1.second < e2.second;
- }
-};
-
-// first_greaterthan and second_greaterthan
-//
-struct first_greaterthan {
- template <typename T1, typename T2>
- bool operator() (const T1& e1, const T2& e2) {
- return e1.first > e2.first;
- }
-};
-
-struct second_greaterthan {
- template <typename T1, typename T2>
- bool operator() (const T1& e1, const T2& e2) {
- return e1.second > e2.second;
- }
-};
-
-
-///////////////////////////////////////////////////////////////////////////
-// //
-// hash<> specializations //
-// //
-// These must be in namespace std. They permit the corresponding STL //
-// container to be used as a key in an STL hash table. //
-// //
-///////////////////////////////////////////////////////////////////////////
-
-//namespace EXT_NAMESPACE {
-namespace std {
- /*
- // hash function for bool
- //
- template <> struct hash<bool>
- {
- size_t operator() (bool b) const
- {
- return b;
- } // operator()
- }; // hash<bool>{}
-
- // hash function for double
- //
- template <> struct hash<double>
- {
- size_t operator() (double d) const
- {
- int exponent;
- double fraction = frexp(d, &exponent);
- return size_t(exponent) ^ size_t(1000000.0*(fabs(fraction-0.5)));
- } // operator()
- }; // hash<double>{}
-
- // hash function for strings
- //
- template <> struct hash<std::string>
- {
- size_t operator()(const std::string& s) const
- {
- typedef std::string::const_iterator CI;
-
- unsigned long h = 0;
- unsigned long g;
- CI p = s.begin();
- CI end = s.end();
-
- while (p!=end) {
- h = (h << 4) + (*p++);
- if ((g = h&0xf0000000)) {
- h = h ^ (g >> 24);
- h = h ^ g;
- }}
- return size_t(h);
- } // operator()
- }; // hash<string>{}
-
-*/
- // hash function for arbitrary pairs
- //
- template<class T1, class T2> struct hash<std::pair<T1,T2> > {
- size_t operator()(const std::pair<T1,T2>& p) const
- {
- size_t h1 = hash<T1>()(p.first);
- size_t h2 = hash<T2>()(p.second);
- return h1 ^ (h1 >> 1) ^ h2 ^ (h2 << 1);
- }
- };
-
-
- // hash function for vectors
- //
- template<class T> struct hash<std::vector<T> >
- { // This is the fn hashpjw of Aho, Sethi and Ullman, p 436.
- size_t operator()(const std::vector<T>& s) const
- {
- typedef typename std::vector<T>::const_iterator CI;
-
- unsigned long h = 0;
- unsigned long g;
- CI p = s.begin();
- CI end = s.end();
-
- while (p!=end) {
- h = (h << 5) + hash<T>()(*p++);
- if ((g = h&0xff000000)) {
- h = h ^ (g >> 23);
- h = h ^ g;
- }}
- return size_t(h);
- }
- };
-
- // hash function for slists
- //
- template<class T> struct hash<ext::slist<T> >
- { // This is the fn hashpjw of Aho, Sethi and Ullman, p 436.
- size_t operator()(const ext::slist<T>& s) const
- {
- typedef typename ext::slist<T>::const_iterator CI;
-
- unsigned long h = 0;
- unsigned long g;
- CI p = s.begin();
- CI end = s.end();
-
- while (p!=end) {
- h = (h << 7) + hash<T>()(*p++);
- if ((g = h&0xff000000)) {
- h = h ^ (g >> 23);
- h = h ^ g;
- }}
- return size_t(h);
- }
- };
-
- // hash function for maps
- //
- template<typename T1, typename T2> struct hash<std::map<T1,T2> >
- {
- size_t operator()(const std::map<T1,T2>& m) const
- {
-      typedef std::map<T1,T2> M;
- typedef typename M::const_iterator CI;
-
- unsigned long h = 0;
- unsigned long g;
- CI p = m.begin();
- CI end = m.end();
-
- while (p != end) {
- h = (h << 11) + hash<typename M::value_type>()(*p++);
- if ((g = h&0xff000000)) {
- h = h ^ (g >> 23);
- h = h ^ g;
- }}
- return size_t(h);
- }
- };
-
-} // namespace std
-
-
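With these specializations in place, such containers can key the unordered containers directly; for example:

    #include <unordered_set>
    #include <utility>
    // assumes the hash<std::pair<...> > specialization above is visible

    int main() {
      std::unordered_set<std::pair<int, int> > seen;
      seen.insert(std::make_pair(1, 2));
      return seen.count(std::make_pair(1, 2)) ? 0 : 1;  // returns 0
    }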
-
-///////////////////////////////////////////////////////////////////////////
-// //
-// Write/Read code //
-// //
-// These routines should possess write/read invariance IF their elements //
-// also have write-read invariance. Whitespace, '(' and ')' are used as //
-// delimiters. //
-// //
-///////////////////////////////////////////////////////////////////////////
-
-
-// Define istream >> const char* so that it consumes the characters from the
-// istream. Just as in scanf, a space consumes an arbitrary amount of whitespace.
-//
-inline std::istream& operator>> (std::istream& is, const char* cp)
-{
- if (*cp == '\0')
- return is;
- else if (*cp == ' ') {
- char c;
- if (is.get(c)) {
- if (isspace(c))
- return is >> cp;
- else {
- is.unget();
- return is >> (cp+1);
- }
- }
- else {
- is.clear(is.rdstate() & ~std::ios::failbit); // clear failbit
- return is >> (cp+1);
- }
- }
- else {
- char c;
- if (is.get(c)) {
- if (c == *cp)
- return is >> (cp+1);
- else {
- is.unget();
- is.setstate(std::ios::failbit);
- }
- }
- return is;
- }
-}
-
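This lets string literals act as input patterns, with a space in the literal consuming any run of whitespace (as in scanf). A sketch:

    #include <cassert>
    #include <sstream>
    // assumes the operator>>(std::istream&, const char*) above is visible

    int main() {
      std::istringstream in("key =   42");
      int v = 0;
      in >> "key = " >> v;  // the literal consumes "key", '=', and the whitespace
      assert(in && v == 42);
      return 0;
    }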
-
-// Write out an auto_ptr object just as you would write out the pointer object
-//
-template <typename T>
-inline std::ostream& operator<<(std::ostream& os, const std::auto_ptr<T>& sp)
-{
- return os << sp.get();
-}
-
-
-// Pairs
-//
-template <class T1, class T2>
-std::ostream& operator<< (std::ostream& os, const std::pair<T1,T2>& p)
-{
- return os << '(' << p.first << ' ' << p.second << ')';
-}
-
-template <class T1, class T2>
-std::istream& operator>> (std::istream& is, std::pair<T1,T2>& p)
-{
- char c;
- if (is >> c) {
- if (c == '(') {
- if (is >> p.first >> p.second >> c && c == ')')
- return is;
- else
- is.setstate(std::ios::badbit);
- }
- else
- is.putback(c);
- }
- is.setstate(std::ios::failbit);
- return is;
-}
-
-// Lists
-//
-template <class T>
-std::ostream& operator<< (std::ostream& os, const std::list<T>& xs)
-{
- os << '(';
- for (typename std::list<T>::const_iterator xi = xs.begin(); xi != xs.end(); ++xi) {
- if (xi != xs.begin())
- os << ' ';
- os << *xi;
- }
- return os << ')';
-}
-
-template <class T>
-std::istream& operator>> (std::istream& is, std::list<T>& xs)
-{
- char c; // This code avoids unnecessary copy
- if (is >> c) { // read the initial '('
- if (c == '(') {
- xs.clear(); // clear the list
- do {
- xs.push_back(T()); // create a new elt in list
- is >> xs.back(); // read element
- }
- while (is.good()); // read as long as possible
- xs.pop_back(); // last read failed; pop last elt
- is.clear(is.rdstate() & ~std::ios::failbit); // clear failbit
- if (is >> c && c == ')') // read terminating ')'
- return is; // successful return
- else
- is.setstate(std::ios::badbit); // something went wrong, set badbit
- }
- else // c is not '('
- is.putback(c); // put c back into input
- }
- is.setstate(std::ios::failbit); // read failed, set failbit
- return is;
-}
-
-// Vectors
-//
-template <class T>
-std::ostream& operator<< (std::ostream& os, const std::vector<T>& xs)
-{
- os << '(';
- for (typename std::vector<T>::const_iterator xi = xs.begin(); xi != xs.end(); ++xi) {
- if (xi != xs.begin())
- os << ' ';
- os << *xi;
- }
- return os << ')';
-}
-
-template <class T>
-std::istream& operator>> (std::istream& is, std::vector<T>& xs)
-{
- char c; // This code avoids unnecessary copy
- if (is >> c) { // read the initial '('
- if (c == '(') {
- xs.clear(); // clear the list
- do {
- xs.push_back(T()); // create a new elt in list
- is >> xs.back(); // read element
- }
- while (is.good()); // read as long as possible
- xs.pop_back(); // last read failed; pop last elt
- is.clear(is.rdstate() & ~std::ios::failbit); // clear failbit
- if (is >> c && c == ')') // read terminating ')'
- return is; // successful return
- else
- is.setstate(std::ios::badbit); // something went wrong, set badbit
- }
- else // c is not '('
- is.putback(c); // put c back into input
- }
- is.setstate(std::ios::failbit); // read failed, set failbit
- return is;
-}
-
-// Slists
-//
-template <class T>
-std::ostream& operator<< (std::ostream& os, const ext::slist<T>& xs)
-{
- os << '(';
- for (typename ext::slist<T>::const_iterator xi = xs.begin(); xi != xs.end(); ++xi) {
- if (xi != xs.begin())
- os << ' ';
- os << *xi;
- }
- return os << ')';
-}
-
-template <class T>
-std::istream& operator>> (std::istream& is, ext::slist<T>& xs)
-{
- char c;
- if (is >> c) {
- if (c == '(') {
- xs.clear();
- T e;
- if (is >> e) {
- xs.push_front(e);
- typename ext::slist<T>::iterator xi = xs.begin();
- while (is >> e)
- xi = xs.insert_after(xi, e);
- is.clear(is.rdstate() & ~std::ios::failbit);
- if (is >> c && c == ')')
- return is;
- else
- is.setstate(std::ios::badbit);
- }
- else { // empty list
- is.clear(is.rdstate() & ~std::ios::failbit);
- if (is >> c && c == ')')
- return is;
- else // didn't see closing ')'
- is.setstate(std::ios::badbit);
- }
- }
- else // didn't read '('
- is.putback(c);
- }
- is.setstate(std::ios::failbit);
- return is;
-}
-
-// Sets
-//
-template <class T>
-std::ostream& operator<< (std::ostream& os, const std::set<T>& s)
-{
- os << '(';
- for (typename std::set<T>::const_iterator i = s.begin(); i != s.end(); ++i) {
- if (i != s.begin())
- os << ' ';
- os << *i;
- }
- return os << ')';
-}
-
-template <class T>
-std::istream& operator>> (std::istream& is, std::set<T>& s)
-{
- char c;
- if (is >> c) {
- if (c == '(') {
- s.clear();
- T e;
- while (is >> e)
- s.insert(e);
- is.clear(is.rdstate() & ~std::ios::failbit);
- if (is >> c && c == ')')
- return is;
- else
- is.setstate(std::ios::badbit);
- }
- else
- is.putback(c);
- }
- is.setstate(std::ios::failbit);
- return is;
-}
-
-// Hash_sets
-//
-template <class T>
-std::ostream& operator<< (std::ostream& os, const std::unordered_set<T>& s)
-{
- os << '(';
- for (typename std::unordered_set<T>::const_iterator i = s.begin(); i != s.end(); ++i) {
- if (i != s.begin())
- os << ' ';
- os << *i;
- }
- return os << ')';
-}
-
-template <class T>
-std::istream& operator>> (std::istream& is, std::unordered_set<T>& s)
-{
- char c;
- if (is >> c) {
- if (c == '(') {
- s.clear();
- T e;
- while (is >> e)
- s.insert(e);
- is.clear(is.rdstate() & ~std::ios::failbit);
- if (is >> c && c == ')')
- return is;
- else
- is.setstate(std::ios::badbit);
- }
- else
- is.putback(c);
- }
- is.setstate(std::ios::failbit);
- return is;
-}
-
-
-// Maps
-//
-template <class Key, class Value>
-std::ostream& operator<< (std::ostream& os, const std::map<Key,Value>& m)
-{
- typedef std::map<Key,Value> M;
- os << '(';
- for (typename M::const_iterator it = m.begin(); it != m.end(); ++it) {
- if (it != m.begin())
- os << ' ';
- os << *it;
- }
- return os << ")";
-}
-
-template <class Key, class Value>
-std::istream& operator>> (std::istream& is, std::map<Key,Value>& m)
-{
- char c;
- if (is >> c) {
- if (c == '(') {
- m.clear();
- std::pair<Key,Value> e;
- while (is >> e)
- m.insert(e);
- is.clear(is.rdstate() & ~std::ios::failbit);
- if (is >> c && c == ')')
- return is;
- else
- is.setstate(std::ios::badbit);
- }
- else
- is.putback(c);
- }
- is.setstate(std::ios::failbit);
- return is;
-}
-
-// Hash_maps
-//
-template <class Key, class Value>
-std::ostream& operator<< (std::ostream& os, const std::unordered_map<Key,Value>& m)
-{
- typedef std::unordered_map<Key,Value> M;
- os << '(';
- for (typename M::const_iterator it = m.begin(); it != m.end(); ++it) {
- if (it != m.begin())
- os << ' ';
- os << *it;
- }
- return os << ")";
-}
-
-template <class Key, class Value>
-std::istream& operator>> (std::istream& is, std::unordered_map<Key,Value>& m)
-{
- char c;
- if (is >> c) {
- if (c == '(') {
- m.clear();
- std::pair<Key,Value> e;
- while (is >> e)
- m.insert(e);
- is.clear(is.rdstate() & ~std::ios::failbit);
- if (is >> c && c == ')')
- return is;
- else
- is.setstate(std::ios::badbit);
- }
- else
- is.putback(c);
- }
- is.setstate(std::ios::failbit);
- return is;
-}
-
-
-///////////////////////////////////////////////////////////////////////////
-// //
-// Boost library additions //
-// //
-///////////////////////////////////////////////////////////////////////////
-
-#ifdef BOOST_SHARED_PTR_HPP_INCLUDED
-
-// enhancements to boost::shared_ptr so it can be used with hash
-//
-namespace std {
- template <typename T> struct equal_to<boost::shared_ptr<T> >
- : public binary_function<boost::shared_ptr<T>, boost::shared_ptr<T>, bool> {
- bool operator() (const boost::shared_ptr<T>& p1, const boost::shared_ptr<T>& p2) const {
- return equal_to<T*>()(p1.get(), p2.get());
- }
- };
-} // namespace std
-
-//namespace EXT_NAMESPACE {
-namespace std {
- template <typename T> struct hash<boost::shared_ptr<T> > {
- size_t operator() (const boost::shared_ptr<T>& a) const {
- return hash<T*>()(a.get());
- }
- };
-} // namespace std
-
-template <typename T>
-inline std::ostream& operator<< (std::ostream& os, const boost::shared_ptr<T>& sp)
-{
- return os << sp.get();
-}
-
-#endif // BOOST_SHARED_PTR_HPP_INCLUDED
-
-struct resource_usage { };
-
-#ifndef __i386
-inline std::ostream& operator<< (std::ostream& os, resource_usage r)
-{
- return os;
-}
-#else // Assume we are on a 586 linux
-inline std::ostream& operator<< (std::ostream& os, resource_usage r)
-{
- FILE* fp = fopen("/proc/self/stat", "r");
- assert(fp);
- int utime;
- int stime;
- unsigned int vsize;
- unsigned int rss;
- int result =
- fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d"
- "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss);
- assert(result == 4);
- fclose(fp);
- // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss
- ;
- // return s << "utime = " << utime << ", vsize = " << vsize;
- return os << "utime " << float(utime)/1.0e2 << "s, vsize "
- << float(vsize)/1048576.0 << " Mb.";
-}
-#endif
-
-//! A default_value_type{} object is used to read an object from a stream,
-//! assigning a default value if the read fails. Users should not need to
-//! construct such objects, but should use default_value() instead.
-//
-template <typename object_type, typename default_type>
-struct default_value_type {
- object_type& object;
- const default_type defaultvalue;
- default_value_type(object_type& object, const default_type defaultvalue)
- : object(object), defaultvalue(defaultvalue) { }
-};
-
-//! default_value() is used to read an object from a stream, assigning a
-//! default value if the read fails. It returns a default_value_type{}
-//! object, which does the actual reading.
-//
-template <typename object_type, typename default_type>
-default_value_type<object_type,default_type>
-default_value(object_type& object, const default_type defaultvalue=default_type()) {
- return default_value_type<object_type,default_type>(object, defaultvalue);
-}
-
-//! This version of operator>>() reads default_value_type{} from an input stream.
-//
-template <typename object_type, typename default_type>
-std::istream& operator>> (std::istream& is,
- default_value_type<object_type, default_type> dv) {
- if (is) {
- if (is >> dv.object)
- ;
- else {
- is.clear(is.rdstate() & ~std::ios::failbit); // clear failbit
- dv.object = dv.defaultvalue;
- }
- }
- return is;
-}
-
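A sketch of the intended use, reading a field that may be absent or malformed:

    #include <cassert>
    #include <sstream>
    // assumes default_value() from utility.h (above) is in scope

    int main() {
      int count = -1;
      std::istringstream in("oov");
      in >> default_value(count, 1);  // int extraction fails, so count = 1
      assert(count == 1);
      return 0;
    }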
-// inline F random1() { return rand()/(RAND_MAX+1.0); }
-inline F random1() { return mt_genrand_res53(); }
-
-#endif // UTILITY_H
diff --git a/gi/pyp-topics/src/workers.hh b/gi/pyp-topics/src/workers.hh
deleted file mode 100644
index 95b18947..00000000
--- a/gi/pyp-topics/src/workers.hh
+++ /dev/null
@@ -1,275 +0,0 @@
-/**
- Basic thread-pool tools using Boost.Thread.
- (Jan Botha, 7/2010)
-
- --Simple usage--
- Use SimpleWorker.
- Example, call a function that returns an int in a new thread:
- typedef boost::function<int()> JobType;
- JobType job = boost::bind(funcname);
- //or boost::bind(&class::funcname, this) for a member function
- SimpleWorker<JobType, int> worker(job);
- int result = worker.getResult(); //blocks until result is ready
-
- --Extended usage--
- Use WorkerPool, which uses Queuemt (a synchronized queue) and Worker.
- Example:
-    (same context and typedefs as above)
- WorkerPool<JobType, int> pool(num_threads);
- JobType job = ...
- pool.addJob(job);
- ...
-    pool.get_result(); //blocks until all workers are done, returns the sum of their results.
-
-  Jobs added to a WorkerPool must all be of the same type, and a WorkerPool instance should not be reused (e.g. by adding jobs) after get_result() has been called.
-*/
-
-#ifndef WORKERS_HH
-#define WORKERS_HH
-
-#include <iostream>
-#include <boost/bind.hpp>
-#include <boost/function.hpp>
-#include <queue>
-#include <boost/ptr_container/ptr_vector.hpp>
-#include <boost/thread/thread.hpp>
-#include <boost/thread/mutex.hpp>
-#include <boost/thread/shared_mutex.hpp>
-#include <boost/thread/future.hpp>
-#include <boost/thread/condition.hpp>
-
-#include <boost/date_time/posix_time/posix_time_types.hpp>
-#include "timing.h"
-
-/** Implements a synchronized queue*/
-template<typename J>
-class Queuemt
-{
-
-public:
- boost::condition_variable_any cond;
- const bool& running;
-
-  // no default constructor: the reference member 'running' must be bound
- Queuemt(const bool& running) : running(running), maxsize(0), qsize(0)
- {
- }
-
- ~Queuemt() {
- }
-
- J pop()
- {
- J job;
- {
- boost::unique_lock<boost::shared_mutex> qlock(q_mutex);
- while (running && qsize == 0)
- cond.wait(qlock);
-
- if (qsize > 0)
- {
- job = q.front();
- q.pop();
- --qsize;
- }
- }
- if (job)
- cond.notify_one();
- return job;
-
- }
-
- void push(J job)
- {
- {
- boost::unique_lock<boost::shared_mutex> lock(q_mutex);
- q.push(job);
- ++qsize;
- }
- if (qsize > maxsize)
- maxsize = qsize;
-
- cond.notify_one();
- }
-
- int getMaxsize()
- {
- return maxsize;
- }
-
- int size()
- {
- return qsize;
- }
-
-private:
- boost::shared_mutex q_mutex;
- std::queue<J> q;
- int maxsize;
- volatile int qsize;
-};
-
-
-template<typename J, typename R>
-class Worker
-{
-typedef boost::packaged_task<R> PackagedTask;
-public:
- Worker(Queuemt<J>& queue, int id, int num_workers) :
- q(queue), tasktime(0.0), id(id), num_workers(num_workers)
- {
- PackagedTask task(boost::bind(&Worker<J, R>::run, this));
- future = task.get_future();
- boost::thread t(boost::move(task));
- }
-
- R run() //this is called upon thread creation
- {
- R wresult = 0;
- while (isRunning())
- {
- J job = q.pop();
-
- if (job)
- {
- timer.Reset();
- wresult += job();
- tasktime += timer.Elapsed();
- }
- }
- return wresult;
- }
-
- R getResult()
- {
- if (!future.is_ready())
- future.wait();
- assert(future.is_ready());
- return future.get();
- }
-
- double getTaskTime()
- {
- return tasktime;
- }
-
-private:
-
- Queuemt<J>& q;
-
- boost::unique_future<R> future;
-
- bool isRunning()
- {
- return q.running || q.size() > 0;
- }
-
- Timer timer;
- double tasktime;
- int id;
- int num_workers;
-};
-
-template<typename J, typename R>
-class WorkerPool
-{
-typedef boost::packaged_task<R> PackagedTask;
-typedef Worker<J,R> WJR;
-typedef boost::ptr_vector<WJR> WorkerVector;
-public:
-
- WorkerPool(int num_workers)
- {
- q.reset(new Queuemt<J>(running));
- running = true;
- for (int i = 0; i < num_workers; ++i)
- workers.push_back( new Worker<J, R>(*q, i, num_workers) );
- }
-
- ~WorkerPool()
- {
- }
-
- R get_result()
- {
- running = false;
- q->cond.notify_all();
- R tmp = 0;
- double tasktime = 0.0;
- for (typename WorkerVector::iterator it = workers.begin(); it != workers.end(); it++)
- {
- R res = it->getResult();
- tmp += res;
- //std::cerr << "tasktime: " << it->getTaskTime() << std::endl;
- tasktime += it->getTaskTime();
- }
-// std::cerr << " maxQ = " << q->getMaxsize() << std::endl;
- return tmp;
- }
-
- void addJob(J job)
- {
- q->push(job);
- }
-
-private:
-
- WorkerVector workers;
-
- boost::shared_ptr<Queuemt<J> > q;
-
- bool running;
-};
-
-///////////////////
-template <typename J, typename R>
-class SimpleWorker
-{
-typedef boost::packaged_task<R> PackagedTask;
-public:
- SimpleWorker(J& job) : job(job), tasktime(0.0)
- {
- PackagedTask task(boost::bind(&SimpleWorker<J, R>::run, this));
- future = task.get_future();
- boost::thread t(boost::move(task));
- }
-
- R run() //this is called upon thread creation
- {
- R wresult = 0;
-
- assert(job);
- timer.Reset();
- wresult = job();
- tasktime = timer.Elapsed();
- std::cerr << tasktime << " s" << std::endl;
- return wresult;
- }
-
- R getResult()
- {
- if (!future.is_ready())
- future.wait();
- assert(future.is_ready());
- return future.get();
- }
-
- double getTaskTime()
- {
- return tasktime;
- }
-
-private:
-
- J job;
-
- boost::unique_future<R> future;
-
- Timer timer;
- double tasktime;
-
-};
-
-
-
-#endif
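A self-contained WorkerPool sketch in the spirit of the header comment above (count_tokens is a hypothetical job):

    #include <boost/bind.hpp>
    #include <boost/function.hpp>
    #include <iostream>
    #include "workers.hh"  // the header above

    int count_tokens(int shard) { return shard * 100; }  // stand-in for real work

    int main() {
      typedef boost::function<int()> JobType;
      WorkerPool<JobType, int> pool(4);             // four worker threads
      for (int i = 0; i < 16; ++i)
        pool.addJob(boost::bind(count_tokens, i));
      std::cout << pool.get_result() << std::endl;  // blocks; prints 12000
      return 0;
    }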
diff --git a/gi/scripts/buck2utf8.pl b/gi/scripts/buck2utf8.pl
deleted file mode 100755
index 1acfae8d..00000000
--- a/gi/scripts/buck2utf8.pl
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-use utf8;
-binmode(STDOUT, ":utf8");
-while(<>) {
- chomp;
- my @words = split /\s+/;
- for my $w (@words) {
- $_ = $w;
- if ($w =~ /^__NTK__/o) {
- s/__NTK__//go;
- next if /^$/;
- print STDOUT "$_ ";
- next;
- }
-s/tR/\x{0679}/g; # retroflex t
-s/dR/\x{0688}/g; # retroflex d
-s/rR/\x{0691}/g; # retroflex r
-s/p/\x{067E}/g; # peh
-s/c/\x{0686}/g; # tcheh
-s/g/\x{06AF}/g; # geh (G=ghain)
-s/@/\x{06BE}/g; # heh doachashmee
-s/h'/\x{06c2}/g; # heh goal + hamza
-s/h/\x{06c1}/g; # heh goal
-s/J/\x{0698}/g; # zheh (rare, usually persian loan words)
-s/k/\x{06A9}/g; # k
-s/Y'/\x{06d3}/g; # yeh barree + hamza above (ligature)
-s/y/\x{06cc}/g; # same as ya' in arabic
-s/Y/\x{06d2}/g; # yeh barree
-s/N/\x{06BA}/g; # Ghunna
-
- s/\'/\x{0621}/g;
- s/\|/\x{0622}/g;
- s/\>/\x{0623}/g;
- s/\&/\x{0624}/g;
- s/\</\x{0625}/g;
- s/\}/\x{0626}/g;
- s/A/\x{0627}/g;
- s/b/\x{0628}/g;
- s/t/\x{062A}/g;
- s/v/\x{062B}/g;
- s/j/\x{062C}/g;
- s/H/\x{062D}/g;
- s/x/\x{062E}/g;
- s/d/\x{062F}/g;
- s/\*/\x{0630}/g;
- s/r/\x{0631}/g;
- s/z/\x{0632}/g;
- s/s/\x{0633}/g;
- s/\$/\x{0634}/g;
- s/S/\x{0635}/g;
- s/D/\x{0636}/g;
- s/T/\x{0637}/g;
- s/Z/\x{0638}/g;
- s/E/\x{0639}/g;
- s/g/\x{063A}/g;
- s/_/\x{0640}/g;
- s/f/\x{0641}/g;
- s/q/\x{0642}/g;
- s/k/\x{0643}/g;
- s/l/\x{0644}/g;
- s/m/\x{0645}/g;
- s/n/\x{0646}/g;
- s/h/\x{0647}/g;
- s/w/\x{0648}/g;
- s/Y/\x{0649}/g;
- s/y/\x{064A}/g;
- s/F/\x{064B}/g;
- s/N/\x{064C}/g;
- s/K/\x{064D}/g;
- s/a/\x{064E}/g;
- s/u/\x{064F}/g;
- s/i/\x{0650}/g;
- s/\~/\x{0651}/g;
- s/o/\x{0652}/g;
- s/\`/\x{0670}/g;
- s/\{/\x{0671}/g;
- s/P/\x{067E}/g;
- s/J/\x{0686}/g;
- s/V/\x{06A4}/g;
- s/G/\x{06AF}/g;
-
-
-print STDOUT "$_ ";
- }
- print STDOUT "\n";
-}
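Note that the script substitutes in source order, so the Urdu-specific rules at the top consume letters (g, k, h, y, Y, J, N) before the general Buckwalter rules below can see them; the later duplicates for those letters are effectively dead. A table-driven sketch in C++ sidesteps the ordering hazard for the single-character rules (digraphs such as tR or h' would still need a longest-match pass first):

    #include <map>
    #include <string>

    // Illustrative subset of the mapping; one code point per source letter.
    std::wstring buck2utf8(const std::string& in) {
      static std::map<char, wchar_t> table;
      if (table.empty()) {
        table['p'] = L'\x067E'; table['g'] = L'\x06AF';
        table['k'] = L'\x06A9'; table['A'] = L'\x0627';
        // ... remaining single-character rules elided ...
      }
      std::wstring out;
      for (std::string::const_iterator it = in.begin(); it != in.end(); ++it) {
        std::map<char, wchar_t>::const_iterator hit = table.find(*it);
        out += (hit == table.end()) ? wchar_t(*it) : hit->second;
      }
      return out;
    }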