From 0da1f6de1b33bbff5cb99b1938bb07d050479f10 Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Wed, 14 Dec 2011 21:02:50 -0800
Subject: random incomplete metric stuff, including string subsequence kernel impl

---
 utils/kernel_string_subseq.h | 51 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 utils/kernel_string_subseq.h

(limited to 'utils')

diff --git a/utils/kernel_string_subseq.h b/utils/kernel_string_subseq.h
new file mode 100644
index 00000000..516e8b89
--- /dev/null
+++ b/utils/kernel_string_subseq.h
@@ -0,0 +1,51 @@
+#ifndef _KERNEL_STRING_SUBSEQ_H_
+#define _KERNEL_STRING_SUBSEQ_H_
+
+#include <vector>
+#include <cmath>
+#include <boost/multi_array.hpp>
+
+template <unsigned N, typename T>
+float ssk(const T* s, const size_t s_size, const T* t, const size_t t_size, const float lambda) {
+  assert(N > 0);
+  boost::multi_array<float, 3> kp(boost::extents[N + 1][s_size + 1][t_size + 1]);
+  const float l2 = lambda * lambda;
+  for (unsigned j = 0; j < s_size; ++j)
+    for (unsigned k = 0; k < t_size; ++k)
+      kp[0][j][k] = 1.0f;
+  for (unsigned i = 0; i < N; ++i) {
+    for (unsigned j = 0; j < s_size; ++j) {
+      float kpp = 0.0f;
+      for (unsigned k = 0; k < t_size; ++k) {
+        kpp = lambda * (kpp + lambda * (s[j]==t[k]) * kp[i][j][k]);
+        kp[i + 1][j + 1][k + 1] = lambda * kp[i + 1][j][k + 1] + kpp;
+      }
+    }
+  }
+  float kn = 0.0f;
+  for (unsigned i = 0; i < N; ++i)
+    for (unsigned j = 0; j < s_size; ++j)
+      for (unsigned k = 0; k < t_size; ++k)
+        kn += l2 * (s[j] == t[k]) * kp[i][j][k];
+  return kn;
+}
+
+template <unsigned N, typename T>
+float ssk(const std::vector<T>& s, const std::vector<T>& t, const float lambda) {
+  float kst = ssk<N>(&s[0], s.size(), &t[0], t.size(), lambda);
+  if (!kst) return 0.0f;
+  float kss = ssk<N>(&s[0], s.size(), &s[0], s.size(), lambda);
+  float ktt = ssk<N>(&t[0], t.size(), &t[0], t.size(), lambda);
+  return kst / std::sqrt(kss * ktt);
+}
+
+template <unsigned N>
+float ssk(const std::string& s, const std::string& t, const float lambda) {
+  float kst = ssk<N>(&s[0], s.size(), &t[0], t.size(), lambda);
+  if (!kst) return 0.0f;
+  float kss = ssk<N>(&s[0], s.size(), &s[0], s.size(), lambda);
+  float ktt = ssk<N>(&t[0], t.size(), &t[0], t.size(), lambda);
+  return kst / std::sqrt(kss * ktt);
+}
+
+#endif
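
The functions above compute a gap-weighted string subsequence kernel (matches of subsequences up to length N, with gaps decayed by lambda) and normalize it so that a string compared against itself scores 1. A short usage sketch, not part of the patch; the inputs, lambda = 0.8, and N = 4 are invented for illustration:

    #include "kernel_string_subseq.h"
    #include <iostream>

    int main() {
      // Normalized kernel: 1.0 for identical strings, values in (0, 1] otherwise.
      std::cout << ssk<4>(std::string("sciences"), std::string("sciences"), 0.8) << "\n"; // prints 1
      std::cout << ssk<4>(std::string("science"), std::string("sciences"), 0.8) << "\n";  // slightly below 1
      return 0;
    }

-- 
cgit v1.2.3


From 2eb3bb96c6f780c477585b33273fc0c0d56c80e4 Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Tue, 20 Dec 2011 15:51:11 -0500
Subject: new scorer interface is implemented, but not used

---
 mteval/Makefile.am    |   2 +-
 mteval/ns.cc          |  67 ++++++++++++++++++++------
 mteval/ns.h           |  23 +++++----
 mteval/ns_comb.cc     |  87 +++++++++++++++++++++++++++++++++
 mteval/ns_comb.h      |  19 ++++++++
 mteval/ns_ext.cc      | 130 ++++++++++++++++++++++++++++++++++++++++++++++++++
 mteval/ns_ext.h       |  21 ++++++++
 mteval/ns_ter.cc      | 126 ++++++++++--------------------------------------
 mteval/ns_ter.h       |   1 +
 mteval/scorer_test.cc |  12 +++--
 utils/stringlib.h     |   7 +++
 11 files changed, 362 insertions(+), 133 deletions(-)
 create mode 100644 mteval/ns_comb.cc
 create mode 100644 mteval/ns_comb.h
 create mode 100644 mteval/ns_ext.cc
 create mode 100644 mteval/ns_ext.h

(limited to 'utils')

diff --git a/mteval/Makefile.am b/mteval/Makefile.am
index 95845090..6679d949 100644
--- a/mteval/Makefile.am
+++ b/mteval/Makefile.am
@@ -10,7 +10,7 @@ endif
 
 noinst_LIBRARIES = libmteval.a
 
-libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc
+libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc
 
 fast_score_SOURCES = fast_score.cc
 fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz

diff --git a/mteval/ns.cc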
b/mteval/ns.cc index 1045a51f..6139757d 100644 --- a/mteval/ns.cc +++ b/mteval/ns.cc @@ -1,5 +1,7 @@ #include "ns.h" #include "ns_ter.h" +#include "ns_ext.h" +#include "ns_comb.h" #include #include @@ -7,6 +9,9 @@ #include #include +#include "tdict.h" +#include "stringlib.h" + using namespace std; using boost::shared_ptr; @@ -19,6 +24,7 @@ struct DefaultSegmentEvaluator : public SegmentEvaluator { DefaultSegmentEvaluator(const vector >& refs, const EvaluationMetric* em) : refs_(refs), em_(em) {} void Evaluate(const vector& hyp, SufficientStats* out) const { em_->ComputeSufficientStatistics(hyp, refs_, out); + out->id_ = em_->MetricId(); } const vector > refs_; const EvaluationMetric* em_; @@ -28,6 +34,11 @@ shared_ptr EvaluationMetric::CreateSegmentEvaluator(const vect return shared_ptr(new DefaultSegmentEvaluator(refs, this)); } +#define MAX_SS_VECTOR_SIZE 50 +unsigned EvaluationMetric::SufficientStatisticsVectorSize() const { + return MAX_SS_VECTOR_SIZE; +} + void EvaluationMetric::ComputeSufficientStatistics(const vector&, const vector >&, SufficientStats*) const { @@ -35,6 +46,12 @@ void EvaluationMetric::ComputeSufficientStatistics(const vector&, abort(); } +string EvaluationMetric::DetailedScore(const SufficientStats& stats) const { + ostringstream os; + os << MetricId() << "=" << ComputeScore(stats); + return os.str(); +} + enum BleuType { IBM, Koehn, NIST }; template struct BleuSegmentEvaluator : public SegmentEvaluator { @@ -57,7 +74,7 @@ struct BleuSegmentEvaluator : public SegmentEvaluator { void Evaluate(const vector& hyp, SufficientStats* out) const { out->fields.resize(N + N + 2); - out->evaluation_metric = evaluation_metric; + out->id_ = evaluation_metric->MetricId(); for (unsigned i = 0; i < N+N+2; ++i) out->fields[i] = 0; ComputeNgramStats(hyp, &out->fields[0], &out->fields[N], true); @@ -157,7 +174,12 @@ struct BleuSegmentEvaluator : public SegmentEvaluator { template struct BleuMetric : public EvaluationMetric { BleuMetric() : EvaluationMetric("IBM_BLEU") {} - float ComputeScore(const SufficientStats& stats) const { + unsigned SufficientStatisticsVectorSize() const { return N*2 + 2; } + shared_ptr CreateSegmentEvaluator(const vector >& refs) const { + return shared_ptr(new BleuSegmentEvaluator(refs, this)); + } + float ComputeBreakdown(const SufficientStats& stats, float* bp, vector* out) const { + if (out) { out->clear(); } float log_bleu = 0; int count = 0; for (int i = 0; i < N; ++i) { @@ -166,7 +188,7 @@ struct BleuMetric : public EvaluationMetric { // smooth bleu if (!cor_count) { cor_count = 0.01; } float lprec = log(cor_count) - log(stats.fields[i+N]); // log(hyp_ngram_counts[i]); - // if (precs) precs->push_back(exp(lprec)); + if (out) out->push_back(exp(lprec)); log_bleu += lprec; ++count; } @@ -178,32 +200,51 @@ struct BleuMetric : public EvaluationMetric { if (hyp_len < ref_len) lbp = (hyp_len - ref_len) / hyp_len; log_bleu += lbp; - //if (bp) *bp = exp(lbp); + if (bp) *bp = exp(lbp); return exp(log_bleu); } - shared_ptr CreateSegmentEvaluator(const vector >& refs) const { - return shared_ptr(new BleuSegmentEvaluator(refs, this)); + string DetailedScore(const SufficientStats& stats) const { + char buf[2000]; + vector precs(N); + float bp; + float bleu = ComputeBreakdown(stats, &bp, &precs); + sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)", + bleu*100.0, + precs[0]*100.0, + precs[1]*100.0, + precs[2]*100.0, + precs[3]*100.0, + bp); + return buf; + } + float ComputeScore(const SufficientStats& stats) const { + return ComputeBreakdown(stats, NULL, 
NULL); } }; -EvaluationMetric* EvaluationMetric::Instance(const string& metric_id) { +EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) { static bool is_first = true; if (is_first) { instances_["NULL"] = NULL; is_first = false; } + const string metric_id = UppercaseString(imetric_id); map::iterator it = instances_.find(metric_id); if (it == instances_.end()) { EvaluationMetric* m = NULL; - if (metric_id == "IBM_BLEU") { + if (metric_id == "IBM_BLEU") { m = new BleuMetric<4, IBM>; } else if (metric_id == "NIST_BLEU") { m = new BleuMetric<4, NIST>; - } else if (metric_id == "Koehn_BLEU") { + } else if (metric_id == "KOEHN_BLEU") { m = new BleuMetric<4, Koehn>; } else if (metric_id == "TER") { m = new TERMetric; + } else if (metric_id == "METEOR") { + m = new ExternalMetric("METEOR", "java -Xmx1536m -jar /Users/cdyer/software/meteor/meteor-1.3.jar - - -mira -lower -t tune -l en"); + } else if (metric_id.find("COMB:") == 0) { + m = new CombinationMetric(metric_id); } else { cerr << "Implement please: " << metric_id << endl; abort(); @@ -220,9 +261,7 @@ EvaluationMetric* EvaluationMetric::Instance(const string& metric_id) { SufficientStats::SufficientStats(const string& encoded) { istringstream is(encoded); - string type; - is >> type; - evaluation_metric = EvaluationMetric::Instance(type); + is >> id_; float val; while(is >> val) fields.push_back(val); @@ -230,8 +269,8 @@ SufficientStats::SufficientStats(const string& encoded) { void SufficientStats::Encode(string* out) const { ostringstream os; - if (evaluation_metric) - os << evaluation_metric->MetricId(); + if (id_.size() > 0) + os << id_; else os << "NULL"; for (unsigned i = 0; i < fields.size(); ++i) diff --git a/mteval/ns.h b/mteval/ns.h index f19b7509..622265db 100644 --- a/mteval/ns.h +++ b/mteval/ns.h @@ -7,18 +7,15 @@ #include #include "wordid.h" -class EvaluationMetric; - class SufficientStats { public: - SufficientStats() : evaluation_metric() {} + SufficientStats() : id_() {} explicit SufficientStats(const std::string& encoded); - explicit SufficientStats(const EvaluationMetric* s) : evaluation_metric(s) {} - SufficientStats(const EvaluationMetric* s, const std::vector& f) : - evaluation_metric(s), fields(f) {} + SufficientStats(const std::string& mid, const std::vector& f) : + id_(mid), fields(f) {} SufficientStats& operator+=(const SufficientStats& delta) { - if (delta.evaluation_metric) evaluation_metric = delta.evaluation_metric; + if (id_.empty() && delta.id_.size()) id_ = delta.id_; if (fields.size() != delta.fields.size()) fields.resize(std::max(fields.size(), delta.fields.size())); for (unsigned i = 0; i < delta.fields.size(); ++i) @@ -26,7 +23,7 @@ class SufficientStats { return *this; } SufficientStats& operator-=(const SufficientStats& delta) { - if (delta.evaluation_metric) evaluation_metric = delta.evaluation_metric; + if (id_.empty() && delta.id_.size()) id_ = delta.id_; if (fields.size() != delta.fields.size()) fields.resize(std::max(fields.size(), delta.fields.size())); for (unsigned i = 0; i < delta.fields.size(); ++i) @@ -53,7 +50,7 @@ class SufficientStats { } void Encode(std::string* out) const; - const EvaluationMetric* evaluation_metric; + std::string id_; std::vector fields; }; @@ -73,13 +70,13 @@ struct SegmentEvaluator { }; // Instructions for implementing a new metric -// Override MetricId() and give the metric a unique string name (no spaces) // To Instance(), add something that creates the metric +// Implement ComputeScore(const SufficientStats& stats) const; // Implement ONE of the 
following:
//    1) void ComputeSufficientStatistics(const std::vector<std::vector<WordID> >& refs, SufficientStats* out) const;
//    2) a new SegmentEvaluator class AND CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const;
-// The latter (#2) is only used when it is necessary to precompute per-segment data from a set of refs
-// Implement ComputeScore(const SufficientStats& stats) const;
+//  [The latter (#2) is only used when it is necessary to precompute per-segment data from a set of refs]
+// OPTIONAL: Override SufficientStatisticsVectorSize() if it is easy to do so
 class EvaluationMetric {
  public:
   static EvaluationMetric* Instance(const std::string& metric_id = "IBM_BLEU");
@@ -91,7 +88,9 @@ class EvaluationMetric {
  public:
   const std::string& MetricId() const { return name_; }
 
+  virtual unsigned SufficientStatisticsVectorSize() const;
   virtual float ComputeScore(const SufficientStats& stats) const = 0;
+  virtual std::string DetailedScore(const SufficientStats& stats) const;
   virtual boost::shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const;
   virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
                                            const std::vector<std::vector<WordID> >& refs,

diff --git a/mteval/ns_comb.cc b/mteval/ns_comb.cc
new file mode 100644
index 00000000..41c634cd
--- /dev/null
+++ b/mteval/ns_comb.cc
@@ -0,0 +1,87 @@
+#include "ns_comb.h"
+
+#include <iostream>
+
+#include "stringlib.h"
+
+using namespace std;
+
+// e.g. COMB:IBM_BLEU=0.5;TER=0.5
+CombinationMetric::CombinationMetric(const std::string& cmd) :
+    EvaluationMetric(cmd),
+    total_size() {
+  if (cmd.find("COMB:") != 0 || cmd.size() < 9) {
+    cerr << "Error in combination metric specifier: " << cmd << endl;
+    exit(1);
+  }
+  string mix = cmd.substr(5);
+  vector<string> comps;
+  Tokenize(cmd.substr(5), ';', &comps);
+  if(comps.size() < 2) {
+    cerr << "Error in combination metric specifier: " << cmd << endl;
+    exit(1);
+  }
+  vector<string> cwpairs;
+  for (unsigned i = 0; i < comps.size(); ++i) {
+    Tokenize(comps[i], '=', &cwpairs);
+    if (cwpairs.size() != 2) { cerr << "Error in combination metric specifier: " << cmd << endl; exit(1); }
+    metrics.push_back(EvaluationMetric::Instance(cwpairs[0]));
+    coeffs.push_back(atof(cwpairs[1].c_str()));
+    offsets.push_back(total_size);
+    total_size += metrics.back()->SufficientStatisticsVectorSize();
+    cerr << (i > 0 ? " + " : "( ") << coeffs.back() << " * " << cwpairs[0];
+  }
+  cerr << " )\n";
+}
+
+struct CombinationSegmentEvaluator : public SegmentEvaluator {
+  CombinationSegmentEvaluator(const string& id,
+                              const vector<vector<WordID> >& refs,
+                              const vector<EvaluationMetric*>& metrics,
+                              const vector<unsigned>& offsets,
+                              const unsigned ts) : id_(id), offsets_(offsets), total_size_(ts), component_evaluators_(metrics.size()) {
+    for (unsigned i = 0; i < metrics.size(); ++i)
+      component_evaluators_[i] = metrics[i]->CreateSegmentEvaluator(refs);
+  }
+  virtual void Evaluate(const std::vector<WordID>& hyp, SufficientStats* out) const {
+    out->id_ = id_;
+    out->fields.resize(total_size_);
+    for (unsigned i = 0; i < component_evaluators_.size(); ++i) {
+      SufficientStats t;
+      component_evaluators_[i]->Evaluate(hyp, &t);
+      for (unsigned j = 0; j < t.fields.size(); ++j) {
+        unsigned op = j + offsets_[i];
+        assert(op < out->fields.size());
+        out->fields[op] = t[j];
+      }
+    }
+  }
+  const string& id_;
+  const vector<unsigned>& offsets_;
+  const unsigned total_size_;
+  vector<boost::shared_ptr<SegmentEvaluator> > component_evaluators_;
+};
+
+boost::shared_ptr<SegmentEvaluator> CombinationMetric::CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const {
+  boost::shared_ptr<SegmentEvaluator> res;
+  res.reset(new CombinationSegmentEvaluator(MetricId(), refs, metrics, offsets, total_size));
+  return res;
+}
+
+float CombinationMetric::ComputeScore(const SufficientStats& stats) const {
+  float tot = 0;
+  for (unsigned i = 0; i < metrics.size(); ++i) {
+    SufficientStats t;
+    unsigned next = total_size;
+    if (i + 1 < offsets.size()) next = offsets[i+1];
+    for (unsigned j = offsets[i]; j < next; ++j)
+      t.fields.push_back(stats[j]);
+    tot += metrics[i]->ComputeScore(t) * coeffs[i];
+  }
+  return tot;
+}
+
+unsigned CombinationMetric::SufficientStatisticsVectorSize() const {
+  return total_size;
+}

diff --git a/mteval/ns_comb.h b/mteval/ns_comb.h
new file mode 100644
index 00000000..140e7e6a
--- /dev/null
+++ b/mteval/ns_comb.h
@@ -0,0 +1,19 @@
+#ifndef _NS_COMB_H_
+#define _NS_COMB_H_
+
+#include "ns.h"
+
+class CombinationMetric : public EvaluationMetric {
+ public:
+  CombinationMetric(const std::string& cmd);
+  virtual boost::shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const;
+  virtual float ComputeScore(const SufficientStats& stats) const;
+  virtual unsigned SufficientStatisticsVectorSize() const;
+ private:
+  std::vector<EvaluationMetric*> metrics;
+  std::vector<float> coeffs;
+  std::vector<unsigned> offsets;
+  unsigned total_size;
+};
+
+#endif

diff --git a/mteval/ns_ext.cc b/mteval/ns_ext.cc
new file mode 100644
index 00000000..956708af
--- /dev/null
+++ b/mteval/ns_ext.cc
@@ -0,0 +1,130 @@
+#include "ns_ext.h"
+
+#include <cstdio> // popen
+#include <cstdlib>
+#include <cstring>
+#include <unistd.h>
+#include <sstream>
+#include <iostream>
+#include <cassert>
+
+#include "stringlib.h"
+#include "tdict.h"
+
+using namespace std;
+
+struct NScoreServer {
+  NScoreServer(const std::string& cmd);
+  ~NScoreServer();
+
+  float ComputeScore(const std::vector<float>& fields);
+  void Evaluate(const std::vector<std::vector<WordID> >& refs, const std::vector<WordID>& hyp, std::vector<float>* fields);
+
+ private:
+  void RequestResponse(const std::string& request, std::string* response);
+  int p2c[2];
+  int c2p[2];
+};
+
+NScoreServer::NScoreServer(const string& cmd) {
+  cerr << "Invoking " << cmd << " ..."
<< endl;
+  if (pipe(p2c) < 0) { perror("pipe"); exit(1); }
+  if (pipe(c2p) < 0) { perror("pipe"); exit(1); }
+  pid_t cpid = fork();
+  if (cpid < 0) { perror("fork"); exit(1); }
+  if (cpid == 0) {  // child
+    close(p2c[1]);
+    close(c2p[0]);
+    dup2(p2c[0], 0);
+    close(p2c[0]);
+    dup2(c2p[1], 1);
+    close(c2p[1]);
+    cerr << "Exec'ing from child " << cmd << endl;
+    vector<string> vargs;
+    SplitOnWhitespace(cmd, &vargs);
+    const char** cargv = static_cast<const char**>(malloc(sizeof(const char*) * vargs.size()));
+    for (unsigned i = 1; i < vargs.size(); ++i) cargv[i-1] = vargs[i].c_str();
+    cargv[vargs.size() - 1] = NULL;
+    execvp(vargs[0].c_str(), (char* const*)cargv);
+  } else { // parent
+    close(c2p[1]);
+    close(p2c[0]);
+  }
+  string dummy;
+  RequestResponse("SCORE ||| Reference initialization string . ||| Testing initialization string .", &dummy);
+  assert(dummy.size() > 0);
+  cerr << "Connection established.\n";
+}
+
+NScoreServer::~NScoreServer() {
+  // TODO close stuff, join stuff
+}
+
+float NScoreServer::ComputeScore(const vector<float>& fields) {
+  ostringstream os;
+  os << "EVAL |||";
+  for (unsigned i = 0; i < fields.size(); ++i)
+    os << ' ' << fields[i];
+  string sres;
+  RequestResponse(os.str(), &sres);
+  return strtod(sres.c_str(), NULL);
+}
+
+void NScoreServer::Evaluate(const vector<vector<WordID> >& refs, const vector<WordID>& hyp, vector<float>* fields) {
+  ostringstream os;
+  os << "SCORE";
+  for (unsigned i = 0; i < refs.size(); ++i) {
+    os << " |||";
+    for (unsigned j = 0; j < refs[i].size(); ++j) {
+      os << ' ' << TD::Convert(refs[i][j]);
+    }
+  }
+  os << " |||";
+  for (unsigned i = 0; i < hyp.size(); ++i) {
+    os << ' ' << TD::Convert(hyp[i]);
+  }
+  string sres;
+  RequestResponse(os.str(), &sres);
+  istringstream is(sres);
+  float val;
+  fields->clear();
+  while(is >> val)
+    fields->push_back(val);
+}
+
+#define MAX_BUF 16000
+
+void NScoreServer::RequestResponse(const string& request, string* response) {
+//  cerr << "@SERVER: " << request << endl;
+  string x = request + "\n";
+  write(p2c[1], x.c_str(), x.size());
+  char buf[MAX_BUF];
+  size_t n = read(c2p[0], buf, MAX_BUF);
+  while (n < MAX_BUF && buf[n-1] != '\n')
+    n += read(c2p[0], &buf[n], MAX_BUF - n);
+
+  buf[n-1] = 0;
+  if (n < 2) {
+    cerr << "Malformed response: " << buf << endl;
+  }
+  *response = Trim(buf, " \t\n");
+//  cerr << "@RESPONSE: '" << *response << "'\n";
+}
+
+void ExternalMetric::ComputeSufficientStatistics(const std::vector<WordID>& hyp,
+                                                 const std::vector<std::vector<WordID> >& refs,
+                                                 SufficientStats* out) const {
+  eval_server->Evaluate(refs, hyp, &out->fields);
+}
+
+float ExternalMetric::ComputeScore(const SufficientStats& stats) const {
+  return eval_server->ComputeScore(stats.fields);
+}
+
+ExternalMetric::ExternalMetric(const string& metric_name, const std::string& command) :
+    EvaluationMetric(metric_name),
+    eval_server(new NScoreServer(command)) {}
+
+ExternalMetric::~ExternalMetric() {
+  delete eval_server;
+}

diff --git a/mteval/ns_ext.h b/mteval/ns_ext.h
new file mode 100644
index 00000000..78badb2e
--- /dev/null
+++ b/mteval/ns_ext.h
@@ -0,0 +1,21 @@
+#ifndef _NS_EXTERNAL_SCORER_H_
+#define _NS_EXTERNAL_SCORER_H_
+
+#include "ns.h"
+
+struct NScoreServer;
+class ExternalMetric : public EvaluationMetric {
+ public:
+  ExternalMetric(const std::string& metricid, const std::string& command);
+  ~ExternalMetric();
+
+  virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
+                                           const std::vector<std::vector<WordID> >& refs,
+                                           SufficientStats* out) const;
+  virtual float ComputeScore(const SufficientStats& stats) const;
+
+ protected:
+  NScoreServer* eval_server;
+};
+
+#endif
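
The implementation checklist in ns.h above is easiest to see with a concrete example. The sketch below is not part of the patch: LengthRatioMetric is an invented toy metric (hypothesis length over reference length) used only to show the overridable hooks and the additive sufficient-statistics layout; a real metric would also need a branch in EvaluationMetric::Instance() to be constructible by name.

    // Toy metric sketch against the new interface (assumes ns.h as patched above).
    #include "ns.h"

    class LengthRatioMetric : public EvaluationMetric {
     public:
      LengthRatioMetric() : EvaluationMetric("LENRATIO") {}
      virtual unsigned SufficientStatisticsVectorSize() const { return 2; }
      virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
                                               const std::vector<std::vector<WordID> >& refs,
                                               SufficientStats* out) const {
        out->fields.resize(2);
        out->fields[0] = hyp.size();                            // hypothesis length
        out->fields[1] = refs.empty() ? 0.0f : refs[0].size();  // (first) reference length
      }
      // The score is computed from corpus-summed statistics, so per-segment
      // stats remain additive, as the interface requires.
      virtual float ComputeScore(const SufficientStats& stats) const {
        return stats[1] ? stats[0] / stats[1] : 0.0f;
      }
    };

diff --git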
a/mteval/ns_ter.cc b/mteval/ns_ter.cc index 14dc6e49..8c969e58 100644 --- a/mteval/ns_ter.cc +++ b/mteval/ns_ter.cc @@ -1,15 +1,11 @@ #include "ns_ter.h" -#include #include #include #include -#include #include #include -#include #include -#include #include "tdict.h" static const bool ter_use_average_ref_len = true; @@ -25,7 +21,7 @@ static const unsigned kDUMMY_LAST_ENTRY = 5; using namespace std; using namespace std::tr1; -#if 0 +namespace NewScorer { struct COSTS { static const float substitution; @@ -82,7 +78,7 @@ class TERScorerImpl { enum TransType { MATCH, SUBSTITUTION, INSERTION, DELETION }; explicit TERScorerImpl(const vector& ref) : ref_(ref) { - for (int i = 0; i < ref.size(); ++i) + for (unsigned i = 0; i < ref.size(); ++i) rwexists_.insert(ref[i]); } @@ -95,7 +91,7 @@ class TERScorerImpl { } private: - vector ref_; + const vector& ref_; set rwexists_; typedef unordered_map, set, boost::hash > > NgramToIntsMap; @@ -421,68 +417,7 @@ class TERScorerImpl { } }; -class TERScore : public ScoreBase { - friend class TERScorer; - - public: - - TERScore() : stats(0,kDUMMY_LAST_ENTRY) {} - float ComputePartialScore() const { return 0.0;} - float ComputeScore() const { - float edits = static_cast(stats[kINSERTIONS] + stats[kDELETIONS] + stats[kSUBSTITUTIONS] + stats[kSHIFTS]); - return edits / static_cast(stats[kREF_WORDCOUNT]); - } - void ScoreDetails(string* details) const; - void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){} - void PlusEquals(const Score& delta, const float scale) { - if (scale==1) - stats += static_cast(delta).stats; - if (scale==-1) - stats -= static_cast(delta).stats; - throw std::runtime_error("TERScore::PlusEquals with scale != +-1"); - } - void PlusEquals(const Score& delta) { - stats += static_cast(delta).stats; - } - - ScoreP GetZero() const { - return ScoreP(new TERScore); - } - ScoreP GetOne() const { - return ScoreP(new TERScore); - } - void Subtract(const Score& rhs, Score* res) const { - static_cast(res)->stats = stats - static_cast(rhs).stats; - } - void Encode(std::string* out) const { - ostringstream os; - os << stats[kINSERTIONS] << ' ' - << stats[kDELETIONS] << ' ' - << stats[kSUBSTITUTIONS] << ' ' - << stats[kSHIFTS] << ' ' - << stats[kREF_WORDCOUNT]; - *out = os.str(); - } - bool IsAdditiveIdentity() const { - for (int i = 0; i < kDUMMY_LAST_ENTRY; ++i) - if (stats[i] != 0) return false; - return true; - } - private: - valarray stats; -}; - -ScoreP TERScorer::ScoreFromString(const std::string& data) { - istringstream is(data); - TERScore* r = new TERScore; - is >> r->stats[TERScore::kINSERTIONS] - >> r->stats[TERScore::kDELETIONS] - >> r->stats[TERScore::kSUBSTITUTIONS] - >> r->stats[TERScore::kSHIFTS] - >> r->stats[TERScore::kREF_WORDCOUNT]; - return ScoreP(r); -} - +#if 0 void TERScore::ScoreDetails(std::string* details) const { char buf[200]; sprintf(buf, "TER = %.2f, %3d|%3d|%3d|%3d (len=%d)", @@ -494,54 +429,43 @@ void TERScore::ScoreDetails(std::string* details) const { stats[kREF_WORDCOUNT]); *details = buf; } +#endif -TERScorer::~TERScorer() { - for (vector::iterator i = impl_.begin(); i != impl_.end(); ++i) - delete *i; -} +} // namespace NewScorer -TERScorer::TERScorer(const vector >& refs) : impl_(refs.size()) { +void TERMetric::ComputeSufficientStatistics(const vector& hyp, + const vector >& refs, + SufficientStats* out) const { + out->fields.resize(kDUMMY_LAST_ENTRY); + float best_score = numeric_limits::max(); + unsigned avg_len = 0; for (int i = 0; i < refs.size(); ++i) - impl_[i] = new 
TERScorerImpl(refs[i]); -} + avg_len += refs[i].size(); + avg_len /= refs.size(); -ScoreP TERScorer::ScoreCCandidate(const vector& hyp) const { - return ScoreP(); -} - -ScoreP TERScorer::ScoreCandidate(const std::vector& hyp) const { - float best_score = numeric_limits::max(); - TERScore* res = new TERScore; - int avg_len = 0; - for (int i = 0; i < impl_.size(); ++i) - avg_len += impl_[i]->GetRefLength(); - avg_len /= impl_.size(); - for (int i = 0; i < impl_.size(); ++i) { + for (int i = 0; i < refs.size(); ++i) { int subs, ins, dels, shifts; - float score = impl_[i]->Calculate(hyp, &subs, &ins, &dels, &shifts); + NewScorer::TERScorerImpl ter(refs[i]); + float score = ter.Calculate(hyp, &subs, &ins, &dels, &shifts); // cerr << "Component TER cost: " << score << endl; if (score < best_score) { - res->stats[TERScore::kINSERTIONS] = ins; - res->stats[TERScore::kDELETIONS] = dels; - res->stats[TERScore::kSUBSTITUTIONS] = subs; - res->stats[TERScore::kSHIFTS] = shifts; + out->fields[kINSERTIONS] = ins; + out->fields[kDELETIONS] = dels; + out->fields[kSUBSTITUTIONS] = subs; + out->fields[kSHIFTS] = shifts; if (ter_use_average_ref_len) { - res->stats[TERScore::kREF_WORDCOUNT] = avg_len; + out->fields[kREF_WORDCOUNT] = avg_len; } else { - res->stats[TERScore::kREF_WORDCOUNT] = impl_[i]->GetRefLength(); + out->fields[kREF_WORDCOUNT] = refs[i].size(); } best_score = score; } } - return ScoreP(res); } -#endif -void TERMetric::ComputeSufficientStatistics(const vector& hyp, - const vector >& refs, - SufficientStats* out) const { - out->fields.resize(kDUMMY_LAST_ENTRY); +unsigned TERMetric::SufficientStatisticsVectorSize() const { + return kDUMMY_LAST_ENTRY; } float TERMetric::ComputeScore(const SufficientStats& stats) const { diff --git a/mteval/ns_ter.h b/mteval/ns_ter.h index bb90f95e..6c020cfa 100644 --- a/mteval/ns_ter.h +++ b/mteval/ns_ter.h @@ -9,6 +9,7 @@ class TERMetric : public EvaluationMetric { TERMetric() : EvaluationMetric("TER") {} public: + virtual unsigned SufficientStatisticsVectorSize() const; virtual void ComputeSufficientStatistics(const std::vector& hyp, const std::vector >& refs, SufficientStats* out) const; diff --git a/mteval/scorer_test.cc b/mteval/scorer_test.cc index 09da250c..73159557 100644 --- a/mteval/scorer_test.cc +++ b/mteval/scorer_test.cc @@ -205,20 +205,22 @@ TEST_F(ScorerTest, Kernel) { } TEST_F(ScorerTest, NewScoreAPI) { - EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU"); + //EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU"); + //EvaluationMetric* metric = EvaluationMetric::Instance("METEOR"); + EvaluationMetric* metric = EvaluationMetric::Instance("COMB:IBM_BLEU=0.5;TER=-0.5"); boost::shared_ptr e1 = metric->CreateSegmentEvaluator(refs0); boost::shared_ptr e2 = metric->CreateSegmentEvaluator(refs1); SufficientStats stats1; - e1->Evaluate(hyp2, &stats1); + e1->Evaluate(hyp1, &stats1); SufficientStats stats2; - e2->Evaluate(hyp1, &stats2); + e2->Evaluate(hyp2, &stats2); stats1 += stats2; string ss; stats1.Encode(&ss); cerr << "SS: " << ss << endl; cerr << metric->ComputeScore(stats1) << endl; - SufficientStats statse("IBM_BLEU 53 32 18 11 65 63 61 59 65 72"); - cerr << metric->ComputeScore(statse) << endl; + //SufficientStats statse("IBM_BLEU 53 32 18 11 65 63 61 59 65 72"); + //cerr << metric->ComputeScore(statse) << endl; } int main(int argc, char **argv) { diff --git a/utils/stringlib.h b/utils/stringlib.h index cafbdac3..f457e1e4 100644 --- a/utils/stringlib.h +++ b/utils/stringlib.h @@ -125,6 +125,13 @@ inline std::string 
LowercaseString(const std::string& in) {
   return res;
 }
 
+inline std::string UppercaseString(const std::string& in) {
+  std::string res(in.size(),' ');
+  for (int i = 0; i < in.size(); ++i)
+    res[i] = toupper(in[i]);
+  return res;
+}
+
 inline int CountSubstrings(const std::string& str, const std::string& sub) {
   size_t p = 0;
   int res = 0;
-- 
cgit v1.2.3


From a144fb07effc59a3aa269d7fd5f3d0ab9dfe5e54 Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Tue, 3 Jan 2012 16:59:11 -0500
Subject: multi-floor chinese restaurant described by wood&teh (2009)

---
 utils/Makefile.am  |   6 +-
 utils/mfcr.h       | 354 +++++++++++++++++++++++++++++++++++++++++++++++
 utils/mfcr_test.cc |  72 +++++++++++
 3 files changed, 430 insertions(+), 2 deletions(-)
 create mode 100644 utils/mfcr.h
 create mode 100644 utils/mfcr_test.cc

(limited to 'utils')

diff --git a/utils/Makefile.am b/utils/Makefile.am
index df667655..3e559c75 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -1,8 +1,8 @@
 
 bin_PROGRAMS = reconstruct_weights
 
-noinst_PROGRAMS = ts phmt
-TESTS = ts phmt
+noinst_PROGRAMS = ts phmt mfcr_test
+TESTS = ts phmt mfcr_test
 
 if HAVE_GTEST
 noinst_PROGRAMS += \
@@ -40,6 +40,8 @@ phmt_SOURCES = phmt.cc
 ts_SOURCES = ts.cc
 dict_test_SOURCES = dict_test.cc
 dict_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS)
+mfcr_test_SOURCES = mfcr_test.cc
+mfcr_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS)
 weights_test_SOURCES = weights_test.cc
 weights_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS)
 logval_test_SOURCES = logval_test.cc

diff --git a/utils/mfcr.h b/utils/mfcr.h
new file mode 100644
index 00000000..3eb133fc
--- /dev/null
+++ b/utils/mfcr.h
@@ -0,0 +1,354 @@
+#ifndef _MFCR_H_
+#define _MFCR_H_
+
+#include <numeric>
+#include <limits>
+#include <cassert>
+#include <cmath>
+#include <list>
+#include <iostream>
+#include <vector>
+#include <tr1/unordered_map>
+#include <boost/functional/hash.hpp>
+#include "sampler.h"
+#include "slice_sampler.h"
+
+struct TableCount {
+  TableCount() : count(), floor() {}
+  TableCount(int c, int f) : count(c), floor(f) {
+    assert(f >= 0);
+  }
+  int count;            // count or delta (may be 0, <0, or >0)
+  unsigned char floor;  // from which floor?
+};
+
+std::ostream& operator<<(std::ostream& o, const TableCount& tc) {
+  return o << "[c=" << tc.count << " floor=" << static_cast<unsigned>(tc.floor) << ']';
+}
+
+// Multi-Floor Chinese Restaurant as proposed by Wood & Teh (AISTATS, 2009) to simulate
+// graphical Pitman-Yor processes.
+// http://jmlr.csail.mit.edu/proceedings/papers/v5/wood09a/wood09a.pdf
+//
+// Implementation is based on Blunsom, Cohn, Goldwater, & Johnson (ACL 2009) and code
+// referenced therein.
+// http://www.aclweb.org/anthology/P/P09/P09-2085.pdf
+//
+template <typename Dish, typename DishHash = boost::hash<Dish> >
+class MFCR {
+ public:
+
+  MFCR(unsigned num_floors, double d, double alpha) :
+    num_floors_(num_floors),
+    num_tables_(),
+    num_customers_(),
+    d_(d),
+    alpha_(alpha),
+    d_prior_alpha_(std::numeric_limits<double>::quiet_NaN()),
+    d_prior_beta_(std::numeric_limits<double>::quiet_NaN()),
+    alpha_prior_shape_(std::numeric_limits<double>::quiet_NaN()),
+    alpha_prior_rate_(std::numeric_limits<double>::quiet_NaN()) {}
+
+  MFCR(unsigned num_floors, double d_alpha, double d_beta, double alpha_shape, double alpha_rate, double d = 0.9, double alpha = 10.0) :
+    num_floors_(num_floors),
+    num_tables_(),
+    num_customers_(),
+    d_(d),
+    alpha_(alpha),
+    d_prior_alpha_(d_alpha),
+    d_prior_beta_(d_beta),
+    alpha_prior_shape_(alpha_shape),
+    alpha_prior_rate_(alpha_rate) {}
+
+  double d() const { return d_; }
+  double alpha() const { return alpha_; }
+
+  bool has_d_prior() const {
+    return !std::isnan(d_prior_alpha_);
+  }
+
+  bool has_alpha_prior() const {
+    return !std::isnan(alpha_prior_shape_);
+  }
+
+  void clear() {
+    num_tables_ = 0;
+    num_customers_ = 0;
+    dish_locs_.clear();
+  }
+
+  unsigned num_tables() const {
+    return num_tables_;
+  }
+
+  unsigned num_tables(const Dish& dish) const {
+    const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
+    if (it == dish_locs_.end()) return 0;
+    return it->second.table_counts_.size();
+  }
+
+  // this is not terribly efficient but it should not typically be necessary to execute this query
+  unsigned num_tables(const Dish& dish, const unsigned floor) const {
+    const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
+    if (it == dish_locs_.end()) return 0;
+    unsigned c = 0;
+    for (typename std::list<TableCount>::const_iterator i = it->second.table_counts_.begin();
+         i != it->second.table_counts_.end(); ++i) {
+      if (i->floor == floor) ++c;
+    }
+    return c;
+  }
+
+  unsigned num_customers() const {
+    return num_customers_;
+  }
+
+  unsigned num_customers(const Dish& dish) const {
+    const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
+    if (it == dish_locs_.end()) return 0;
+    return it->second.total_dish_count_;
+  }
+
+  // returns (delta, floor) indicating whether a new table (delta) was opened and on which floor
+  TableCount increment(const Dish& dish, const std::vector<double>& p0s, const std::vector<double>& lambdas, MT19937* rng) {
+    assert(p0s.size() == num_floors_);
+    assert(lambdas.size() == num_floors_);
+
+    DishLocations& loc = dish_locs_[dish];
+    // marg_p0 = marginal probability of opening a new table on any floor with label dish
+    const double marg_p0 = std::inner_product(p0s.begin(), p0s.end(), lambdas.begin(), 0.0);
+    assert(marg_p0 <= 1.0);
+    int floor = -1;
+    bool share_table = false;
+    if (loc.total_dish_count_) {
+      const double p_empty = (alpha_ + num_tables_ * d_) * marg_p0;
+      const double p_share = (loc.total_dish_count_ - loc.table_counts_.size() * d_);
+      share_table = rng->SelectSample(p_empty, p_share);
+    }
+    if (share_table) {
+      double r = rng->next() * (loc.total_dish_count_ - loc.table_counts_.size() * d_);
+      for (typename std::list<TableCount>::iterator ti = loc.table_counts_.begin();
+           ti != loc.table_counts_.end(); ++ti) {
+        r -= ti->count - d_;
+        if (r <= 0.0) {
+          ++ti->count;
+          floor = ti->floor;
+          break;
+        }
+      }
+      if (r > 0.0) {
+        std::cerr << "Serious error: r=" << r << std::endl;
+        Print(&std::cerr);
+        assert(r <= 0.0);
+      }
+    } else { // sit at currently empty table -- must sample what floor
+      double r = rng->next() * marg_p0;
+      for (unsigned i = 0; i < p0s.size(); ++i) {
+        r -= p0s[i] * lambdas[i];
+        if (r <= 0.0) {
+          floor = i;
+          break;
+        }
+      }
+      assert(floor >= 0);
+      loc.table_counts_.push_back(TableCount(1, floor));
+      ++num_tables_;
+    }
+    ++loc.total_dish_count_;
+    ++num_customers_;
+    return (share_table ? TableCount(0, floor) : TableCount(1, floor));
+  }
+
+  // returns first = -1 or 0, indicating whether a table was closed, and on what floor (second)
+  TableCount decrement(const Dish& dish, MT19937* rng) {
+    DishLocations& loc = dish_locs_[dish];
+    assert(loc.total_dish_count_);
+    int floor = -1;
+    int delta = 0;
+    if (loc.total_dish_count_ == 1) {
+      floor = loc.table_counts_.front().floor;
+      dish_locs_.erase(dish);
+      --num_tables_;
+      --num_customers_;
+      delta = -1;
+    } else {
+      // sample customer to remove UNIFORMLY. that is, do NOT use the d
+      // here. if you do, it will introduce (unwanted) bias!
+      double r = rng->next() * loc.total_dish_count_;
+      --loc.total_dish_count_;
+      --num_customers_;
+      for (typename std::list<TableCount>::iterator ti = loc.table_counts_.begin();
+           ti != loc.table_counts_.end(); ++ti) {
+        r -= ti->count;
+        if (r <= 0.0) {
+          floor = ti->floor;
+          if ((--ti->count) == 0) {
+            --num_tables_;
+            delta = -1;
+            loc.table_counts_.erase(ti);
+          }
+          break;
+        }
+      }
+      if (r > 0.0) {
+        std::cerr << "Serious error: r=" << r << std::endl;
+        Print(&std::cerr);
+        assert(r <= 0.0);
+      }
+    }
+    return TableCount(delta, floor);
+  }
+
+  double prob(const Dish& dish, const std::vector<double>& p0s, const std::vector<double>& lambdas) const {
+    assert(p0s.size() == num_floors_);
+    assert(lambdas.size() == num_floors_);
+    const double marg_p0 = std::inner_product(p0s.begin(), p0s.end(), lambdas.begin(), 0.0);
+    assert(marg_p0 <= 1.0);
+    const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
+    const double r = num_tables_ * d_ + alpha_;
+    if (it == dish_locs_.end()) {
+      return r * marg_p0 / (num_customers_ + alpha_);
+    } else {
+      return (it->second.total_dish_count_ - d_ * it->second.table_counts_.size() + r * marg_p0) /
+               (num_customers_ + alpha_);
+    }
+  }
+
+  double log_crp_prob() const {
+    return log_crp_prob(d_, alpha_);
+  }
+
+  static double log_beta_density(const double& x, const double& alpha, const double& beta) {
+    assert(x > 0.0);
+    assert(x < 1.0);
+    assert(alpha > 0.0);
+    assert(beta > 0.0);
+    const double lp = (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta);
+    return lp;
+  }
+
+  static double log_gamma_density(const double& x, const double& shape, const double& rate) {
+    assert(x >= 0.0);
+    assert(shape > 0.0);
+    assert(rate > 0.0);
+    const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape);
+    return lp;
+  }
+
+  // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process
+  // does not include draws from G_w's
+  double log_crp_prob(const double& d, const double& alpha) const {
+    double lp = 0.0;
+    if (has_d_prior())
+      lp = log_beta_density(d, d_prior_alpha_, d_prior_beta_);
+    if (has_alpha_prior())
+      lp += log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_);
+    assert(lp <= 0.0);
+    if (num_customers_) {
+      if (d > 0.0) {
+        const double r = lgamma(1.0 - d);
+        lp += lgamma(alpha) - lgamma(alpha + num_customers_)
+             + num_tables_ * log(d) + lgamma(alpha / d + num_tables_)
+             - lgamma(alpha / d);
+        assert(std::isfinite(lp));
+        for (typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.begin();
+             it != dish_locs_.end(); ++it) {
+          const DishLocations& cur = it->second;
+          for (std::list<TableCount>::const_iterator ti = cur.table_counts_.begin(); ti != cur.table_counts_.end(); ++ti) {
+            lp += lgamma(ti->count - d) - r;
+          }
+        }
+      } else {
+        assert(!"not implemented yet");
+      }
+    }
+    assert(std::isfinite(lp));
+    return lp;
+  }
+
+  void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) {
+    assert(has_d_prior() || has_alpha_prior());
+    DiscountResampler dr(*this);
+    ConcentrationResampler cr(*this);
+    for (int iter = 0; iter < nloop; ++iter) {
+      if (has_alpha_prior()) {
+        alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0,
+                                 std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
+      }
+      if (has_d_prior()) {
+        d_ = slice_sampler1d(dr, d_, *rng, std::numeric_limits<double>::min(),
+                             1.0, 0.0, niterations, 100*niterations);
+      }
+    }
+    alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0,
+                             std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
+  }
+
+  struct DiscountResampler {
+    DiscountResampler(const MFCR& crp) : crp_(crp) {}
+    const MFCR& crp_;
+    double operator()(const double& proposed_d) const {
+      return crp_.log_crp_prob(proposed_d, crp_.alpha_);
+    }
+  };
+
+  struct ConcentrationResampler {
+    ConcentrationResampler(const MFCR& crp) : crp_(crp) {}
+    const MFCR& crp_;
+    double operator()(const double& proposed_alpha) const {
+      return crp_.log_crp_prob(crp_.d_, proposed_alpha);
+    }
+  };
+
+  struct DishLocations {
+    DishLocations() : total_dish_count_() {}
+    unsigned total_dish_count_;            // customers at all tables with this dish
+    std::list<TableCount> table_counts_;   // list<> gives O(1) deletion and insertion, which we want
+                                           // .size() is the number of tables for this dish
+  };
+
+  void Print(std::ostream* out) const {
+    (*out) << "MFCR(d=" << d_ << ",alpha=" << alpha_ << ") customers=" << num_customers_ << std::endl;
+    for (typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.begin();
+         it != dish_locs_.end(); ++it) {
+      (*out) << it->first << " (" << it->second.total_dish_count_ << " on " << it->second.table_counts_.size() << " tables): ";
+      for (typename std::list<TableCount>::const_iterator i = it->second.table_counts_.begin();
+           i != it->second.table_counts_.end(); ++i) {
+        (*out) << " " << *i;
+      }
+      (*out) << std::endl;
+    }
+  }
+
+  typedef typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator const_iterator;
+  const_iterator begin() const {
+    return dish_locs_.begin();
+  }
+  const_iterator end() const {
+    return dish_locs_.end();
+  }
+
+  unsigned num_floors_;
+  unsigned num_tables_;
+  unsigned num_customers_;
+  std::tr1::unordered_map<Dish, DishLocations, DishHash> dish_locs_;
+
+  double d_;
+  double alpha_;
+
+  // optional beta prior on d_ (NaN if no prior)
+  double d_prior_alpha_;
+  double d_prior_beta_;
+
+  // optional gamma prior on alpha_ (NaN if no prior)
+  double alpha_prior_shape_;
+  double alpha_prior_rate_;
+};
+
+template <typename T, typename H>
+std::ostream& operator<<(std::ostream& o, const MFCR<T,H>& c) {
+  c.Print(&o);
+  return o;
+}
+
+#endif

diff --git a/utils/mfcr_test.cc b/utils/mfcr_test.cc
new file mode 100644
index 00000000..7c45a37c
--- /dev/null
+++ b/utils/mfcr_test.cc
@@ -0,0 +1,72 @@
+#include "mfcr.h"
+
+#include <iostream>
+#include <cassert>
+#include <cmath>
+
+#include "sampler.h"
+
+using namespace std;
+
+void test_exch(MT19937* rng) {
+  MFCR<int> crp(2, 0.5, 3.0);
+  vector<double> lambdas(2);
+  vector<double> p0s(2);
+  lambdas[0] = 0.2;
+  lambdas[1] = 0.8;
+  p0s[0] = 1.0;
+  p0s[1] = 1.0;
+
+  double tot = 0;
+  double tot2 = 0;
+  double xt = 0;
+  int cust = 10;
+  vector<int> hist(cust + 1, 0), hist2(cust + 1, 0);
+  for (int i = 0; i < cust; ++i) { crp.increment(1, p0s, lambdas, rng); }
+  const int samples = 100000;
+  const bool simulate = true;
+  for (int k = 0; k < samples; ++k)
{ + if (!simulate) { + crp.clear(); + for (int i = 0; i < cust; ++i) { crp.increment(1, p0s, lambdas, rng); } + } else { + int da = rng->next() * cust; + bool a = rng->next() < 0.45; + if (a) { + for (int i = 0; i < da; ++i) { crp.increment(1, p0s, lambdas, rng); } + for (int i = 0; i < da; ++i) { crp.decrement(1, rng); } + xt += 1.0; + } else { + for (int i = 0; i < da; ++i) { crp.decrement(1, rng); } + for (int i = 0; i < da; ++i) { crp.increment(1, p0s, lambdas, rng); } + } + } + int c = crp.num_tables(1); + ++hist[c]; + tot += c; + int c2 = crp.num_tables(1,0); // tables on floor 0 with dish 1 + ++hist2[c2]; + tot2 += c2; + } + cerr << cust << " = " << crp.num_customers() << endl; + cerr << "P(a) = " << (xt / samples) << endl; + cerr << "E[num tables] = " << (tot / samples) << endl; + double error = fabs((tot / samples) - 6.894); + cerr << " error = " << error << endl; + for (int i = 1; i <= cust; ++i) + cerr << i << ' ' << (hist[i]) << endl; + cerr << "E[num tables on floor 0] = " << (tot2 / samples) << endl; + double error2 = fabs((tot2 / samples) - 1.379); + cerr << " error2 = " << error2 << endl; + for (int i = 1; i <= cust; ++i) + cerr << i << ' ' << (hist2[i]) << endl; + assert(error < 0.05); // these can fail with very low probability + assert(error2 < 0.05); +}; + +int main(int argc, char** argv) { + MT19937 rng; + test_exch(&rng); + return 0; +} + -- cgit v1.2.3 From 203c3c3357b9ed8cfe44932c2bf5ea19eba6238c Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 27 Jan 2012 13:19:27 -0500 Subject: migration to new metric api for vest, clean up of unsupported/not functional code --- mteval/mbr_kbest.cc | 21 +- utils/fast_sparse_vector.h | 6 + vest/dist-vest.pl | 22 +-- vest/mbr_kbest.cc | 138 ------------- vest/mr_vest_generate_mapper_input.cc | 356 ++++++---------------------------- vest/mr_vest_map.cc | 16 +- 6 files changed, 84 insertions(+), 475 deletions(-) delete mode 100644 vest/mbr_kbest.cc (limited to 'utils') diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc index 64a6a8bf..b5e4750c 100644 --- a/mteval/mbr_kbest.cc +++ b/mteval/mbr_kbest.cc @@ -5,7 +5,7 @@ #include "prob.h" #include "tdict.h" -#include "scorer.h" +#include "ns.h" #include "filelib.h" #include "stringlib.h" @@ -17,7 +17,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() ("scale,a",po::value()->default_value(1.0), "Posterior scaling factor (alpha)") - ("loss_function,l",po::value()->default_value("bleu"), "Loss function") + ("evaluation_metric,m",po::value()->default_value("ibm_bleu"), "Evaluation metric") ("input,i",po::value()->default_value("-"), "File to read k-best lists from") ("output_list,L", "Show reranked list as output") ("help,h", "Help"); @@ -75,13 +75,14 @@ bool ReadKBestList(istream* in, string* sent_id, vector, pro int main(int argc, char** argv) { po::variables_map conf; InitCommandLine(argc, argv, &conf); - const string metric = conf["loss_function"].as(); + const string smetric = conf["evaluation_metric"].as(); + EvaluationMetric* metric = EvaluationMetric::Instance(smetric); + const bool is_loss = (UppercaseString(smetric) == "TER"); const bool output_list = conf.count("output_list") > 0; const string file = conf["input"].as(); const double mbr_scale = conf["scale"].as(); cerr << "Posterior scaling factor (alpha) = " << mbr_scale << endl; - ScoreType type = ScoreTypeFromString(metric); vector, prob_t> > list; ReadFile rf(file); string sent_id; @@ -99,15 +100,15 @@ int main(int argc, 
char** argv) { vector mbr_scores(output_list ? list.size() : 0); double mbr_loss = numeric_limits::max(); for (int i = 0 ; i < list.size(); ++i) { - vector > refs(1, list[i].first); - //cerr << i << ": " << list[i].second <<"\t" << TD::GetString(list[i].first) << endl; - ScorerP scorer = SentenceScorer::CreateSentenceScorer(type, refs); + const vector > refs(1, list[i].first); + double wl_acc = 0; for (int j = 0; j < list.size(); ++j) { if (i != j) { - ScoreP s = scorer->ScoreCandidate(list[j].first); - double loss = 1.0 - s->ComputeScore(); - if (type == TER || type == AER) loss = 1.0 - loss; + SufficientStats ss; + metric->ComputeSufficientStatistics(list[j].first, refs, &ss); + double loss = 1.0 - metric->ComputeScore(ss); + if (is_loss) loss = 1.0 - loss; double weighted_loss = loss * (joints[j] / marginal).as_float(); wl_acc += weighted_loss; if ((!output_list) && wl_acc > mbr_loss) break; diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h index 1301581a..17fa47bf 100644 --- a/utils/fast_sparse_vector.h +++ b/utils/fast_sparse_vector.h @@ -178,6 +178,12 @@ class FastSparseVector { T l2norm() const { return sqrt(l2norm_sq()); } + T pnorm(const double p) const { + T sum = T(); + for (const_iterator it = begin(), e = end(); it != e; ++it) + sum += pow(fabs(it->second), p); + return pow(sum, 1.0 / p); + } // if values are binary, gives |A intersect B|/|A union B| template S tanimoto_coef(const FastSparseVector &vec) const { diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 8cde748b..1ec8c6b1 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -65,8 +65,6 @@ my $oraclen=0; my $oracleb=20; my $bleu_weight=1; my $use_make = 1; # use make to parallelize line search -my $dirargs=''; -my $density_prune; my $useqsub; my $pass_suffix = ''; my $cpbin=1; @@ -75,7 +73,6 @@ Getopt::Long::Configure("no_auto_abbrev"); if (GetOptions( "decoder=s" => \$decoderOpt, "jobs=i" => \$jobs, - "density-prune=f" => \$density_prune, "dont-clean" => \$disable_clean, "pass-suffix=s" => \$pass_suffix, "dry-run" => \$dryrun, @@ -87,15 +84,7 @@ if (GetOptions( "normalize=s" => \$normalize, "pmem=s" => \$pmem, "cpbin!" => \$cpbin, - "rand-directions=i" => \$rand_directions, - "random_directions=i" => \$rand_directions, - "bleu_weight=s" => \$bleu_weight, - "no-primary!" => \$noprimary, - "max-similarity=s" => \$maxsim, - "oracle-directions=i" => \$oraclen, - "n-oracle=i" => \$oraclen, - "oracle-batch=i" => \$oracleb, - "directions-args=s" => \$dirargs, + "random-directions=i" => \$rand_directions, "ref-files=s" => \$refFiles, "metric=s" => \$metric, "source-file=s" => \$srcFile, @@ -107,10 +96,6 @@ if (GetOptions( exit; } -if (defined $density_prune) { - die "--density_prune n: n must be greater than 1.0\n" unless $density_prune > 1.0; -} - if ($useqsub) { $use_make = 0; die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); @@ -328,10 +313,7 @@ while (1){ print STDERR "\nGENERATE OPTIMIZATION STRATEGY (OPT-ITERATION $opt_iter/$optimization_iters)\n"; print STDERR unchecked_output("date"); $icc++; - my $nop=$noprimary?"--no_primary":""; - my $targs=$oraclen ? "--decoder_translations='$runFile.gz' ".get_comma_sep_refs('-references',$refFiles):""; - my $bwargs=$bleu_weight!=1 ? 
"--bleu_weight=$bleu_weight":""; - $cmd="$MAPINPUT -w $inweights -r $dir/hgs $bwargs -s $devSize -d $rand_directions --max_similarity=$maxsim --oracle_directions=$oraclen --oracle_batch=$oracleb $targs $dirargs > $dir/agenda.$im1-$opt_iter"; + $cmd="$MAPINPUT -w $inweights -r $dir/hgs -s $devSize -d $rand_directions > $dir/agenda.$im1-$opt_iter"; print STDERR "COMMAND:\n$cmd\n"; check_call($cmd); check_call("mkdir -p $dir/splag.$im1"); diff --git a/vest/mbr_kbest.cc b/vest/mbr_kbest.cc deleted file mode 100644 index 2867b36b..00000000 --- a/vest/mbr_kbest.cc +++ /dev/null @@ -1,138 +0,0 @@ -#include -#include - -#include - -#include "prob.h" -#include "tdict.h" -#include "scorer.h" -#include "filelib.h" -#include "stringlib.h" - -using namespace std; - -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("scale,a",po::value()->default_value(1.0), "Posterior scaling factor (alpha)") - ("loss_function,l",po::value()->default_value("bleu"), "Loss function") - ("input,i",po::value()->default_value("-"), "File to read k-best lists from") - ("output_list,L", "Show reranked list as output") - ("help,h", "Help"); - po::options_description dcmdline_options; - dcmdline_options.add(opts); - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - bool flag = false; - if (flag || conf->count("help")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -struct LossComparer { - bool operator()(const pair, double>& a, const pair, double>& b) const { - return a.second < b.second; - } -}; - -bool ReadKBestList(istream* in, string* sent_id, vector, prob_t> >* list) { - static string cache_id; - static pair, prob_t> cache_pair; - list->clear(); - string cur_id; - if (cache_pair.first.size() > 0) { - list->push_back(cache_pair); - cur_id = cache_id; - cache_pair.first.clear(); - } - string line; - string tstr; - while(*in) { - getline(*in, line); - if (line.empty()) continue; - size_t p1 = line.find(" ||| "); - if (p1 == string::npos) { cerr << "Bad format: " << line << endl; abort(); } - size_t p2 = line.find(" ||| ", p1 + 4); - if (p2 == string::npos) { cerr << "Bad format: " << line << endl; abort(); } - size_t p3 = line.rfind(" ||| "); - cache_id = line.substr(0, p1); - tstr = line.substr(p1 + 5, p2 - p1 - 5); - double val = strtod(line.substr(p3 + 5).c_str(), NULL); - TD::ConvertSentence(tstr, &cache_pair.first); - cache_pair.second.logeq(val); - if (cur_id.empty()) cur_id = cache_id; - if (cur_id == cache_id) { - list->push_back(cache_pair); - *sent_id = cur_id; - cache_pair.first.clear(); - } else { break; } - } - return !list->empty(); -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - const string metric = conf["loss_function"].as(); - const bool output_list = conf.count("output_list") > 0; - const string file = conf["input"].as(); - const double mbr_scale = conf["scale"].as(); - cerr << "Posterior scaling factor (alpha) = " << mbr_scale << endl; - - ScoreType type = ScoreTypeFromString(metric); - vector, prob_t> > list; - ReadFile rf(file); - string sent_id; - while(ReadKBestList(rf.stream(), &sent_id, &list)) { - vector joints(list.size()); - const prob_t max_score = pow(list.front().second, mbr_scale); - prob_t marginal = prob_t::Zero(); - for (int i = 0 ; i < list.size(); ++i) { - const prob_t joint = pow(list[i].second, mbr_scale) / max_score; - joints[i] = joint; - // cerr << 
"list[" << i << "] joint=" << log(joint) << endl; - marginal += joint; - } - int mbr_idx = -1; - vector mbr_scores(output_list ? list.size() : 0); - double mbr_loss = numeric_limits::max(); - for (int i = 0 ; i < list.size(); ++i) { - vector > refs(1, list[i].first); - //cerr << i << ": " << list[i].second <<"\t" << TD::GetString(list[i].first) << endl; - ScorerP scorer = SentenceScorer::CreateSentenceScorer(type, refs); - double wl_acc = 0; - for (int j = 0; j < list.size(); ++j) { - if (i != j) { - ScoreP s = scorer->ScoreCandidate(list[j].first); - double loss = 1.0 - s->ComputeScore(); - if (type == TER || type == AER) loss = 1.0 - loss; - double weighted_loss = loss * (joints[j] / marginal); - wl_acc += weighted_loss; - if ((!output_list) && wl_acc > mbr_loss) break; - } - } - if (output_list) mbr_scores[i] = wl_acc; - if (wl_acc < mbr_loss) { - mbr_loss = wl_acc; - mbr_idx = i; - } - } - // cerr << "ML translation: " << TD::GetString(list[0].first) << endl; - cerr << "MBR Best idx: " << mbr_idx << endl; - if (output_list) { - for (int i = 0; i < list.size(); ++i) - list[i].second.logeq(mbr_scores[i]); - sort(list.begin(), list.end(), LossComparer()); - for (int i = 0; i < list.size(); ++i) - cout << sent_id << " ||| " - << TD::GetString(list[i].first) << " ||| " - << log(list[i].second) << endl; - } else { - cout << TD::GetString(list[mbr_idx].first) << endl; - } - } - return 0; -} - diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc index 0c094fd5..59d4f24f 100644 --- a/vest/mr_vest_generate_mapper_input.cc +++ b/vest/mr_vest_generate_mapper_input.cc @@ -1,320 +1,78 @@ -//TODO: debug segfault when references supplied, null shared_ptr when oracle #include #include -#include #include #include -#include "sampler.h" #include "filelib.h" #include "weights.h" #include "line_optimizer.h" -#include "hg.h" -#include "hg_io.h" -#include "scorer.h" -#include "oracle_bleu.h" -#include "ff_bleu.h" - -const bool DEBUG_ORACLE=true; - -//TODO: decide on cdec_ff ffs, or just bleumodel - if just bleumodel, then do existing features on serialized hypergraphs remain? weights (origin) is passed to oracle_bleu.h:ComputeOracle -//void register_feature_functions(); -//FFRegistry ff_registry; -namespace { -void init_bleumodel() { - ff_registry.clear(); - ff_registry.Register(new FFFactory); -} - -struct init_ff { - init_ff() { - init_bleumodel(); - } -}; -//init_ff reg; // order of initialization? ff_registry may not be init yet. call in Run() instead. 
-} using namespace std; namespace po = boost::program_options; -typedef SparseVector Dir; -typedef Dir Point; - -void compress_similar(vector &dirs,double min_dist,ostream *log=&cerr,bool avg=true,bool verbose=true) { - // return; //TODO: debug - if (min_dist<=0) return; - double max_s=1.-min_dist; - if (log&&verbose) *log<<"max allowed S="< "<add_options() - ("dev_set_size,s",po::value(&dev_set_size),"[REQD] Development set size (# of parallel sentences)") - ("forest_repository,r",po::value(&forest_repository),"[REQD] Path to forest repository") - ("weights,w",po::value(&weights_file),"[REQD] Current feature weights file") - ("optimize_feature,o",po::value >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)") - ("random_directions,d",po::value(&n_random)->default_value(10),"Number of random directions to run the line optimizer in") - ("no_primary,n","don't use the primary (orthogonal each feature alone) directions") - ("oracle_directions,O",po::value(&n_oracle)->default_value(0),"read the forests and choose this many directions based on heading toward a hope max (bleu+modelscore) translation.") - ("oracle_start_random",po::bool_switch(&start_random),"sample random subsets of dev set for ALL oracle directions, not just those after a sequential run through it") - ("oracle_batch,b",po::value(&oracle_batch)->default_value(10),"to produce each oracle direction, sum the 'gradient' over this many sentences") - ("max_similarity,m",po::value(&max_similarity)->default_value(0),"remove directions that are too similar (Tanimoto coeff. less than (1-this)). 0 means don't filter, 1 means only 1 direction allowed?") - ("fear_to_hope,f",po::bool_switch(&fear_to_hope),"for each of the oracle_directions, also include a direction from fear to hope (as well as origin to hope)") - ("no_old_to_hope","don't emit the usual old -> hope oracle") - ("decoder_translations",po::value(&decoder_translations_file)->default_value(""),"one per line decoder 1best translations for computing document BLEU vs. 
sentences-seen-so-far BLEU") - ; - } - void InitCommandLine(int argc, char *argv[], po::variables_map *conf) { - po::options_description opts("Configuration options"); - AddOptions(&opts); - opts.add_options()("help,h", "Help"); - - po::options_description dcmdline_options; - dcmdline_options.add(opts); - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - po::notify(*conf); - if (conf->count("dev_set_size") == 0) { - cerr << "Please specify the size of the development set using -s N\n"; - goto bad_cmdline; - } - if (conf->count("weights") == 0) { - cerr << "Please specify the starting-point weights using -w \n"; - goto bad_cmdline; - } - if (conf->count("forest_repository") == 0) { - cerr << "Please specify the forest repository location using -r \n"; - goto bad_cmdline; - } - if (n_oracle && oracle.refs.empty()) { - cerr<<"Specify references when using oracle directions\n"; - goto bad_cmdline; - } - if (conf->count("help")) { - cout << dcmdline_options << endl; - exit(0); - } - - return; - bad_cmdline: - cerr << dcmdline_options << endl; - exit(1); +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("dev_set_size,s",po::value(),"[REQD] Development set size (# of parallel sentences)") + ("forest_repository,r",po::value(),"[REQD] Path to forest repository") + ("weights,w",po::value(),"[REQD] Current feature weights file") + ("optimize_feature,o",po::value >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)") + ("random_directions,d",po::value()->default_value(20),"Number of random directions to run the line optimizer in") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = false; + if (conf->count("dev_set_size") == 0) { + cerr << "Please specify the size of the development set using -d N\n"; + flag = true; } - - int main(int argc, char *argv[]) { - po::variables_map conf; - InitCommandLine(argc,argv,&conf); - init_bleumodel(); - UseConf(conf); - Run(); - return 0; + if (conf->count("weights") == 0) { + cerr << "Please specify the starting-point weights using -w \n"; + flag = true; } - bool verbose() const { return oracle.verbose; } - void Run() { -// register_feature_functions(); - AddPrimaryAndRandomDirections(); - AddOracleDirections(); - compress_similar(directions,max_similarity,&cerr,true,verbose()); - Print(); + if (conf->count("forest_repository") == 0) { + cerr << "Please specify the forest repository location using -r \n"; + flag = true; } - - - Point origin; // old weights that gave model 1best. 
- vector optimize_features; - void UseConf(po::variables_map const& conf) { - oracle.UseConf(conf); - include_primary=!conf.count("no_primary"); - old_to_hope=!conf.count("no_old_to_hope"); - - if (conf.count("optimize_feature") > 0) - optimize_features=conf["optimize_feature"].as >(); - Init(); + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); } +} - string weights_file; - double max_similarity; - unsigned n_oracle, oracle_batch; - string forest_repository; - unsigned dev_set_size; - vector oracles; - vector fids; - string forest_file(unsigned i) const { - ostringstream o; - o << forest_repository << '/' << i << ".json.gz"; - return o.str(); - } - - oracle_directions() { } - - Sentences model_hyps; - - vector model_scores; - bool have_doc; - void Init() { - have_doc=!decoder_translations_file.empty(); - if (have_doc) { - model_hyps.Load(decoder_translations_file); - if (verbose()) model_hyps.Print(cerr,5); - model_scores.resize(model_hyps.size()); - if (dev_set_size!=model_hyps.size()) { - cerr<<"You supplied decoder_translations with a different number of lines ("<ScoreCandidate(model_hyps[i]); - assert(model_scores[i]); - if (verbose()) cerr<<"Before model["<ScoreDetails()<PlusEquals(*model_scores[i]); - if (verbose()) cerr<<"After model["< features; - vector dorigin; - Weights::InitFromFile(weights_file, &dorigin, &features); - if (optimize_features.size()) - features=optimize_features; - Weights::InitSparseVector(dorigin, &origin); - fids.clear(); - AddFeatureIds(features); - oracles.resize(dev_set_size); - } - - void AddFeatureIds(vector const& features) { - int i = fids.size(); - fids.resize(fids.size()+features.size()); - for (; i < features.size(); ++i) - fids[i] = FD::Convert(features[i]); - } - - - std::string decoder_translations_file; // one per line - //TODO: is it worthwhile to get a complete document bleu first? would take a list of 1best translations one per line from the decoders, rather than loading all the forests (expensive). translations are in run.raw.N.gz - new arg - void adjust_doc(unsigned i,double scale=1.) { - oracle.doc_score->PlusEquals(*model_scores[i],scale); - } - - Score &ds() { - return *oracle.doc_score; - } - - Oracle const& ComputeOracle(unsigned i) { - Oracle &o=oracles[i]; - if (o.is_null()) { - if (have_doc) { - if (verbose()) cerr<<"Before removing i="<PlusEquals(*hopesc,1); - cerr<<"With hope: "<PlusEquals(*hopesc,-1); - cerr<<"Without hope: "<ScoreDetails()<=dev_set_size) ? 
rsg() : b); - - if (old_to_hope) - o2hope+=o.ModelHopeGradient(); - if (fear_to_hope) - fear2hope+=o.FearHopeGradient(); - } - double N=(double)oracle_batch; - if (old_to_hope) { - o2hope/=N; - directions.push_back(o2hope); - } - if (fear_to_hope) { - fear2hope/=N; - directions.push_back(fear2hope); - } +int main(int argc, char** argv) { + RandomNumberGenerator rng; + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + vector features; + SparseVector origin; + vector w; + Weights::InitFromFile(conf["weights"].as(), &w, &features); + Weights::InitSparseVector(w, &origin); + const string forest_repository = conf["forest_repository"].as(); + assert(DirectoryExists(forest_repository)); + if (conf.count("optimize_feature") > 0) + features=conf["optimize_feature"].as >(); + vector > directions; + vector fids(features.size()); + for (int i = 0; i < features.size(); ++i) + fids[i] = FD::Convert(features[i]); + LineOptimizer::CreateOptimizationDirections( + fids, + conf["random_directions"].as(), + &rng, + &directions); + unsigned dev_set_size = conf["dev_set_size"].as(); + for (unsigned i = 0; i < dev_set_size; ++i) { + for (unsigned j = 0; j < directions.size(); ++j) { + cout << forest_repository << '/' << i << ".json.gz " << i << ' '; + print(cout, origin, "=", ";"); + cout << ' '; + print(cout, directions[j], "=", ";"); + cout << endl; } } -}; - -int main(int argc, char** argv) { - oracle_directions od; - return od.main(argc,argv); + return 0; } diff --git a/vest/mr_vest_map.cc b/vest/mr_vest_map.cc index 8f6e085d..7d9625bc 100644 --- a/vest/mr_vest_map.cc +++ b/vest/mr_vest_map.cc @@ -82,20 +82,20 @@ int main(int argc, char** argv) { if (line.empty()) continue; istringstream is(line); int sent_id; - string file, s_origin, s_axis; + string file, s_origin, s_direction; // path-to-file (JSON) sent_id starting-point search-direction - is >> file >> sent_id >> s_origin >> s_axis; + is >> file >> sent_id >> s_origin >> s_direction; SparseVector origin; - assert(ReadSparseVectorString(s_origin, &origin)); - SparseVector axis; - assert(ReadSparseVectorString(s_axis, &axis)); - // cerr << "File: " << file << "\nAxis: " << axis << "\n X: " << origin << endl; + ReadSparseVectorString(s_origin, &origin); + SparseVector direction; + ReadSparseVectorString(s_direction, &direction); + // cerr << "File: " << file << "\nDir: " << direction << "\n X: " << origin << endl; if (last_file != file) { last_file = file; ReadFile rf(file); HypergraphIO::ReadFromJSON(rf.stream(), &hg); } - ViterbiEnvelopeWeightFunction wf(origin, axis); + ViterbiEnvelopeWeightFunction wf(origin, direction); ViterbiEnvelope ve = Inside(hg, NULL, wf); ErrorSurface es; @@ -104,7 +104,7 @@ int main(int argc, char** argv) { // cerr << "Error surface has " << es.size() << " segments\n"; string val; es.Serialize(&val); - cout << 'M' << ' ' << s_origin << ' ' << s_axis << '\t'; + cout << 'M' << ' ' << s_origin << ' ' << s_direction << '\t'; B64::b64encode(val.c_str(), val.size(), &cout); cout << endl << flush; } -- cgit v1.2.3 From 74c61ef9c9dc5cefbad4aa9513973965dd583ee7 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 27 Jan 2012 14:30:44 -0500 Subject: Silly windows users, code isn't executable --- utils/agenda.h | 0 utils/batched_append.h | 0 utils/best.h | 0 utils/fast_lexical_cast.hpp | 0 utils/feature_vector.h | 0 utils/ftoa.h | 0 utils/hash.h | 0 utils/have_64_bits.h | 0 utils/indices_after.h | 0 utils/int_or_pointer.h | 0 utils/intern_pool.h | 0 utils/intrusive_refcount.hpp | 0 utils/lvalue_pmap.h | 0 
utils/max_plus.h | 0 utils/maybe_update_bound.h | 0 utils/murmur_hash.h | 0 utils/named_enum.h | 0 utils/nan.h | 0 utils/null_deleter.h | 0 utils/null_traits.h | 0 utils/semiring.h | 0 utils/show.h | 0 utils/static_utoa.h | 0 utils/string_to.h | 0 utils/stringlib_test.cc | 0 utils/swap_pod.h | 0 utils/utoa.h | 0 utils/value_array.h | 0 28 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 utils/agenda.h mode change 100755 => 100644 utils/batched_append.h mode change 100755 => 100644 utils/best.h mode change 100755 => 100644 utils/fast_lexical_cast.hpp mode change 100755 => 100644 utils/feature_vector.h mode change 100755 => 100644 utils/ftoa.h mode change 100755 => 100644 utils/hash.h mode change 100755 => 100644 utils/have_64_bits.h mode change 100755 => 100644 utils/indices_after.h mode change 100755 => 100644 utils/int_or_pointer.h mode change 100755 => 100644 utils/intern_pool.h mode change 100755 => 100644 utils/intrusive_refcount.hpp mode change 100755 => 100644 utils/lvalue_pmap.h mode change 100755 => 100644 utils/max_plus.h mode change 100755 => 100644 utils/maybe_update_bound.h mode change 100755 => 100644 utils/murmur_hash.h mode change 100755 => 100644 utils/named_enum.h mode change 100755 => 100644 utils/nan.h mode change 100755 => 100644 utils/null_deleter.h mode change 100755 => 100644 utils/null_traits.h mode change 100755 => 100644 utils/semiring.h mode change 100755 => 100644 utils/show.h mode change 100755 => 100644 utils/static_utoa.h mode change 100755 => 100644 utils/string_to.h mode change 100755 => 100644 utils/stringlib_test.cc mode change 100755 => 100644 utils/swap_pod.h mode change 100755 => 100644 utils/utoa.h mode change 100755 => 100644 utils/value_array.h (limited to 'utils') diff --git a/utils/agenda.h b/utils/agenda.h old mode 100755 new mode 100644 diff --git a/utils/batched_append.h b/utils/batched_append.h old mode 100755 new mode 100644 diff --git a/utils/best.h b/utils/best.h old mode 100755 new mode 100644 diff --git a/utils/fast_lexical_cast.hpp b/utils/fast_lexical_cast.hpp old mode 100755 new mode 100644 diff --git a/utils/feature_vector.h b/utils/feature_vector.h old mode 100755 new mode 100644 diff --git a/utils/ftoa.h b/utils/ftoa.h old mode 100755 new mode 100644 diff --git a/utils/hash.h b/utils/hash.h old mode 100755 new mode 100644 diff --git a/utils/have_64_bits.h b/utils/have_64_bits.h old mode 100755 new mode 100644 diff --git a/utils/indices_after.h b/utils/indices_after.h old mode 100755 new mode 100644 diff --git a/utils/int_or_pointer.h b/utils/int_or_pointer.h old mode 100755 new mode 100644 diff --git a/utils/intern_pool.h b/utils/intern_pool.h old mode 100755 new mode 100644 diff --git a/utils/intrusive_refcount.hpp b/utils/intrusive_refcount.hpp old mode 100755 new mode 100644 diff --git a/utils/lvalue_pmap.h b/utils/lvalue_pmap.h old mode 100755 new mode 100644 diff --git a/utils/max_plus.h b/utils/max_plus.h old mode 100755 new mode 100644 diff --git a/utils/maybe_update_bound.h b/utils/maybe_update_bound.h old mode 100755 new mode 100644 diff --git a/utils/murmur_hash.h b/utils/murmur_hash.h old mode 100755 new mode 100644 diff --git a/utils/named_enum.h b/utils/named_enum.h old mode 100755 new mode 100644 diff --git a/utils/nan.h b/utils/nan.h old mode 100755 new mode 100644 diff --git a/utils/null_deleter.h b/utils/null_deleter.h old mode 100755 new mode 100644 diff --git a/utils/null_traits.h b/utils/null_traits.h old mode 100755 new mode 100644 diff --git a/utils/semiring.h b/utils/semiring.h old 
mode 100755 new mode 100644 diff --git a/utils/show.h b/utils/show.h old mode 100755 new mode 100644 diff --git a/utils/static_utoa.h b/utils/static_utoa.h old mode 100755 new mode 100644 diff --git a/utils/string_to.h b/utils/string_to.h old mode 100755 new mode 100644 diff --git a/utils/stringlib_test.cc b/utils/stringlib_test.cc old mode 100755 new mode 100644 diff --git a/utils/swap_pod.h b/utils/swap_pod.h old mode 100755 new mode 100644 diff --git a/utils/utoa.h b/utils/utoa.h old mode 100755 new mode 100644 diff --git a/utils/value_array.h b/utils/value_array.h old mode 100755 new mode 100644 -- cgit v1.2.3 From 648fd70ec05997003e801e113d825c84e55e01ca Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 8 Feb 2012 16:22:55 -0500 Subject: move widely duplicated math functions into m.h header --- .gitignore | 1 + gi/pf/base_distributions.cc | 22 +++++------ gi/pf/base_distributions.h | 21 +--------- gi/pf/conditional_pseg.h | 3 +- gi/pf/pfdist.cc | 6 +-- gi/pf/pfnaive.cc | 4 +- phrasinator/gibbs_train_plm.cc | 8 +--- utils/Makefile.am | 5 ++- utils/m.h | 89 ++++++++++++++++++++++++++++++++++++++++++ utils/m_test.cc | 75 +++++++++++++++++++++++++++++++++++ utils/mfcr.h | 22 ++--------- 11 files changed, 194 insertions(+), 62 deletions(-) create mode 100644 utils/m.h create mode 100644 utils/m_test.cc (limited to 'utils') diff --git a/.gitignore b/.gitignore index ab8bf2c7..4f75d153 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ mira/kbest_mira +utils/m_test sa-extract/calignment.c sa-extract/calignment.so sa-extract/cdat.c diff --git a/gi/pf/base_distributions.cc b/gi/pf/base_distributions.cc index d362fd76..d9761005 100644 --- a/gi/pf/base_distributions.cc +++ b/gi/pf/base_distributions.cc @@ -59,7 +59,7 @@ prob_t PhraseConditionalUninformativeUnigramBase::p0(const vector& vsrc, const int flen = vsrc.size() - start_src; const int elen = vtrg.size() - start_trg; prob_t p; - p.logeq(log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01) + p.logeq(Md::log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01) //p.logeq(log_poisson(elen, 1)); // elen | flen ~Pois(flen + 0.01) for (int i = 0; i < elen; ++i) p *= u(vtrg[i + start_trg]); // draw e_i ~Uniform @@ -73,7 +73,7 @@ prob_t PhraseConditionalUninformativeBase::p0(const vector& vsrc, const int elen = vtrg.size() - start_trg; prob_t p; //p.logeq(log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01) - p.logeq(log_poisson(elen, 1)); // elen | flen ~Pois(flen + 0.01) + p.logeq(Md::log_poisson(elen, 1)); // elen | flen ~Pois(flen + 0.01) for (int i = 0; i < elen; ++i) p *= kUNIFORM_TARGET; // draw e_i ~Uniform return p; @@ -113,7 +113,7 @@ prob_t PhraseConditionalBase::p0(const vector& vsrc, const int elen = vtrg.size() - start_trg; prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); prob_t p; - p.logeq(log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01) + p.logeq(Md::log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01) for (int i = 0; i < elen; ++i) { // for each position i in e-RHS const WordID trg = vtrg[i + start_trg]; prob_t tp = prob_t::Zero(); @@ -139,9 +139,9 @@ prob_t PhraseJointBase::p0(const vector& vsrc, const int elen = vtrg.size() - start_trg; prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); prob_t p; - p.logeq(log_poisson(flen, 1.0)); // flen ~Pois(1) + p.logeq(Md::log_poisson(flen, 1.0)); // flen ~Pois(1) // elen | flen ~Pois(flen + 0.01) - prob_t ptrglen; ptrglen.logeq(log_poisson(elen, flen + 0.01)); + 
prob_t ptrglen; ptrglen.logeq(Md::log_poisson(elen, flen + 0.01)); p *= ptrglen; p *= kUNIFORM_SOURCE.pow(flen); // each f in F ~Uniform for (int i = 0; i < elen; ++i) { // for each position i in E @@ -171,9 +171,9 @@ prob_t PhraseJointBase_BiDir::p0(const vector& vsrc, prob_t uniform_trg_alignment; uniform_trg_alignment.logeq(-log(elen + 1)); prob_t p1; - p1.logeq(log_poisson(flen, 1.0)); // flen ~Pois(1) + p1.logeq(Md::log_poisson(flen, 1.0)); // flen ~Pois(1) // elen | flen ~Pois(flen + 0.01) - prob_t ptrglen; ptrglen.logeq(log_poisson(elen, flen + 0.01)); + prob_t ptrglen; ptrglen.logeq(Md::log_poisson(elen, flen + 0.01)); p1 *= ptrglen; p1 *= kUNIFORM_SOURCE.pow(flen); // each f in F ~Uniform for (int i = 0; i < elen; ++i) { // for each position i in E @@ -193,9 +193,9 @@ prob_t PhraseJointBase_BiDir::p0(const vector& vsrc, } prob_t p2; - p2.logeq(log_poisson(elen, 1.0)); // elen ~Pois(1) + p2.logeq(Md::log_poisson(elen, 1.0)); // elen ~Pois(1) // flen | elen ~Pois(flen + 0.01) - prob_t psrclen; psrclen.logeq(log_poisson(flen, elen + 0.01)); + prob_t psrclen; psrclen.logeq(Md::log_poisson(flen, elen + 0.01)); p2 *= psrclen; p2 *= kUNIFORM_TARGET.pow(elen); // each f in F ~Uniform for (int i = 0; i < flen; ++i) { // for each position i in E @@ -227,9 +227,9 @@ JumpBase::JumpBase() : p(200) { for (int j = min_jump; j <= max_jump; ++j) { prob_t& cp = cpd[j]; if (j < 0) - cp.logeq(log_poisson(1.5-j, 1)); + cp.logeq(Md::log_poisson(1.5-j, 1)); else if (j > 0) - cp.logeq(log_poisson(j, 1)); + cp.logeq(Md::log_poisson(j, 1)); cp.poweq(0.2); z += cp; } diff --git a/gi/pf/base_distributions.h b/gi/pf/base_distributions.h index a23ac32b..0d597c5c 100644 --- a/gi/pf/base_distributions.h +++ b/gi/pf/base_distributions.h @@ -13,24 +13,7 @@ #include "prob.h" #include "tdict.h" #include "sampler.h" - -inline double log_poisson(unsigned x, const double& lambda) { - assert(lambda > 0.0); - return log(lambda) * x - lgamma(x + 1) - lambda; -} - -inline double log_binom_coeff(unsigned n, unsigned k) { - assert(n >= k); - if (n == k) return 0.0; - return lgamma(n + 1) - lgamma(k + 1) - lgamma(n - k + 1); -} - -// http://en.wikipedia.org/wiki/Negative_binomial_distribution -inline double log_negative_binom(unsigned x, unsigned r, double p) { - assert(p > 0.0); - assert(p < 1.0); - return log_binom_coeff(x + r - 1, x) + r * log(1 - p) + x * log(p); -} +#include "m.h" inline std::ostream& operator<<(std::ostream& os, const std::vector& p) { os << '['; @@ -68,7 +51,7 @@ struct Model1 { struct PoissonUniformUninformativeBase { explicit PoissonUniformUninformativeBase(const unsigned ves) : kUNIFORM(1.0 / ves) {} prob_t operator()(const TRule& r) const { - prob_t p; p.logeq(log_poisson(r.e_.size(), 1.0)); + prob_t p; p.logeq(Md::log_poisson(r.e_.size(), 1.0)); prob_t q = kUNIFORM; q.poweq(r.e_.size()); p *= q; return p; diff --git a/gi/pf/conditional_pseg.h b/gi/pf/conditional_pseg.h index 0aa5e8e0..2e9e38fc 100644 --- a/gi/pf/conditional_pseg.h +++ b/gi/pf/conditional_pseg.h @@ -6,6 +6,7 @@ #include #include +#include "m.h" #include "prob.h" #include "ccrp_nt.h" #include "mfcr.h" @@ -210,7 +211,7 @@ struct ConditionalParallelSegementationModel { prob_t AlignProbability(unsigned span) const { prob_t p; - p.logeq(aligns.logprob(span, log_poisson(span, 1.0))); + p.logeq(aligns.logprob(span, Md::log_poisson(span, 1.0))); return p; } diff --git a/gi/pf/pfdist.cc b/gi/pf/pfdist.cc index ef08a165..3d578db2 100644 --- a/gi/pf/pfdist.cc +++ b/gi/pf/pfdist.cc @@ -315,7 +315,7 @@ struct BackwardEstimate { for (int i = 0; 
i < src_cov.size(); ++i) if (!src_cov[i]) r.push_back(src_[i]); const prob_t uniform_alignment(1.0 / r.size()); - e.logeq(log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) + e.logeq(Md::log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) for (unsigned j = trg_cov; j < trg_.size(); ++j) { prob_t p; for (unsigned i = 0; i < r.size(); ++i) @@ -352,7 +352,7 @@ struct BackwardEstimateSym { if (!src_cov[i]) r.push_back(src_[i]); r.push_back(0); // NULL word const prob_t uniform_alignment(1.0 / r.size()); - e.logeq(log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) + e.logeq(Md::log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) for (unsigned j = trg_cov; j < trg_.size(); ++j) { prob_t p; for (unsigned i = 0; i < r.size(); ++i) @@ -367,7 +367,7 @@ struct BackwardEstimateSym { r.pop_back(); const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0)); prob_t inv; - inv.logeq(log_poisson(r.size(), trg_.size() - trg_cov)); + inv.logeq(Md::log_poisson(r.size(), trg_.size() - trg_cov)); for (unsigned i = 0; i < r.size(); ++i) { prob_t p; for (unsigned j = trg_cov - 1; j < trg_.size(); ++j) diff --git a/gi/pf/pfnaive.cc b/gi/pf/pfnaive.cc index acba9d22..e1a53f5c 100644 --- a/gi/pf/pfnaive.cc +++ b/gi/pf/pfnaive.cc @@ -77,7 +77,7 @@ struct BackwardEstimateSym { r.push_back(src_[i]); r.push_back(0); // NULL word const prob_t uniform_alignment(1.0 / r.size()); - e.logeq(log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) + e.logeq(Md::log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) for (unsigned j = trg_cov; j < trg_.size(); ++j) { prob_t p; for (unsigned i = 0; i < r.size(); ++i) @@ -92,7 +92,7 @@ struct BackwardEstimateSym { r.pop_back(); const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0)); prob_t inv; - inv.logeq(log_poisson(r.size(), trg_.size() - trg_cov)); + inv.logeq(Md::log_poisson(r.size(), trg_.size() - trg_cov)); for (unsigned i = 0; i < r.size(); ++i) { prob_t p; for (unsigned j = trg_cov - 1; j < trg_.size(); ++j) diff --git a/phrasinator/gibbs_train_plm.cc b/phrasinator/gibbs_train_plm.cc index 29b3d7ea..66b46011 100644 --- a/phrasinator/gibbs_train_plm.cc +++ b/phrasinator/gibbs_train_plm.cc @@ -8,6 +8,7 @@ #include "dict.h" #include "sampler.h" #include "ccrp.h" +#include "m.h" using namespace std; using namespace std::tr1; @@ -95,11 +96,6 @@ void ReadCorpus(const string& filename, vector >* c, set* vocab if (in != &cin) delete in; } -double log_poisson(unsigned x, const double& lambda) { - assert(lambda > 0.0); - return log(lambda) * x - lgamma(x + 1) - lambda; -} - struct UniphraseLM { UniphraseLM(const vector >& corpus, const set& vocab, @@ -128,7 +124,7 @@ struct UniphraseLM { double log_p0(const vector& phrase) const { double len_logprob; if (use_poisson_) - len_logprob = log_poisson(phrase.size(), 1.0); + len_logprob = Md::log_poisson(phrase.size(), 1.0); else len_logprob = log(1 - p_end_) * (phrase.size() -1) + log(p_end_); return log(uniform_word_) * phrase.size() + len_logprob; diff --git a/utils/Makefile.am b/utils/Makefile.am index 3e559c75..a1ea8270 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -7,11 +7,12 @@ TESTS = ts phmt mfcr_test if HAVE_GTEST noinst_PROGRAMS += \ dict_test \ + m_test \ weights_test \ logval_test \ small_vector_test -TESTS += small_vector_test logval_test weights_test 
dict_test +TESTS += small_vector_test logval_test weights_test dict_test m_test endif reconstruct_weights_SOURCES = reconstruct_weights.cc @@ -38,6 +39,8 @@ endif phmt_SOURCES = phmt.cc ts_SOURCES = ts.cc +m_test_SOURCES = m_test.cc +m_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) dict_test_SOURCES = dict_test.cc dict_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) mfcr_test_SOURCES = mfcr_test.cc diff --git a/utils/m.h b/utils/m.h new file mode 100644 index 00000000..b25248c2 --- /dev/null +++ b/utils/m.h @@ -0,0 +1,89 @@ +#ifndef _M_H_ +#define _M_H_ + +#include +#include + +template +struct M { + // support [0, 1, 2 ...) + static inline F log_poisson(unsigned x, const F& lambda) { + assert(lambda > 0.0); + return std::log(lambda) * x - lgamma(x + 1) - lambda; + } + + // support [0, 1, 2 ...) + static inline F log_geometric(unsigned x, const F& p) { + assert(p > 0.0); + assert(p < 1.0); + return std::log(1 - p) * x + std::log(p); + } + + // log of the binomial coefficient + static inline F log_binom_coeff(unsigned n, unsigned k) { + assert(n >= k); + if (n == k) return 0.0; + return lgamma(n + 1) - lgamma(k + 1) - lgamma(n - k + 1); + } + + // http://en.wikipedia.org/wiki/Negative_binomial_distribution + // support [0, 1, 2 ...) + static inline F log_negative_binom(unsigned x, unsigned r, const F& p) { + assert(p > 0.0); + assert(p < 1.0); + return log_binom_coeff(x + r - 1u, x) + r * std::log(F(1) - p) + x * std::log(p); + } + + // this is the Beta function, *not* the beta probability density + // http://mathworld.wolfram.com/BetaFunction.html + static inline F log_beta_fn(const F& x, const F& y) { + return lgamma(x) + lgamma(y) - lgamma(x + y); + } + + // support x >= 0.0 + static F log_gamma_density(const F& x, const F& shape, const F& rate) { + assert(x >= 0.0); + assert(shape > 0.0); + assert(rate > 0.0); + return (shape-1)*std::log(x) - shape*std::log(rate) - x/rate - lgamma(shape); + } + + // this is the Beta *density* p(x ; alpha, beta) + // support x \in (0,1) + static inline F log_beta_density(const F& x, const F& alpha, const F& beta) { + assert(x > 0.0); + assert(x < 1.0); + assert(alpha > 0.0); + assert(beta > 0.0); + return (alpha-1)*std::log(x)+(beta-1)*std::log(1-x) - log_beta_fn(alpha, beta); + } + + // note: this has been adapted so that 0 is in the support of the distribution + // support [0, 1, 2 ...) 
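+ // equivalently: the standard Yule-Simon pmf rho * Beta(k, rho + 1) over k >= 1, evaluated at k = x + 1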
+ static inline F log_yule_simon(unsigned x, const F& rho) { + assert(rho > 0.0); + return std::log(rho) + log_beta_fn(x + 1, rho + 1); + } + + // see http://www.gatsby.ucl.ac.uk/~ywteh/research/compling/hpylm.pdf + // when y=1, sometimes written x^{\overline{n}} or x^{(n)} "Pochhammer symbol" + static inline F log_generalized_factorial(const F& x, const F& n, const F& y = 1.0) { + assert(x > 0.0); + assert(y >= 0.0); + assert(n > 0.0); + if (!n) return 0.0; + if (y == F(1)) { + return lgamma(x + n) - lgamma(x); + } else if (y) { + return n * std::log(y) + lgamma(x/y + n) - lgamma(x/y); + } else { // y == 0.0 + return n * std::log(x); + } + } + +}; + +typedef M Md; +typedef M Mf; + +#endif diff --git a/utils/m_test.cc b/utils/m_test.cc new file mode 100644 index 00000000..fca8f895 --- /dev/null +++ b/utils/m_test.cc @@ -0,0 +1,75 @@ +#include "m.h" + +#include +#include +#include + +using namespace std; + +class MTest : public testing::Test { + public: + MTest() {} + protected: + virtual void SetUp() { } + virtual void TearDown() { } +}; + +TEST_F(MTest, Poisson) { + double prev = 1.0; + double tot = 0; + for (int i = 0; i < 10; ++i) { + double p = Md::log_poisson(i, 0.99); + cerr << "p(i=" << i << ") = " << exp(p) << endl; + EXPECT_LT(p, prev); + tot += exp(p); + prev = p; + } + cerr << " tot=" << tot << endl; + EXPECT_LE(tot, 1.0); +} + +TEST_F(MTest, YuleSimon) { + double prev = 1.0; + double tot = 0; + for (int i = 0; i < 10; ++i) { + double p = Md::log_yule_simon(i, 1.0); + cerr << "p(i=" << i << ") = " << exp(p) << endl; + EXPECT_LT(p, prev); + tot += exp(p); + prev = p; + } + cerr << " tot=" << tot << endl; + EXPECT_LE(tot, 1.0); +} + +TEST_F(MTest, LogGeometric) { + double prev = 1.0; + double tot = 0; + for (int i = 0; i < 10; ++i) { + double p = Md::log_geometric(i, 0.5); + cerr << "p(i=" << i << ") = " << exp(p) << endl; + EXPECT_LT(p, prev); + tot += exp(p); + prev = p; + } + cerr << " tot=" << tot << endl; + EXPECT_LE(tot, 1.0); +} + +TEST_F(MTest, GeneralizedFactorial) { + for (double i = 0.3; i < 10000; i += 0.4) { + double a = Md::log_generalized_factorial(1.0, i); + double b = lgamma(1.0 + i); + EXPECT_FLOAT_EQ(a,b); + } + double gf_3_6 = 3.0 * 4.0 * 5.0 * 6.0 * 7.0 * 8.0; + EXPECT_FLOAT_EQ(Md::log_generalized_factorial(3.0, 6.0), std::log(gf_3_6)); + double gf_314_6 = 3.14 * 4.14 * 5.14 * 6.14 * 7.14 * 8.14; + EXPECT_FLOAT_EQ(Md::log_generalized_factorial(3.14, 6.0), std::log(gf_314_6)); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + diff --git a/utils/mfcr.h b/utils/mfcr.h index 3eb133fc..396d0205 100644 --- a/utils/mfcr.h +++ b/utils/mfcr.h @@ -12,6 +12,7 @@ #include #include "sampler.h" #include "slice_sampler.h" +#include "m.h" struct TableCount { TableCount() : count(), floor() {} @@ -218,31 +219,14 @@ class MFCR { return log_crp_prob(d_, alpha_); } - static double log_beta_density(const double& x, const double& alpha, const double& beta) { - assert(x > 0.0); - assert(x < 1.0); - assert(alpha > 0.0); - assert(beta > 0.0); - const double lp = (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta); - return lp; - } - - static double log_gamma_density(const double& x, const double& shape, const double& rate) { - assert(x >= 0.0); - assert(shape > 0.0); - assert(rate > 0.0); - const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape); - return lp; - } - // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process // does not include draws from G_w's 
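// for reference (a note on the function below, not part of the original patch):
// in one standard form, the Pitman-Yor CRP seating probability computed here is
//   log P(seating) = sum_{k=1}^{K-1} log(alpha + k*d)
//                  - sum_{n=1}^{N-1} log(alpha + n)
//                  + sum_{tables k} [lgamma(c_k - d) - lgamma(1 - d)]
// with K tables, N customers, and c_k customers at table k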
double log_crp_prob(const double& d, const double& alpha) const { double lp = 0.0; if (has_d_prior()) - lp = log_beta_density(d, d_prior_alpha_, d_prior_beta_); + lp = Md::log_beta_density(d, d_prior_alpha_, d_prior_beta_); if (has_alpha_prior()) - lp += log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_); + lp += Md::log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_); assert(lp <= 0.0); if (num_customers_) { if (d > 0.0) { -- cgit v1.2.3 From 50105660d8c18889e8908cf3e4c583b551dc05af Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 10 Feb 2012 13:18:59 -0500 Subject: move atools to utils directory --- training/Makefile.am | 4 - training/atools.cc | 369 --------------------------------------------------- utils/Makefile.am | 4 +- utils/atools.cc | 369 +++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 372 insertions(+), 374 deletions(-) delete mode 100644 training/atools.cc create mode 100644 utils/atools.cc (limited to 'utils') diff --git a/training/Makefile.am b/training/Makefile.am index 2a11ae52..d2f1ccc5 100644 --- a/training/Makefile.am +++ b/training/Makefile.am @@ -6,7 +6,6 @@ bin_PROGRAMS = \ mr_reduce_to_weights \ mr_optimize_reduce \ grammar_convert \ - atools \ plftools \ collapse_weights \ mpi_extract_reachable \ @@ -47,9 +46,6 @@ augment_grammar_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/lib test_ngram_SOURCES = test_ngram.cc test_ngram_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz -atools_SOURCES = atools.cc -atools_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz - model1_SOURCES = model1.cc ttables.cc model1_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz diff --git a/training/atools.cc b/training/atools.cc deleted file mode 100644 index 82e30c38..00000000 --- a/training/atools.cc +++ /dev/null @@ -1,369 +0,0 @@ -#include -#include -#include - -#include -#include -#include -#include - -#include "filelib.h" -#include "alignment_pharaoh.h" - -namespace po = boost::program_options; -using namespace std; -using boost::shared_ptr; - -struct Command { - virtual ~Command() {} - virtual string Name() const = 0; - - // returns 1 for alignment grid output [default] - // returns 2 if Summary() should be called [for AER, etc] - virtual int Result() const { return 1; } - - virtual bool RequiresTwoOperands() const { return true; } - virtual void Apply(const Array2D& a, const Array2D& b, Array2D* x) = 0; - void EnsureSize(const Array2D& a, const Array2D& b, Array2D* x) { - x->resize(max(a.width(), b.width()), max(a.height(), b.height())); - } - static bool Safe(const Array2D& a, int i, int j) { - if (i >= 0 && j >= 0 && i < a.width() && j < a.height()) - return a(i,j); - else - return false; - } - virtual void Summary() { assert(!"Summary should have been overridden"); } -}; - -// compute fmeasure, second alignment is reference, first is hyp -struct FMeasureCommand : public Command { - FMeasureCommand() : matches(), num_predicted(), num_in_ref() {} - int Result() const { return 2; } - string Name() const { return "fmeasure"; } - bool RequiresTwoOperands() const { return true; } - void Apply(const Array2D& hyp, const Array2D& ref, Array2D* x) { - (void) x; // AER just computes statistics, not an alignment - int i_len = ref.width(); - int j_len = ref.height(); - for (int i = 0; i < i_len; ++i) { - for (int j = 0; j < j_len; ++j) { - if (ref(i,j)) { - ++num_in_ref; - if 
(Safe(hyp, i, j)) ++matches; - } - } - } - for (int i = 0; i < hyp.width(); ++i) - for (int j = 0; j < hyp.height(); ++j) - if (hyp(i,j)) ++num_predicted; - } - void Summary() { - if (num_predicted == 0 || num_in_ref == 0) { - cerr << "Insufficient statistics to compute f-measure!\n"; - abort(); - } - const double prec = static_cast(matches) / num_predicted; - const double rec = static_cast(matches) / num_in_ref; - cout << "P: " << prec << endl; - cout << "R: " << rec << endl; - const double f = (2.0 * prec * rec) / (rec + prec); - cout << "F: " << f << endl; - } - int matches; - int num_predicted; - int num_in_ref; -}; - -struct DisplayCommand : public Command { - string Name() const { return "display"; } - bool RequiresTwoOperands() const { return false; } - void Apply(const Array2D& in, const Array2D&, Array2D* x) { - *x = in; - cout << *x << endl; - } -}; - -struct ConvertCommand : public Command { - string Name() const { return "convert"; } - bool RequiresTwoOperands() const { return false; } - void Apply(const Array2D& in, const Array2D&, Array2D* x) { - *x = in; - } -}; - -struct InvertCommand : public Command { - string Name() const { return "invert"; } - bool RequiresTwoOperands() const { return false; } - void Apply(const Array2D& in, const Array2D&, Array2D* x) { - Array2D& res = *x; - res.resize(in.height(), in.width()); - for (int i = 0; i < in.height(); ++i) - for (int j = 0; j < in.width(); ++j) - res(i, j) = in(j, i); - } -}; - -struct IntersectCommand : public Command { - string Name() const { return "intersect"; } - bool RequiresTwoOperands() const { return true; } - void Apply(const Array2D& a, const Array2D& b, Array2D* x) { - EnsureSize(a, b, x); - Array2D& res = *x; - for (int i = 0; i < a.width(); ++i) - for (int j = 0; j < a.height(); ++j) - res(i, j) = Safe(a, i, j) && Safe(b, i, j); - } -}; - -struct UnionCommand : public Command { - string Name() const { return "union"; } - bool RequiresTwoOperands() const { return true; } - void Apply(const Array2D& a, const Array2D& b, Array2D* x) { - EnsureSize(a, b, x); - Array2D& res = *x; - for (int i = 0; i < res.width(); ++i) - for (int j = 0; j < res.height(); ++j) - res(i, j) = Safe(a, i, j) || Safe(b, i, j); - } -}; - -struct RefineCommand : public Command { - RefineCommand() { - neighbors_.push_back(make_pair(1,0)); - neighbors_.push_back(make_pair(-1,0)); - neighbors_.push_back(make_pair(0,1)); - neighbors_.push_back(make_pair(0,-1)); - } - bool RequiresTwoOperands() const { return true; } - - void Align(int i, int j) { - res_(i, j) = true; - is_i_aligned_[i] = true; - is_j_aligned_[j] = true; - } - - bool IsNeighborAligned(int i, int j) const { - for (int k = 0; k < neighbors_.size(); ++k) { - const int di = neighbors_[k].first; - const int dj = neighbors_[k].second; - if (Safe(res_, i + di, j + dj)) - return true; - } - return false; - } - - bool IsNeitherAligned(int i, int j) const { - return !(is_i_aligned_[i] || is_j_aligned_[j]); - } - - bool IsOneOrBothUnaligned(int i, int j) const { - return !(is_i_aligned_[i] && is_j_aligned_[j]); - } - - bool KoehnAligned(int i, int j) const { - return IsOneOrBothUnaligned(i, j) && IsNeighborAligned(i, j); - } - - typedef bool (RefineCommand::*Predicate)(int i, int j) const; - - protected: - void InitRefine( - const Array2D& a, - const Array2D& b) { - res_.clear(); - EnsureSize(a, b, &res_); - in_.clear(); un_.clear(); is_i_aligned_.clear(); is_j_aligned_.clear(); - EnsureSize(a, b, &in_); - EnsureSize(a, b, &un_); - is_i_aligned_.resize(res_.width(), false); - 
is_j_aligned_.resize(res_.height(), false); - for (int i = 0; i < in_.width(); ++i) - for (int j = 0; j < in_.height(); ++j) { - un_(i, j) = Safe(a, i, j) || Safe(b, i, j); - in_(i, j) = Safe(a, i, j) && Safe(b, i, j); - if (in_(i, j)) Align(i, j); - } - } - // "grow" the resulting alignment using the points in adds - // if they match the constraints determined by pred - void Grow(Predicate pred, bool idempotent, const Array2D& adds) { - if (idempotent) { - for (int i = 0; i < adds.width(); ++i) - for (int j = 0; j < adds.height(); ++j) { - if (adds(i, j) && !res_(i, j) && - (this->*pred)(i, j)) Align(i, j); - } - return; - } - set > p; - for (int i = 0; i < adds.width(); ++i) - for (int j = 0; j < adds.height(); ++j) - if (adds(i, j) && !res_(i, j)) - p.insert(make_pair(i, j)); - bool keep_going = !p.empty(); - while (keep_going) { - keep_going = false; - for (set >::iterator pi = p.begin(); - pi != p.end(); ++pi) { - if ((this->*pred)(pi->first, pi->second)) { - Align(pi->first, pi->second); - p.erase(pi); - keep_going = true; - } - } - } - } - Array2D res_; // refined alignment - Array2D in_; // intersection alignment - Array2D un_; // union alignment - vector is_i_aligned_; - vector is_j_aligned_; - vector > neighbors_; -}; - -struct DiagCommand : public RefineCommand { - DiagCommand() { - neighbors_.push_back(make_pair(1,1)); - neighbors_.push_back(make_pair(-1,1)); - neighbors_.push_back(make_pair(1,-1)); - neighbors_.push_back(make_pair(-1,-1)); - } -}; - -struct GDCommand : public DiagCommand { - string Name() const { return "grow-diag"; } - void Apply(const Array2D& a, const Array2D& b, Array2D* x) { - InitRefine(a, b); - Grow(&RefineCommand::KoehnAligned, false, un_); - *x = res_; - } -}; - -struct GDFCommand : public DiagCommand { - string Name() const { return "grow-diag-final"; } - void Apply(const Array2D& a, const Array2D& b, Array2D* x) { - InitRefine(a, b); - Grow(&RefineCommand::KoehnAligned, false, un_); - Grow(&RefineCommand::IsOneOrBothUnaligned, true, a); - Grow(&RefineCommand::IsOneOrBothUnaligned, true, b); - *x = res_; - } -}; - -struct GDFACommand : public DiagCommand { - string Name() const { return "grow-diag-final-and"; } - void Apply(const Array2D& a, const Array2D& b, Array2D* x) { - InitRefine(a, b); - Grow(&RefineCommand::KoehnAligned, false, un_); - Grow(&RefineCommand::IsNeitherAligned, true, a); - Grow(&RefineCommand::IsNeitherAligned, true, b); - *x = res_; - } -}; - -map > commands; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - ostringstream os; - os << "[REQ] Operation to perform:"; - for (map >::iterator it = commands.begin(); - it != commands.end(); ++it) { - os << ' ' << it->first; - } - string cstr = os.str(); - opts.add_options() - ("input_1,i", po::value(), "[REQUIRED] Alignment 1 file, - for STDIN") - ("input_2,j", po::value(), "Alignment 2 file, - for STDIN") - ("command,c", po::value()->default_value("convert"), cstr.c_str()) - ("help,h", "Print this help message and exit"); - po::options_description clo("Command line options"); - po::options_description dcmdline_options; - dcmdline_options.add(opts); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - po::notify(*conf); - - if (conf->count("help") || conf->count("input_1") == 0 || conf->count("command") == 0) { - cerr << dcmdline_options << endl; - exit(1); - } - const string cmd = (*conf)["command"].as(); - if (commands.count(cmd) == 0) { - cerr << "Don't understand command: " << cmd 
<< endl; - exit(1); - } - if (commands[cmd]->RequiresTwoOperands()) { - if (conf->count("input_2") == 0) { - cerr << "Command '" << cmd << "' requires two alignment files\n"; - exit(1); - } - if ((*conf)["input_1"].as() == "-" && (*conf)["input_2"].as() == "-") { - cerr << "Both inputs cannot be STDIN\n"; - exit(1); - } - } else { - if (conf->count("input_2") != 0) { - cerr << "Command '" << cmd << "' requires only one alignment file\n"; - exit(1); - } - } -} - -template static void AddCommand() { - C* c = new C; - commands[c->Name()].reset(c); -} - -int main(int argc, char **argv) { - AddCommand(); - AddCommand(); - AddCommand(); - AddCommand(); - AddCommand(); - AddCommand(); - AddCommand(); - AddCommand(); - AddCommand(); - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - Command& cmd = *commands[conf["command"].as()]; - boost::shared_ptr rf1(new ReadFile(conf["input_1"].as())); - boost::shared_ptr rf2; - if (cmd.RequiresTwoOperands()) - rf2.reset(new ReadFile(conf["input_2"].as())); - istream* in1 = rf1->stream(); - istream* in2 = NULL; - if (rf2) in2 = rf2->stream(); - while(*in1) { - string line1; - string line2; - getline(*in1, line1); - if (in2) { - getline(*in2, line2); - if ((*in1 && !*in2) || (*in2 && !*in1)) { - cerr << "Mismatched number of lines!\n"; - exit(1); - } - } - if (line1.empty() && !*in1) break; - shared_ptr > out(new Array2D); - shared_ptr > a1 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line1); - if (in2) { - shared_ptr > a2 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line2); - cmd.Apply(*a1, *a2, out.get()); - } else { - Array2D dummy; - cmd.Apply(*a1, dummy, out.get()); - } - - if (cmd.Result() == 1) { - AlignmentPharaoh::SerializePharaohFormat(*out, &cout); - } - } - if (cmd.Result() == 2) - cmd.Summary(); - return 0; -} - diff --git a/utils/Makefile.am b/utils/Makefile.am index a1ea8270..6e0678de 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -1,5 +1,5 @@ -bin_PROGRAMS = reconstruct_weights +bin_PROGRAMS = reconstruct_weights atools noinst_PROGRAMS = ts phmt mfcr_test TESTS = ts phmt mfcr_test @@ -17,6 +17,8 @@ endif reconstruct_weights_SOURCES = reconstruct_weights.cc +atools_SOURCES = atools.cc + noinst_LIBRARIES = libutils.a libutils_a_SOURCES = \ diff --git a/utils/atools.cc b/utils/atools.cc new file mode 100644 index 00000000..c0a91731 --- /dev/null +++ b/utils/atools.cc @@ -0,0 +1,369 @@ +#include +#include +#include + +#include +#include +#include +#include + +#include "filelib.h" +#include "alignment_pharaoh.h" + +namespace po = boost::program_options; +using namespace std; +using boost::shared_ptr; + +struct Command { + virtual ~Command() {} + virtual string Name() const = 0; + + // returns 1 for alignment grid output [default] + // returns 2 if Summary() should be called [for AER, etc] + virtual int Result() const { return 1; } + + virtual bool RequiresTwoOperands() const { return true; } + virtual void Apply(const Array2D& a, const Array2D& b, Array2D* x) = 0; + void EnsureSize(const Array2D& a, const Array2D& b, Array2D* x) { + x->resize(max(a.width(), b.width()), max(a.height(), b.height())); + } + static bool Safe(const Array2D& a, int i, int j) { + if (i >= 0 && j >= 0 && i < a.width() && j < a.height()) + return a(i,j); + else + return false; + } + virtual void Summary() { assert(!"Summary should have been overridden"); } +}; + +// compute fmeasure, second alignment is reference, first is hyp +struct FMeasureCommand : public Command { + FMeasureCommand() : matches(), num_predicted(), num_in_ref() {} + int Result() 
const { return 2; } + string Name() const { return "fmeasure"; } + bool RequiresTwoOperands() const { return true; } + void Apply(const Array2D& hyp, const Array2D& ref, Array2D* x) { + (void) x; // AER just computes statistics, not an alignment + int i_len = ref.width(); + int j_len = ref.height(); + for (int i = 0; i < i_len; ++i) { + for (int j = 0; j < j_len; ++j) { + if (ref(i,j)) { + ++num_in_ref; + if (Safe(hyp, i, j)) ++matches; + } + } + } + for (int i = 0; i < hyp.width(); ++i) + for (int j = 0; j < hyp.height(); ++j) + if (hyp(i,j)) ++num_predicted; + } + void Summary() { + if (num_predicted == 0 || num_in_ref == 0) { + cerr << "Insufficient statistics to compute f-measure!\n"; + abort(); + } + const double prec = static_cast(matches) / num_predicted; + const double rec = static_cast(matches) / num_in_ref; + cout << "P: " << prec << endl; + cout << "R: " << rec << endl; + const double f = (2.0 * prec * rec) / (rec + prec); + cout << "F: " << f << endl; + } + int matches; + int num_predicted; + int num_in_ref; +}; + +struct DisplayCommand : public Command { + string Name() const { return "display"; } + bool RequiresTwoOperands() const { return false; } + void Apply(const Array2D& in, const Array2D&, Array2D* x) { + *x = in; + cout << *x << endl; + } +}; + +struct ConvertCommand : public Command { + string Name() const { return "convert"; } + bool RequiresTwoOperands() const { return false; } + void Apply(const Array2D& in, const Array2D&, Array2D* x) { + *x = in; + } +}; + +struct InvertCommand : public Command { + string Name() const { return "invert"; } + bool RequiresTwoOperands() const { return false; } + void Apply(const Array2D& in, const Array2D&, Array2D* x) { + Array2D& res = *x; + res.resize(in.height(), in.width()); + for (int i = 0; i < in.height(); ++i) + for (int j = 0; j < in.width(); ++j) + res(i, j) = in(j, i); + } +}; + +struct IntersectCommand : public Command { + string Name() const { return "intersect"; } + bool RequiresTwoOperands() const { return true; } + void Apply(const Array2D& a, const Array2D& b, Array2D* x) { + EnsureSize(a, b, x); + Array2D& res = *x; + for (int i = 0; i < a.width(); ++i) + for (int j = 0; j < a.height(); ++j) + res(i, j) = Safe(a, i, j) && Safe(b, i, j); + } +}; + +struct UnionCommand : public Command { + string Name() const { return "union"; } + bool RequiresTwoOperands() const { return true; } + void Apply(const Array2D& a, const Array2D& b, Array2D* x) { + EnsureSize(a, b, x); + Array2D& res = *x; + for (int i = 0; i < res.width(); ++i) + for (int j = 0; j < res.height(); ++j) + res(i, j) = Safe(a, i, j) || Safe(b, i, j); + } +}; + +struct RefineCommand : public Command { + RefineCommand() { + neighbors_.push_back(make_pair(1,0)); + neighbors_.push_back(make_pair(-1,0)); + neighbors_.push_back(make_pair(0,1)); + neighbors_.push_back(make_pair(0,-1)); + } + bool RequiresTwoOperands() const { return true; } + + void Align(int i, int j) { + res_(i, j) = true; + is_i_aligned_[i] = true; + is_j_aligned_[j] = true; + } + + bool IsNeighborAligned(int i, int j) const { + for (int k = 0; k < neighbors_.size(); ++k) { + const int di = neighbors_[k].first; + const int dj = neighbors_[k].second; + if (Safe(res_, i + di, j + dj)) + return true; + } + return false; + } + + bool IsNeitherAligned(int i, int j) const { + return !(is_i_aligned_[i] || is_j_aligned_[j]); + } + + bool IsOneOrBothUnaligned(int i, int j) const { + return !(is_i_aligned_[i] && is_j_aligned_[j]); + } + + bool KoehnAligned(int i, int j) const { + return 
IsOneOrBothUnaligned(i, j) && IsNeighborAligned(i, j); + } + + typedef bool (RefineCommand::*Predicate)(int i, int j) const; + + protected: + void InitRefine( + const Array2D& a, + const Array2D& b) { + res_.clear(); + EnsureSize(a, b, &res_); + in_.clear(); un_.clear(); is_i_aligned_.clear(); is_j_aligned_.clear(); + EnsureSize(a, b, &in_); + EnsureSize(a, b, &un_); + is_i_aligned_.resize(res_.width(), false); + is_j_aligned_.resize(res_.height(), false); + for (int i = 0; i < in_.width(); ++i) + for (int j = 0; j < in_.height(); ++j) { + un_(i, j) = Safe(a, i, j) || Safe(b, i, j); + in_(i, j) = Safe(a, i, j) && Safe(b, i, j); + if (in_(i, j)) Align(i, j); + } + } + // "grow" the resulting alignment using the points in adds + // if they match the constraints determined by pred + void Grow(Predicate pred, bool idempotent, const Array2D& adds) { + if (idempotent) { + for (int i = 0; i < adds.width(); ++i) + for (int j = 0; j < adds.height(); ++j) { + if (adds(i, j) && !res_(i, j) && + (this->*pred)(i, j)) Align(i, j); + } + return; + } + set > p; + for (int i = 0; i < adds.width(); ++i) + for (int j = 0; j < adds.height(); ++j) + if (adds(i, j) && !res_(i, j)) + p.insert(make_pair(i, j)); + bool keep_going = !p.empty(); + while (keep_going) { + keep_going = false; + for (set >::iterator pi = p.begin(); + pi != p.end(); ++pi) { + if ((this->*pred)(pi->first, pi->second)) { + Align(pi->first, pi->second); + p.erase(pi); + keep_going = true; + } + } + } + } + Array2D res_; // refined alignment + Array2D in_; // intersection alignment + Array2D un_; // union alignment + vector is_i_aligned_; + vector is_j_aligned_; + vector > neighbors_; +}; + +struct DiagCommand : public RefineCommand { + DiagCommand() { + neighbors_.push_back(make_pair(1,1)); + neighbors_.push_back(make_pair(-1,1)); + neighbors_.push_back(make_pair(1,-1)); + neighbors_.push_back(make_pair(-1,-1)); + } +}; + +struct GDCommand : public DiagCommand { + string Name() const { return "grow-diag"; } + void Apply(const Array2D& a, const Array2D& b, Array2D* x) { + InitRefine(a, b); + Grow(&RefineCommand::KoehnAligned, false, un_); + *x = res_; + } +}; + +struct GDFCommand : public DiagCommand { + string Name() const { return "grow-diag-final"; } + void Apply(const Array2D& a, const Array2D& b, Array2D* x) { + InitRefine(a, b); + Grow(&RefineCommand::KoehnAligned, false, un_); + Grow(&RefineCommand::IsOneOrBothUnaligned, true, a); + Grow(&RefineCommand::IsOneOrBothUnaligned, true, b); + *x = res_; + } +}; + +struct GDFACommand : public DiagCommand { + string Name() const { return "grow-diag-final-and"; } + void Apply(const Array2D& a, const Array2D& b, Array2D* x) { + InitRefine(a, b); + Grow(&RefineCommand::KoehnAligned, false, un_); + Grow(&RefineCommand::IsNeitherAligned, true, a); + Grow(&RefineCommand::IsNeitherAligned, true, b); + *x = res_; + } +}; + +map > commands; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + ostringstream os; + os << "Operation to perform:"; + for (map >::iterator it = commands.begin(); + it != commands.end(); ++it) { + os << ' ' << it->first; + } + string cstr = os.str(); + opts.add_options() + ("input_1,i", po::value(), "[REQUIRED] Alignment 1 file, - for STDIN") + ("input_2,j", po::value(), "Alignment 2 file, - for STDIN") + ("command,c", po::value()->default_value("convert"), cstr.c_str()) + ("help,h", "Print this help message and exit"); + po::options_description clo("Command line options"); + po::options_description 
dcmdline_options; + dcmdline_options.add(opts); + + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + po::notify(*conf); + + if (conf->count("help") || conf->count("input_1") == 0 || conf->count("command") == 0) { + cerr << dcmdline_options << endl; + exit(1); + } + const string cmd = (*conf)["command"].as(); + if (commands.count(cmd) == 0) { + cerr << "Don't understand command: " << cmd << endl; + exit(1); + } + if (commands[cmd]->RequiresTwoOperands()) { + if (conf->count("input_2") == 0) { + cerr << "Command '" << cmd << "' requires two alignment files\n"; + exit(1); + } + if ((*conf)["input_1"].as() == "-" && (*conf)["input_2"].as() == "-") { + cerr << "Both inputs cannot be STDIN\n"; + exit(1); + } + } else { + if (conf->count("input_2") != 0) { + cerr << "Command '" << cmd << "' requires only one alignment file\n"; + exit(1); + } + } +} + +template static void AddCommand() { + C* c = new C; + commands[c->Name()].reset(c); +} + +int main(int argc, char **argv) { + AddCommand(); + AddCommand(); + AddCommand(); + AddCommand(); + AddCommand(); + AddCommand(); + AddCommand(); + AddCommand(); + AddCommand(); + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + Command& cmd = *commands[conf["command"].as()]; + boost::shared_ptr rf1(new ReadFile(conf["input_1"].as())); + boost::shared_ptr rf2; + if (cmd.RequiresTwoOperands()) + rf2.reset(new ReadFile(conf["input_2"].as())); + istream* in1 = rf1->stream(); + istream* in2 = NULL; + if (rf2) in2 = rf2->stream(); + while(*in1) { + string line1; + string line2; + getline(*in1, line1); + if (in2) { + getline(*in2, line2); + if ((*in1 && !*in2) || (*in2 && !*in1)) { + cerr << "Mismatched number of lines!\n"; + exit(1); + } + } + if (line1.empty() && !*in1) break; + shared_ptr > out(new Array2D); + shared_ptr > a1 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line1); + if (in2) { + shared_ptr > a2 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line2); + cmd.Apply(*a1, *a2, out.get()); + } else { + Array2D dummy; + cmd.Apply(*a1, dummy, out.get()); + } + + if (cmd.Result() == 1) { + AlignmentPharaoh::SerializePharaohFormat(*out, &cout); + } + } + if (cmd.Result() == 2) + cmd.Summary(); + return 0; +} + -- cgit v1.2.3 From a38b3fa383412e56eb958db998662c026bc08f4b Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 17 Feb 2012 13:01:54 -0500 Subject: boost version checking, check for Eigen, get rid of old digamma stuff --- configure.ac | 21 +++++++++++++++------ training/em_utils.h | 24 ------------------------ training/model1.cc | 1 - training/mr_em_adapted_reduce.cc | 6 +++--- training/ttables.h | 4 ++-- utils/m.h | 6 ++++++ 6 files changed, 26 insertions(+), 36 deletions(-) delete mode 100644 training/em_utils.h (limited to 'utils') diff --git a/configure.ac b/configure.ac index cd78ee72..aa79027f 100644 --- a/configure.ac +++ b/configure.ac @@ -9,7 +9,7 @@ esac AC_PROG_CC AC_PROG_CXX AC_LANG_CPLUSPLUS -BOOST_REQUIRE +BOOST_REQUIRE([1.44]) BOOST_PROGRAM_OPTIONS AC_ARG_ENABLE(mpi, [ --enable-mpi Build MPI binaries, assumes mpi.h is present ], @@ -38,7 +38,7 @@ then CPPFLAGS="$CPPFLAGS -I${with_cmph}/include" AC_CHECK_HEADER(cmph.h, - [AC_DEFINE([HAVE_CMPH], [], [flag for cmph perfect hashing library])], + [AC_DEFINE([HAVE_CMPH], [1], [flag for cmph perfect hashing library])], [AC_MSG_ERROR([Cannot find cmph library!])]) LDFLAGS="$LDFLAGS -L${with_cmph}/lib" @@ -46,6 +46,18 @@ then AM_CONDITIONAL([HAVE_CMPH], true) fi +if test "x$with_eigen" != 'xno' +then + SAVE_CPPFLAGS="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS 
-I${with_eigen}" + + AC_CHECK_HEADER(Eigen, + [AC_DEFINE([HAVE_EIGEN], [1], [flag for Eigen linear algebra library])], + [AC_MSG_ERROR([Cannot find Eigen!])]) + + AM_CONDITIONAL([HAVE_EIGEN], true) +fi + #BOOST_THREADS CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" LDFLAGS="$LDFLAGS $BOOST_PROGRAM_OPTIONS_LDFLAGS" @@ -53,11 +65,8 @@ LDFLAGS="$LDFLAGS $BOOST_PROGRAM_OPTIONS_LDFLAGS" LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS" # $BOOST_THREAD_LIBS" -AC_CHECK_HEADER(boost/math/special_functions/digamma.hpp, - [AC_DEFINE([HAVE_BOOST_DIGAMMA], [], [flag for boost::math::digamma])]) - AC_CHECK_HEADER(google/dense_hash_map, - [AC_DEFINE([HAVE_SPARSEHASH], [], [flag for google::dense_hash_map])]) + [AC_DEFINE([HAVE_SPARSEHASH], [1], [flag for google::dense_hash_map])]) AC_PROG_INSTALL GTEST_LIB_CHECK(1.0) diff --git a/training/em_utils.h b/training/em_utils.h deleted file mode 100644 index 37762978..00000000 --- a/training/em_utils.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef _EM_UTILS_H_ -#define _EM_UTILS_H_ - -#include "config.h" -#ifdef HAVE_BOOST_DIGAMMA -#include -using boost::math::digamma; -#else -#warning Using Mark Johnsons digamma() -#include -inline double digamma(double x) { - double result = 0, xx, xx2, xx4; - assert(x > 0); - for ( ; x < 7; ++x) - result -= 1/x; - x -= 1.0/2.0; - xx = 1.0/x; - xx2 = xx*xx; - xx4 = xx2*xx2; - result += log(x)+(1./24.)*xx2-(7.0/960.0)*xx4+(31.0/8064.0)*xx4*xx2-(127.0/30720.0)*xx4*xx4; - return result; -} -#endif -#endif diff --git a/training/model1.cc b/training/model1.cc index 40249aa3..a87d388f 100644 --- a/training/model1.cc +++ b/training/model1.cc @@ -9,7 +9,6 @@ #include "filelib.h" #include "ttables.h" #include "tdict.h" -#include "em_utils.h" namespace po = boost::program_options; using namespace std; diff --git a/training/mr_em_adapted_reduce.cc b/training/mr_em_adapted_reduce.cc index d4c16a2f..f65b5440 100644 --- a/training/mr_em_adapted_reduce.cc +++ b/training/mr_em_adapted_reduce.cc @@ -10,7 +10,7 @@ #include "fdict.h" #include "weights.h" #include "sparse_vector.h" -#include "em_utils.h" +#include "m.h" using namespace std; namespace po = boost::program_options; @@ -63,11 +63,11 @@ void Maximize(const bool use_vb, assert(tot > 0.0); double ltot = log(tot); if (use_vb) - ltot = digamma(tot + total_event_types * alpha); + ltot = Md::digamma(tot + total_event_types * alpha); for (SparseVector::const_iterator it = counts.begin(); it != counts.end(); ++it) { if (use_vb) { - pc->set_value(it->first, NoZero(digamma(it->second + alpha) - ltot)); + pc->set_value(it->first, NoZero(Md::digamma(it->second + alpha) - ltot)); } else { pc->set_value(it->first, NoZero(log(it->second) - ltot)); } diff --git a/training/ttables.h b/training/ttables.h index 50d85a68..bf3351d2 100644 --- a/training/ttables.h +++ b/training/ttables.h @@ -4,9 +4,9 @@ #include #include +#include "m.h" #include "wordid.h" #include "tdict.h" -#include "em_utils.h" class TTable { public: @@ -39,7 +39,7 @@ class TTable { for (Word2Double::iterator it = cpd.begin(); it != cpd.end(); ++it) tot += it->second + alpha; for (Word2Double::iterator it = cpd.begin(); it != cpd.end(); ++it) - it->second = exp(digamma(it->second + alpha) - digamma(tot)); + it->second = exp(Md::digamma(it->second + alpha) - Md::digamma(tot)); } counts.clear(); } diff --git a/utils/m.h b/utils/m.h index b25248c2..5e45efee 100644 --- a/utils/m.h +++ b/utils/m.h @@ -3,6 +3,7 @@ #include #include +#include template struct M { @@ -81,6 +82,11 @@ struct M { } } + // digamma is the first derivative of the log-gamma function + 
static inline F digamma(const F& x) { + return boost::math::digamma(x); + } + }; typedef M Md; -- cgit v1.2.3 From 9007216a43c5572c2c343a1700ac79fb35b7d82f Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 25 Feb 2012 21:22:27 -0500 Subject: really slow hiero lm --- gi/pf/Makefile.am | 4 +- gi/pf/hierolm.cc | 309 +++++++++++++++++++++++++++++++++++++++++++++ phrasinator/ccrp.h | 294 ------------------------------------------- utils/ccrp.h | 340 ++++++++++++++++++++++++++++++++++++++++++++++++++ utils/ccrp_onetable.h | 12 ++ utils/sampler.h | 2 +- 6 files changed, 665 insertions(+), 296 deletions(-) create mode 100644 gi/pf/hierolm.cc delete mode 100644 phrasinator/ccrp.h create mode 100644 utils/ccrp.h (limited to 'utils') diff --git a/gi/pf/Makefile.am b/gi/pf/Makefile.am index 8d43f36d..ed5b6fd3 100644 --- a/gi/pf/Makefile.am +++ b/gi/pf/Makefile.am @@ -1,4 +1,4 @@ -bin_PROGRAMS = cbgi brat dpnaive pfbrat pfdist itg pfnaive condnaive align-lexonly align-lexonly-pyp +bin_PROGRAMS = cbgi brat dpnaive pfbrat pfdist itg pfnaive condnaive align-lexonly align-lexonly-pyp hierolm noinst_LIBRARIES = libpf.a libpf_a_SOURCES = base_distributions.cc reachability.cc cfg_wfst_composer.cc corpus.cc unigrams.cc ngram_base.cc @@ -9,6 +9,8 @@ align_lexonly_pyp_SOURCES = align-lexonly-pyp.cc itg_SOURCES = itg.cc +hierolm_SOURCES = hierolm.cc + condnaive_SOURCES = condnaive.cc dpnaive_SOURCES = dpnaive.cc diff --git a/gi/pf/hierolm.cc b/gi/pf/hierolm.cc new file mode 100644 index 00000000..afb12fef --- /dev/null +++ b/gi/pf/hierolm.cc @@ -0,0 +1,309 @@ +#include +#include +#include + +#include +#include +#include + +#include "inside_outside.h" +#include "hg.h" +#include "bottom_up_parser.h" +#include "fdict.h" +#include "grammar.h" +#include "m.h" +#include "trule.h" +#include "tdict.h" +#include "filelib.h" +#include "dict.h" +#include "sampler.h" +#include "ccrp.h" +#include "ccrp_onetable.h" + +using namespace std; +using namespace tr1; +namespace po = boost::program_options; + +shared_ptr prng; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("samples,s",po::value()->default_value(1000),"Number of samples") + ("input,i",po::value(),"Read parallel data from") + ("random_seed,S",po::value(), "Random seed"); + po::options_description clo("Command line options"); + clo.add_options() + ("config", po::value(), "Configuration file") + ("help,h", "Print this help message and exit"); + po::options_description dconfig_options, dcmdline_options; + dconfig_options.add(opts); + dcmdline_options.add(opts).add(clo); + + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + if (conf->count("config")) { + ifstream config((*conf)["config"].as().c_str()); + po::store(po::parse_config_file(config, dconfig_options), *conf); + } + po::notify(*conf); + + if (conf->count("help") || (conf->count("input") == 0)) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +void ReadCorpus(const string& filename, + vector >* e, + set* vocab_e) { + e->clear(); + vocab_e->clear(); + istream* in; + if (filename == "-") + in = &cin; + else + in = new ifstream(filename.c_str()); + assert(*in); + string line; + while(*in) { + getline(*in, line); + if (line.empty() && !*in) break; + e->push_back(vector()); + vector& le = e->back(); + TD::ConvertSentence(line, &le); + for (unsigned i = 0; i < le.size(); ++i) + vocab_e->insert(le[i]); + } + if (in != &cin) delete in; +} + +struct Grid { + // a b c d e + // 0 - 0 - - + 
vector grid; +}; + +struct BaseRuleModel { + explicit BaseRuleModel(unsigned term_size, + unsigned nonterm_size = 1) : + unif_term(1.0 / term_size), + unif_nonterm(1.0 / nonterm_size) {} + prob_t operator()(const TRule& r) const { + prob_t p; p.logeq(Md::log_poisson(1.0, r.f_.size())); + const prob_t term_prob((2.0 + 0.01*r.f_.size()) / (r.f_.size() + 2)); + const prob_t nonterm_prob(1.0 - term_prob.as_float()); + for (unsigned i = 0; i < r.f_.size(); ++i) { + if (r.f_[i] <= 0) { // nonterminal + p *= nonterm_prob; + p *= unif_nonterm; + } else { // terminal + p *= term_prob; + p *= unif_term; + } + } + return p; + } + const prob_t unif_term, unif_nonterm; +}; + +struct HieroLMModel { + explicit HieroLMModel(unsigned vocab_size) : p0(vocab_size), x(1,1,1,1) {} + + prob_t Prob(const TRule& r) const { + return x.probT(r, p0(r)); + } + + int Increment(const TRule& r, MT19937* rng) { + return x.incrementT(r, p0(r), rng); + // return x.increment(r); + } + + int Decrement(const TRule& r, MT19937* rng) { + return x.decrement(r, rng); + //return x.decrement(r); + } + + prob_t Likelihood() const { + prob_t p; + p.logeq(x.log_crp_prob()); + for (CCRP::const_iterator it = x.begin(); it != x.end(); ++it) { + prob_t tp = p0(it->first); + tp.poweq(it->second.table_counts_.size()); + p *= tp; + } + //for (CCRP_OneTable::const_iterator it = x.begin(); it != x.end(); ++it) + // p *= p0(it->first); + return p; + } + + void ResampleHyperparameters(MT19937* rng) { + x.resample_hyperparameters(rng); + cerr << " d=" << x.discount() << ", alpha=" << x.concentration() << endl; + } + + const BaseRuleModel p0; + CCRP x; + //CCRP_OneTable x; +}; + +vector tofreelist; + +HieroLMModel* plm; + +struct NPGrammarIter : public GrammarIter, public RuleBin { + NPGrammarIter() : arity() { tofreelist.push_back(this); } + NPGrammarIter(const TRulePtr& inr, const int a, int symbol) : arity(a + (symbol < 0 ? 1 : 0)) { + if (inr) { + r.reset(new TRule(*inr)); + } else { + static const int kLHS = -TD::Convert("X"); + r.reset(new TRule); + r->lhs_ = kLHS; + } + TRule& rr = *r; + rr.f_.push_back(symbol); + rr.e_.push_back(symbol < 0 ? 
(1-int(arity)) : symbol); + tofreelist.push_back(this); + } + virtual int GetNumRules() const { + if (r) return 1; else return 0; + } + virtual TRulePtr GetIthRule(int) const { + return r; + } + virtual int Arity() const { + return arity; + } + virtual const RuleBin* GetRules() const { + if (!r) return NULL; else return this; + } + virtual const GrammarIter* Extend(int symbol) const { + return new NPGrammarIter(r, arity, symbol); + } + const unsigned char arity; + TRulePtr r; +}; + +struct NPGrammar : public Grammar { + virtual const GrammarIter* GetRoot() const { + return new NPGrammarIter; + } +}; + +void SampleDerivation(const Hypergraph& hg, MT19937* rng, vector* sampled_deriv, HieroLMModel* plm) { + HieroLMModel& lm = *plm; + vector node_probs; + const prob_t total_prob = Inside(hg, &node_probs); + queue q; + q.push(hg.nodes_.size() - 3); + while(!q.empty()) { + unsigned cur_node_id = q.front(); +// cerr << "NODE=" << cur_node_id << endl; + q.pop(); + const Hypergraph::Node& node = hg.nodes_[cur_node_id]; + const unsigned num_in_edges = node.in_edges_.size(); + unsigned sampled_edge = 0; + if (num_in_edges == 1) { + sampled_edge = node.in_edges_[0]; + } else { + //prob_t z; + assert(num_in_edges > 1); + SampleSet ss; + for (unsigned j = 0; j < num_in_edges; ++j) { + const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; + prob_t p = edge.edge_prob_; + for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) + p *= node_probs[edge.tail_nodes_[k]]; + ss.add(p); +// cerr << log(ss[j]) << " ||| " << edge.rule_->AsString() << endl; + //z += p; + } +// for (unsigned j = 0; j < num_in_edges; ++j) { +// const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; +// cerr << exp(log(ss[j] / z)) << " ||| " << edge.rule_->AsString() << endl; +// } +// cerr << " --- \n"; + sampled_edge = node.in_edges_[rng->SelectSample(ss)]; + } + sampled_deriv->push_back(sampled_edge); + const Hypergraph::Edge& edge = hg.edges_[sampled_edge]; + for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { + q.push(edge.tail_nodes_[j]); + } + } + for (unsigned i = 0; i < sampled_deriv->size(); ++i) { + cerr << *hg.edges_[(*sampled_deriv)[i]].rule_ << endl; + } +} + +void IncrementDerivation(const Hypergraph& hg, const vector& d, HieroLMModel* plm, MT19937* rng) { + for (unsigned i = 0; i < d.size(); ++i) + plm->Increment(*hg.edges_[d[i]].rule_, rng); +} + +void DecrementDerivation(const Hypergraph& hg, const vector& d, HieroLMModel* plm, MT19937* rng) { + for (unsigned i = 0; i < d.size(); ++i) + plm->Decrement(*hg.edges_[d[i]].rule_, rng); +} + +int main(int argc, char** argv) { + po::variables_map conf; + vector grammars; + grammars.push_back(GrammarPtr(new NPGrammar)); + + InitCommandLine(argc, argv, &conf); + const unsigned samples = conf["samples"].as(); + + if (conf.count("random_seed")) + prng.reset(new MT19937(conf["random_seed"].as())); + else + prng.reset(new MT19937); + MT19937& rng = *prng; + + vector > corpuse; + set vocabe; + cerr << "Reading corpus...\n"; + ReadCorpus(conf["input"].as(), &corpuse, &vocabe); + cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; + HieroLMModel lm(vocabe.size()); + + plm = &lm; + ExhaustiveBottomUpParser parser("X", grammars); + + Hypergraph hg; + const int kX = -TD::Convert("X"); + const int kLP = FD::Convert("LogProb"); + SparseVector v; v.set_value(kLP, 1.0); + vector > derivs(corpuse.size()); + for (int SS=0; SS < samples; ++SS) { + for (int ci = 0; ci < corpuse.size(); ++ci) { + vector& src = corpuse[ci]; + 
Lattice lat(src.size()); + for (unsigned i = 0; i < src.size(); ++i) + lat[i].push_back(LatticeArc(src[i], 0.0, 1)); + cerr << TD::GetString(src) << endl; + hg.clear(); + parser.Parse(lat, &hg); // exhaustive parse + DecrementDerivation(hg, derivs[ci], &lm, &rng); + for (unsigned i = 0; i < hg.edges_.size(); ++i) { + TRule& r = *hg.edges_[i].rule_; + if (r.lhs_ == kX) + hg.edges_[i].edge_prob_ = lm.Prob(r); + } + vector d; + SampleDerivation(hg, &rng, &d, &lm); + derivs[ci] = d; + IncrementDerivation(hg, derivs[ci], &lm, &rng); + if (tofreelist.size() > 100000) { + cerr << "Freeing ... "; + for (unsigned i = 0; i < tofreelist.size(); ++i) + delete tofreelist[i]; + tofreelist.clear(); + cerr << "Freed.\n"; + } + } + cerr << "LLH=" << lm.Likelihood() << endl; + } + return 0; +} + diff --git a/phrasinator/ccrp.h b/phrasinator/ccrp.h deleted file mode 100644 index 9acf12ab..00000000 --- a/phrasinator/ccrp.h +++ /dev/null @@ -1,294 +0,0 @@ -#ifndef _CCRP_H_ -#define _CCRP_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "sampler.h" -#include "slice_sampler.h" - -// Chinese restaurant process (Pitman-Yor parameters) with table tracking. - -template > -class CCRP { - public: - CCRP(double disc, double conc) : - num_tables_(), - num_customers_(), - discount_(disc), - concentration_(conc), - discount_prior_alpha_(std::numeric_limits::quiet_NaN()), - discount_prior_beta_(std::numeric_limits::quiet_NaN()), - concentration_prior_shape_(std::numeric_limits::quiet_NaN()), - concentration_prior_rate_(std::numeric_limits::quiet_NaN()) {} - - CCRP(double d_alpha, double d_beta, double c_shape, double c_rate, double d = 0.1, double c = 10.0) : - num_tables_(), - num_customers_(), - discount_(d), - concentration_(c), - discount_prior_alpha_(d_alpha), - discount_prior_beta_(d_beta), - concentration_prior_shape_(c_shape), - concentration_prior_rate_(c_rate) {} - - double discount() const { return discount_; } - double concentration() const { return concentration_; } - - bool has_discount_prior() const { - return !std::isnan(discount_prior_alpha_); - } - - bool has_concentration_prior() const { - return !std::isnan(concentration_prior_shape_); - } - - void clear() { - num_tables_ = 0; - num_customers_ = 0; - dish_locs_.clear(); - } - - unsigned num_tables() const { - return num_tables_; - } - - unsigned num_tables(const Dish& dish) const { - const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - if (it == dish_locs_.end()) return 0; - return it->second.table_counts_.size(); - } - - unsigned num_customers() const { - return num_customers_; - } - - unsigned num_customers(const Dish& dish) const { - const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - if (it == dish_locs_.end()) return 0; - return it->total_dish_count_; - } - - // returns +1 or 0 indicating whether a new table was opened - int increment(const Dish& dish, const double& p0, MT19937* rng) { - DishLocations& loc = dish_locs_[dish]; - bool share_table = false; - if (loc.total_dish_count_) { - const double p_empty = (concentration_ + num_tables_ * discount_) * p0; - const double p_share = (loc.total_dish_count_ - loc.table_counts_.size() * discount_); - share_table = rng->SelectSample(p_empty, p_share); - } - if (share_table) { - double r = rng->next() * (loc.total_dish_count_ - loc.table_counts_.size() * discount_); - for (typename std::list::iterator ti = loc.table_counts_.begin(); - ti != loc.table_counts_.end(); ++ti) { - r -= (*ti - 
discount_); - if (r <= 0.0) { - ++(*ti); - break; - } - } - if (r > 0.0) { - std::cerr << "Serious error: r=" << r << std::endl; - Print(&std::cerr); - assert(r <= 0.0); - } - } else { - loc.table_counts_.push_back(1u); - ++num_tables_; - } - ++loc.total_dish_count_; - ++num_customers_; - return (share_table ? 0 : 1); - } - - // returns -1 or 0, indicating whether a table was closed - int decrement(const Dish& dish, MT19937* rng) { - DishLocations& loc = dish_locs_[dish]; - assert(loc.total_dish_count_); - if (loc.total_dish_count_ == 1) { - dish_locs_.erase(dish); - --num_tables_; - --num_customers_; - return -1; - } else { - int delta = 0; - // sample customer to remove UNIFORMLY. that is, do NOT use the discount - // here. if you do, it will introduce (unwanted) bias! - double r = rng->next() * loc.total_dish_count_; - --loc.total_dish_count_; - for (typename std::list::iterator ti = loc.table_counts_.begin(); - ti != loc.table_counts_.end(); ++ti) { - r -= *ti; - if (r <= 0.0) { - if ((--(*ti)) == 0) { - --num_tables_; - delta = -1; - loc.table_counts_.erase(ti); - } - break; - } - } - if (r > 0.0) { - std::cerr << "Serious error: r=" << r << std::endl; - Print(&std::cerr); - assert(r <= 0.0); - } - --num_customers_; - return delta; - } - } - - double prob(const Dish& dish, const double& p0) const { - const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - const double r = num_tables_ * discount_ + concentration_; - if (it == dish_locs_.end()) { - return r * p0 / (num_customers_ + concentration_); - } else { - return (it->second.total_dish_count_ - discount_ * it->second.table_counts_.size() + r * p0) / - (num_customers_ + concentration_); - } - } - - double log_crp_prob() const { - return log_crp_prob(discount_, concentration_); - } - - static double log_beta_density(const double& x, const double& alpha, const double& beta) { - assert(x > 0.0); - assert(x < 1.0); - assert(alpha > 0.0); - assert(beta > 0.0); - const double lp = (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta); - return lp; - } - - static double log_gamma_density(const double& x, const double& shape, const double& rate) { - assert(x >= 0.0); - assert(shape > 0.0); - assert(rate > 0.0); - const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape); - return lp; - } - - // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process - // does not include P_0's - double log_crp_prob(const double& discount, const double& concentration) const { - double lp = 0.0; - if (has_discount_prior()) - lp = log_beta_density(discount, discount_prior_alpha_, discount_prior_beta_); - if (has_concentration_prior()) - lp += log_gamma_density(concentration, concentration_prior_shape_, concentration_prior_rate_); - assert(lp <= 0.0); - if (num_customers_) { - if (discount > 0.0) { - const double r = lgamma(1.0 - discount); - lp += lgamma(concentration) - lgamma(concentration + num_customers_) - + num_tables_ * log(discount) + lgamma(concentration / discount + num_tables_) - - lgamma(concentration / discount); - assert(std::isfinite(lp)); - for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); - it != dish_locs_.end(); ++it) { - const DishLocations& cur = it->second; - for (std::list::const_iterator ti = cur.table_counts_.begin(); ti != cur.table_counts_.end(); ++ti) { - lp += lgamma(*ti - discount) - r; - } - } - } else { - assert(!"not implemented yet"); - } - } - assert(std::isfinite(lp)); - return lp; - } - - void 
resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { - assert(has_discount_prior() || has_concentration_prior()); - DiscountResampler dr(*this); - ConcentrationResampler cr(*this); - for (int iter = 0; iter < nloop; ++iter) { - if (has_concentration_prior()) { - concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0, - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - } - if (has_discount_prior()) { - discount_ = slice_sampler1d(dr, discount_, *rng, std::numeric_limits::min(), - 1.0, 0.0, niterations, 100*niterations); - } - } - concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0, - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - } - - struct DiscountResampler { - DiscountResampler(const CCRP& crp) : crp_(crp) {} - const CCRP& crp_; - double operator()(const double& proposed_discount) const { - return crp_.log_crp_prob(proposed_discount, crp_.concentration_); - } - }; - - struct ConcentrationResampler { - ConcentrationResampler(const CCRP& crp) : crp_(crp) {} - const CCRP& crp_; - double operator()(const double& proposed_concentration) const { - return crp_.log_crp_prob(crp_.discount_, proposed_concentration); - } - }; - - struct DishLocations { - DishLocations() : total_dish_count_() {} - unsigned total_dish_count_; // customers at all tables with this dish - std::list table_counts_; // list<> gives O(1) deletion and insertion, which we want - // .size() is the number of tables for this dish - }; - - void Print(std::ostream* out) const { - std::cerr << "PYP(d=" << discount_ << ",c=" << concentration_ << ") customers=" << num_customers_ << std::endl; - for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); - it != dish_locs_.end(); ++it) { - (*out) << it->first << " (" << it->second.total_dish_count_ << " on " << it->second.table_counts_.size() << " tables): "; - for (typename std::list::const_iterator i = it->second.table_counts_.begin(); - i != it->second.table_counts_.end(); ++i) { - (*out) << " " << *i; - } - (*out) << std::endl; - } - } - - typedef typename std::tr1::unordered_map::const_iterator const_iterator; - const_iterator begin() const { - return dish_locs_.begin(); - } - const_iterator end() const { - return dish_locs_.end(); - } - - unsigned num_tables_; - unsigned num_customers_; - std::tr1::unordered_map dish_locs_; - - double discount_; - double concentration_; - - // optional beta prior on discount_ (NaN if no prior) - double discount_prior_alpha_; - double discount_prior_beta_; - - // optional gamma prior on concentration_ (NaN if no prior) - double concentration_prior_shape_; - double concentration_prior_rate_; -}; - -template -std::ostream& operator<<(std::ostream& o, const CCRP& c) { - c.Print(&o); - return o; -} - -#endif diff --git a/utils/ccrp.h b/utils/ccrp.h new file mode 100644 index 00000000..1a9e3ed5 --- /dev/null +++ b/utils/ccrp.h @@ -0,0 +1,340 @@ +#ifndef _CCRP_H_ +#define _CCRP_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "sampler.h" +#include "slice_sampler.h" + +// Chinese restaurant process (Pitman-Yor parameters) with table tracking. 
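For orientation, a minimal usage sketch of the class defined below. This is an editorial illustration, not part of the commit: it assumes the header is used together with utils/sampler.h, and the dish id (7) and the uniform base probability are invented for the example.

  // Sketch: drive the CCRP below with an integer dish type.
  #include "ccrp.h"
  #include "sampler.h"

  void ccrp_demo() {
    MT19937 rng;                       // RNG wrapper from utils/sampler.h
    CCRP<int> crp(0.5, 1.0);           // discount d = 0.5, concentration c = 1.0
    const double p0 = 1.0 / 1000.0;    // uniform base probability of any dish
    crp.increment(7, p0, &rng);        // first customer of dish 7 always opens a table (+1)
    crp.increment(7, p0, &rng);        // later customers share (0) or open a new table (+1)
    const double p = crp.prob(7, p0);  // predictive prob: (n_7 - d*t_7 + (c + d*T)*p0) / (n + c)
    crp.decrement(7, &rng);            // remove a uniformly chosen customer of dish 7
    (void)p;
  }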
+ +template > +class CCRP { + public: + CCRP(double disc, double conc) : + num_tables_(), + num_customers_(), + discount_(disc), + concentration_(conc), + discount_prior_alpha_(std::numeric_limits::quiet_NaN()), + discount_prior_beta_(std::numeric_limits::quiet_NaN()), + concentration_prior_shape_(std::numeric_limits::quiet_NaN()), + concentration_prior_rate_(std::numeric_limits::quiet_NaN()) {} + + CCRP(double d_alpha, double d_beta, double c_shape, double c_rate, double d = 0.9, double c = 1.0) : + num_tables_(), + num_customers_(), + discount_(d), + concentration_(c), + discount_prior_alpha_(d_alpha), + discount_prior_beta_(d_beta), + concentration_prior_shape_(c_shape), + concentration_prior_rate_(c_rate) {} + + double discount() const { return discount_; } + double concentration() const { return concentration_; } + + bool has_discount_prior() const { + return !std::isnan(discount_prior_alpha_); + } + + bool has_concentration_prior() const { + return !std::isnan(concentration_prior_shape_); + } + + void clear() { + num_tables_ = 0; + num_customers_ = 0; + dish_locs_.clear(); + } + + unsigned num_tables() const { + return num_tables_; + } + + unsigned num_tables(const Dish& dish) const { + const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); + if (it == dish_locs_.end()) return 0; + return it->second.table_counts_.size(); + } + + unsigned num_customers() const { + return num_customers_; + } + + unsigned num_customers(const Dish& dish) const { + const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); + if (it == dish_locs_.end()) return 0; + return it->total_dish_count_; + } + + // returns +1 or 0 indicating whether a new table was opened + int increment(const Dish& dish, const double& p0, MT19937* rng) { + DishLocations& loc = dish_locs_[dish]; + bool share_table = false; + if (loc.total_dish_count_) { + const double p_empty = (concentration_ + num_tables_ * discount_) * p0; + const double p_share = (loc.total_dish_count_ - loc.table_counts_.size() * discount_); + share_table = rng->SelectSample(p_empty, p_share); + } + if (share_table) { + double r = rng->next() * (loc.total_dish_count_ - loc.table_counts_.size() * discount_); + for (typename std::list::iterator ti = loc.table_counts_.begin(); + ti != loc.table_counts_.end(); ++ti) { + r -= (*ti - discount_); + if (r <= 0.0) { + ++(*ti); + break; + } + } + if (r > 0.0) { + std::cerr << "Serious error: r=" << r << std::endl; + Print(&std::cerr); + assert(r <= 0.0); + } + } else { + loc.table_counts_.push_back(1u); + ++num_tables_; + } + ++loc.total_dish_count_; + ++num_customers_; + return (share_table ? 
0 : 1);
+  }
+
+  // returns +1 or 0 indicating whether a new table was opened
+  template <typename T>
+  int incrementT(const Dish& dish, const T& p0, MT19937* rng) {
+    DishLocations& loc = dish_locs_[dish];
+    bool share_table = false;
+    if (loc.total_dish_count_) {
+      const T p_empty = T(concentration_ + num_tables_ * discount_) * p0;
+      const T p_share = T(loc.total_dish_count_ - loc.table_counts_.size() * discount_);
+      share_table = rng->SelectSample(p_empty, p_share);
+    }
+    if (share_table) {
+      double r = rng->next() * (loc.total_dish_count_ - loc.table_counts_.size() * discount_);
+      for (typename std::list<unsigned>::iterator ti = loc.table_counts_.begin();
+           ti != loc.table_counts_.end(); ++ti) {
+        r -= (*ti - discount_);
+        if (r <= 0.0) {
+          ++(*ti);
+          break;
+        }
+      }
+      if (r > 0.0) {
+        std::cerr << "Serious error: r=" << r << std::endl;
+        Print(&std::cerr);
+        assert(r <= 0.0);
+      }
+    } else {
+      loc.table_counts_.push_back(1u);
+      ++num_tables_;
+    }
+    ++loc.total_dish_count_;
+    ++num_customers_;
+    return (share_table ? 0 : 1);
+  }
+
+  // returns -1 or 0, indicating whether a table was closed
+  int decrement(const Dish& dish, MT19937* rng) {
+    DishLocations& loc = dish_locs_[dish];
+    assert(loc.total_dish_count_);
+    if (loc.total_dish_count_ == 1) {
+      dish_locs_.erase(dish);
+      --num_tables_;
+      --num_customers_;
+      return -1;
+    } else {
+      int delta = 0;
+      // sample customer to remove UNIFORMLY. that is, do NOT use the discount
+      // here. if you do, it will introduce (unwanted) bias!
+      double r = rng->next() * loc.total_dish_count_;
+      --loc.total_dish_count_;
+      for (typename std::list<unsigned>::iterator ti = loc.table_counts_.begin();
+           ti != loc.table_counts_.end(); ++ti) {
+        r -= *ti;
+        if (r <= 0.0) {
+          if ((--(*ti)) == 0) {
+            --num_tables_;
+            delta = -1;
+            loc.table_counts_.erase(ti);
+          }
+          break;
+        }
+      }
+      if (r > 0.0) {
+        std::cerr << "Serious error: r=" << r << std::endl;
+        Print(&std::cerr);
+        assert(r <= 0.0);
+      }
+      --num_customers_;
+      return delta;
+    }
+  }
+
+  double prob(const Dish& dish, const double& p0) const {
+    const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
+    const double r = num_tables_ * discount_ + concentration_;
+    if (it == dish_locs_.end()) {
+      return r * p0 / (num_customers_ + concentration_);
+    } else {
+      return (it->second.total_dish_count_ - discount_ * it->second.table_counts_.size() + r * p0) /
+             (num_customers_ + concentration_);
+    }
+  }
+
+  template <typename T>
+  T probT(const Dish& dish, const T& p0) const {
+    const typename std::tr1::unordered_map<Dish, DishLocations, DishHash>::const_iterator it = dish_locs_.find(dish);
+    const T r = T(num_tables_ * discount_ + concentration_);
+    if (it == dish_locs_.end()) {
+      return r * p0 / T(num_customers_ + concentration_);
+    } else {
+      return (T(it->second.total_dish_count_ - discount_ * it->second.table_counts_.size()) + r * p0) /
+             T(num_customers_ + concentration_);
+    }
+  }
+
+  double log_crp_prob() const {
+    return log_crp_prob(discount_, concentration_);
+  }
+
+  static double log_beta_density(const double& x, const double& alpha, const double& beta) {
+    assert(x > 0.0);
+    assert(x < 1.0);
+    assert(alpha > 0.0);
+    assert(beta > 0.0);
+    const double lp = (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta);
+    return lp;
+  }
+
+  static double log_gamma_density(const double& x, const double& shape, const double& rate) {
+    assert(x >= 0.0);
+    assert(shape > 0.0);
+    assert(rate > 0.0);
+    const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape);
+    return lp;
+  }
+
+  // taken from
http://en.wikipedia.org/wiki/Chinese_restaurant_process + // does not include P_0's + double log_crp_prob(const double& discount, const double& concentration) const { + double lp = 0.0; + if (has_discount_prior()) + lp = log_beta_density(discount, discount_prior_alpha_, discount_prior_beta_); + if (has_concentration_prior()) + lp += log_gamma_density(concentration, concentration_prior_shape_, concentration_prior_rate_); + assert(lp <= 0.0); + if (num_customers_) { + if (discount > 0.0) { + const double r = lgamma(1.0 - discount); + lp += lgamma(concentration) - lgamma(concentration + num_customers_) + + num_tables_ * log(discount) + lgamma(concentration / discount + num_tables_) + - lgamma(concentration / discount); + assert(std::isfinite(lp)); + for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); + it != dish_locs_.end(); ++it) { + const DishLocations& cur = it->second; + for (std::list::const_iterator ti = cur.table_counts_.begin(); ti != cur.table_counts_.end(); ++ti) { + lp += lgamma(*ti - discount) - r; + } + } + } else { + assert(!"not implemented yet"); + } + } + assert(std::isfinite(lp)); + return lp; + } + + void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { + assert(has_discount_prior() || has_concentration_prior()); + DiscountResampler dr(*this); + ConcentrationResampler cr(*this); + for (int iter = 0; iter < nloop; ++iter) { + if (has_concentration_prior()) { + concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0, + std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); + } + if (has_discount_prior()) { + discount_ = slice_sampler1d(dr, discount_, *rng, std::numeric_limits::min(), + 1.0, 0.0, niterations, 100*niterations); + } + } + concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0, + std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); + } + + struct DiscountResampler { + DiscountResampler(const CCRP& crp) : crp_(crp) {} + const CCRP& crp_; + double operator()(const double& proposed_discount) const { + return crp_.log_crp_prob(proposed_discount, crp_.concentration_); + } + }; + + struct ConcentrationResampler { + ConcentrationResampler(const CCRP& crp) : crp_(crp) {} + const CCRP& crp_; + double operator()(const double& proposed_concentration) const { + return crp_.log_crp_prob(crp_.discount_, proposed_concentration); + } + }; + + struct DishLocations { + DishLocations() : total_dish_count_() {} + unsigned total_dish_count_; // customers at all tables with this dish + std::list table_counts_; // list<> gives O(1) deletion and insertion, which we want + // .size() is the number of tables for this dish + }; + + void Print(std::ostream* out) const { + std::cerr << "PYP(d=" << discount_ << ",c=" << concentration_ << ") customers=" << num_customers_ << std::endl; + for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); + it != dish_locs_.end(); ++it) { + (*out) << it->first << " (" << it->second.total_dish_count_ << " on " << it->second.table_counts_.size() << " tables): "; + for (typename std::list::const_iterator i = it->second.table_counts_.begin(); + i != it->second.table_counts_.end(); ++i) { + (*out) << " " << *i; + } + (*out) << std::endl; + } + } + + typedef typename std::tr1::unordered_map::const_iterator const_iterator; + const_iterator begin() const { + return dish_locs_.begin(); + } + const_iterator end() const { + return dish_locs_.end(); + } + + unsigned num_tables_; + unsigned num_customers_; + 
std::tr1::unordered_map dish_locs_; + + double discount_; + double concentration_; + + // optional beta prior on discount_ (NaN if no prior) + double discount_prior_alpha_; + double discount_prior_beta_; + + // optional gamma prior on concentration_ (NaN if no prior) + double concentration_prior_shape_; + double concentration_prior_rate_; +}; + +template +std::ostream& operator<<(std::ostream& o, const CCRP& c) { + c.Print(&o); + return o; +} + +#endif diff --git a/utils/ccrp_onetable.h b/utils/ccrp_onetable.h index a868af9a..b63737d1 100644 --- a/utils/ccrp_onetable.h +++ b/utils/ccrp_onetable.h @@ -117,6 +117,18 @@ class CCRP_OneTable { } } + template + T probT(const Dish& dish, const T& p0) const { + const typename DishMapType::const_iterator it = dish_counts_.find(dish); + const T r(num_tables_ * discount_ + concentration_); + if (it == dish_counts_.end()) { + return r * p0 / T(num_customers_ + concentration_); + } else { + return (T(it->second - discount_) + r * p0) / + T(num_customers_ + concentration_); + } + } + double log_crp_prob() const { return log_crp_prob(discount_, concentration_); } diff --git a/utils/sampler.h b/utils/sampler.h index 153e7ef1..22c873d4 100644 --- a/utils/sampler.h +++ b/utils/sampler.h @@ -48,7 +48,7 @@ struct RandomNumberGenerator { template size_t SelectSample(const F& a, const F& b, double T = 1.0) { if (T == 1.0) { - if (this->next() > (a / (a + b))) return 1; else return 0; + if (F(this->next()) > (a / (a + b))) return 1; else return 0; } else { assert(!"not implemented"); } -- cgit v1.2.3 From 54bcfb835232d190a5ab6f0bd825de8a50dae126 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 29 Feb 2012 01:12:40 -0500 Subject: cleanup, mpi-ify lblmodel --- training/lbl_model.cc | 179 +++++++------- utils/agenda.h | 140 ----------- utils/best.h | 32 --- utils/corpus_tools.cc | 62 +++++ utils/corpus_tools.h | 19 ++ utils/d_ary_heap.h | 568 --------------------------------------------- utils/ftoa.h | 403 -------------------------------- utils/int_or_pointer.h | 70 ------ utils/intern_pool.h | 158 ------------- utils/lvalue_pmap.h | 31 --- utils/max_plus.h | 201 ---------------- utils/maybe_update_bound.h | 17 -- utils/nan.h | 42 ---- utils/string_to.h | 314 ------------------------- 14 files changed, 178 insertions(+), 2058 deletions(-) delete mode 100644 utils/agenda.h delete mode 100644 utils/best.h create mode 100644 utils/corpus_tools.cc create mode 100644 utils/corpus_tools.h delete mode 100644 utils/d_ary_heap.h delete mode 100644 utils/ftoa.h delete mode 100644 utils/int_or_pointer.h delete mode 100644 utils/intern_pool.h delete mode 100644 utils/lvalue_pmap.h delete mode 100644 utils/max_plus.h delete mode 100644 utils/maybe_update_bound.h delete mode 100644 utils/nan.h delete mode 100644 utils/string_to.h (limited to 'utils') diff --git a/training/lbl_model.cc b/training/lbl_model.cc index 2af848b5..def5075a 100644 --- a/training/lbl_model.cc +++ b/training/lbl_model.cc @@ -12,11 +12,17 @@ #include // memset #include +#ifdef HAVE_MPI +#include +#include +namespace mpi = boost::mpi; +#endif #include #include #include #include +#include "corpus_tools.h" #include "optimize.h" #include "array2d.h" #include "m.h" @@ -29,9 +35,9 @@ namespace po = boost::program_options; using namespace std; #define kDIMENSIONS 100 -typedef Eigen::Matrix RVector; -typedef Eigen::Matrix RTVector; -typedef Eigen::Matrix TMatrix; +typedef Eigen::Matrix RVector; +typedef Eigen::Matrix RTVector; +typedef Eigen::Matrix TMatrix; vector r_src, r_trg; bool InitCommandLine(int argc, 
char** argv, po::variables_map* conf) { @@ -39,8 +45,8 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { opts.add_options() ("input,i",po::value(),"Input file") ("iterations,I",po::value()->default_value(1000),"Number of iterations of training") - ("regularization_strength,C",po::value()->default_value(0.1),"L2 regularization strength (0 for no regularization)") - ("eta", po::value()->default_value(0.1f), "Eta for SGD") + ("regularization_strength,C",po::value()->default_value(0.1),"L2 regularization strength (0 for no regularization)") + ("eta", po::value()->default_value(0.1f), "Eta for SGD") ("source_embeddings,f", po::value(), "File containing source embeddings (if unset, random vectors will be used)") ("target_embeddings,e", po::value(), "File containing target embeddings (if unset, random vectors will be used)") ("random_seed,s", po::value(), "Random seed") @@ -70,7 +76,7 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { } void Normalize(RVector* v) { - float norm = v->norm(); + double norm = v->norm(); assert(norm > 0.0f); *v /= norm; } @@ -80,7 +86,7 @@ void Flatten(const TMatrix& m, vector* v) { v->resize(kDIMENSIONS * kDIMENSIONS); for (unsigned i = 0; i < kDIMENSIONS; ++i) for (unsigned j = 0; j < kDIMENSIONS; ++j) { - assert(boost::math::isnormal(m(i, j))); + assert(boost::math::isfinite(m(i, j))); (*v)[c++] = m(i,j); } } @@ -89,7 +95,7 @@ void Unflatten(const vector& v, TMatrix* m) { unsigned c = 0; for (unsigned i = 0; i < kDIMENSIONS; ++i) for (unsigned j = 0; j < kDIMENSIONS; ++j) { - assert(boost::math::isnormal(v[c])); + assert(boost::math::isfinite(v[c])); (*m)(i, j) = v[c++]; } } @@ -162,14 +168,25 @@ void LoadEmbeddings(const string& filename, vector* pv) { } int main(int argc, char** argv) { +#ifdef HAVE_MPI + std::cerr << "**MPI enabled.\n"; + mpi::environment env(argc, argv); + mpi::communicator world; + const int size = world.size(); + const int rank = world.rank(); +#else + std::cerr << "**MPI disabled.\n"; + const int rank = 0; + const int size = 1; +#endif po::variables_map conf; if (!InitCommandLine(argc, argv, &conf)) return 1; const string fname = conf["input"].as(); - const float reg_strength = conf["regularization_strength"].as(); + const double reg_strength = conf["regularization_strength"].as(); const bool has_l2 = reg_strength; assert(reg_strength >= 0.0f); const int ITERATIONS = conf["iterations"].as(); - const float eta = conf["eta"].as(); + const double eta = conf["eta"].as(); const double diagonal_tension = conf["diagonal_tension"].as(); bool SGD = false; if (diagonal_tension < 0.0) { @@ -181,61 +198,44 @@ int main(int argc, char** argv) { unsigned lc = 0; vector unnormed_a_i; - string line; - string ssrc, strg; bool flag = false; - Lattice src, trg; + vector > srcs, trgs; vector vocab_e; - { // read through corpus, initialize int map, check lines are good - set svocab_e; - cerr << "INITIAL READ OF " << fname << endl; - ReadFile rf(fname); - istream& in = *rf.stream(); - while(getline(in, line)) { - ++lc; - if (lc % 1000 == 0) { cerr << '.'; flag = true; } - if (lc %50000 == 0) { cerr << " [" << lc << "]\n" << flush; flag = false; } - ParseTranslatorInput(line, &ssrc, &strg); - LatticeTools::ConvertTextToLattice(ssrc, &src); - LatticeTools::ConvertTextToLattice(strg, &trg); - if (src.size() == 0 || trg.size() == 0) { - cerr << "Error: " << lc << "\n" << line << endl; - assert(src.size() > 0); - assert(trg.size() > 0); - } - if (src.size() > unnormed_a_i.size()) - unnormed_a_i.resize(src.size()); - for 
(unsigned i = 0; i < trg.size(); ++i) { - assert(trg[i].size() == 1); - svocab_e.insert(trg[i][0].label); - } - } + { + set svocab_e, svocab_f; + CorpusTools::ReadFromFile(fname, &srcs, NULL, &trgs, &svocab_e, rank, size); copy(svocab_e.begin(), svocab_e.end(), back_inserter(vocab_e)); } - if (flag) cerr << endl; cerr << "Number of target word types: " << vocab_e.size() << endl; - const float num_examples = lc; + const double num_examples = lc; - LBFGSOptimizer lbfgs(kDIMENSIONS * kDIMENSIONS, 100); + boost::shared_ptr lbfgs; + if (rank == 0) + lbfgs.reset(new LBFGSOptimizer(kDIMENSIONS * kDIMENSIONS, 100)); r_trg.resize(TD::NumWords() + 1); r_src.resize(TD::NumWords() + 1); + vector > trg_pos(TD::NumWords() + 1); + if (conf.count("random_seed")) { srand(conf["random_seed"].as()); } else { - unsigned seed = time(NULL); + unsigned seed = time(NULL) + rank * 100; cerr << "Random seed: " << seed << endl; srand(seed); } - TMatrix t = TMatrix::Random() / 50.0; - for (unsigned i = 1; i < r_trg.size(); ++i) { - r_trg[i] = RVector::Random(); - r_src[i] = RVector::Random(); + + TMatrix t; + if (rank == 0) { + t = TMatrix::Random() / 50.0; + for (unsigned i = 1; i < r_trg.size(); ++i) { + r_trg[i] = RVector::Random(); + r_src[i] = RVector::Random(); + } + if (conf.count("source_embeddings")) + LoadEmbeddings(conf["source_embeddings"].as(), &r_src); + if (conf.count("target_embeddings")) + LoadEmbeddings(conf["target_embeddings"].as(), &r_trg); } - if (conf.count("source_embeddings")) - LoadEmbeddings(conf["source_embeddings"].as(), &r_src); - if (conf.count("target_embeddings")) - LoadEmbeddings(conf["target_embeddings"].as(), &r_trg); - vector > trg_pos(TD::NumWords() + 1); // do optimization TMatrix g = TMatrix::Zero(); @@ -243,22 +243,25 @@ int main(int argc, char** argv) { vector z_src; vector flat_g, flat_t; Flatten(t, &flat_t); - for (int iter = 0; iter < ITERATIONS; ++iter) { + bool converged = false; + // TODO broadcast embeddings + for (int iter = 0; !converged && iter < ITERATIONS; ++iter) { +#ifdef HAVE_MPI + mpi::broadcast(world, &flat_t[0], flat_t.size(), 0); +#endif + Unflatten(flat_t, &t); cerr << "ITERATION " << (iter + 1) << endl; - ReadFile rf(fname); - istream& in = *rf.stream(); double likelihood = 0; double denom = 0.0; lc = 0; flag = false; g *= 0; - while(getline(in, line)) { + for (unsigned i = 0; i < srcs.size(); ++i) { + const vector& src = srcs[i]; + const vector& trg = trgs[i]; ++lc; - if (lc % 1000 == 0) { cerr << '.'; flag = true; } - if (lc %50000 == 0) { cerr << " [" << lc << "]\n" << flush; flag = false; } - ParseTranslatorInput(line, &ssrc, &strg); - LatticeTools::ConvertTextToLattice(ssrc, &src); - LatticeTools::ConvertTextToLattice(strg, &trg); + if (rank == 0 && lc % 1000 == 0) { cerr << '.'; flag = true; } + if (rank == 0 && lc %50000 == 0) { cerr << " [" << lc << "]\n" << flush; flag = false; } denom += trg.size(); exp_src.clear(); exp_src.resize(src.size(), TMatrix::Zero()); @@ -266,10 +269,10 @@ int main(int argc, char** argv) { Array2D exp_refs(src.size(), trg.size(), TMatrix::Zero()); Array2D z_refs(src.size(), trg.size(), 0.0); for (unsigned j = 0; j < trg.size(); ++j) - trg_pos[trg[j][0].label].insert(j); + trg_pos[trg[j]].insert(j); for (unsigned i = 0; i < src.size(); ++i) { - const RVector& r_s = r_src[src[i][0].label]; + const RVector& r_s = r_src[src[i]]; const RTVector pred = r_s.transpose() * t; TMatrix& exp_m = exp_src[i]; double& z = z_src[i]; @@ -293,7 +296,7 @@ int main(int argc, char** argv) { } } for (unsigned j = 0; j < trg.size(); ++j) - 
trg_pos[trg[j][0].label].clear(); + trg_pos[trg[j]].clear(); // model expectations for a single target generation with // uniform alignment prior @@ -323,8 +326,8 @@ int main(int argc, char** argv) { // TODO handle alignment prob } if (ref_z <= 0) { - cerr << "TRG=" << TD::Convert(trg[j][0].label) << endl; - cerr << " LINE=" << line << endl; + cerr << "TRG=" << TD::Convert(trg[j]) << endl; + cerr << " LINE=" << lc << " (RANK=" << rank << "/" << size << ")" << endl; cerr << " REF_EXP=\n" << ref_exp << endl; cerr << " M_EXP=\n" << m_exp << endl; abort(); @@ -339,30 +342,42 @@ int main(int argc, char** argv) { } } - if (iter == (ITERATIONS - 1) || lc == 28) { cerr << al << endl; } + if (rank == 0 && (iter == (ITERATIONS - 1) || lc < 12)) { cerr << al << endl; } } - if (flag) { cerr << endl; } + if (flag && rank == 0) { cerr << endl; } - const double base2_likelihood = likelihood / log(2); - cerr << " log_e likelihood: " << likelihood << endl; - cerr << " log_2 likelihood: " << base2_likelihood << endl; - cerr << " cross entropy: " << (-base2_likelihood / denom) << endl; - cerr << " perplexity: " << pow(2.0, -base2_likelihood / denom) << endl; + double obj = 0; if (!SGD) { Flatten(g, &flat_g); - double obj = -likelihood; - if (has_l2) { - const double r = ApplyRegularization(reg_strength, - flat_t, - &flat_g); - obj += r; - cerr << " regularization: " << r << endl; + obj = -likelihood; + // TODO - reduce gradient + } + + if (rank == 0) { + double gn = 0; + for (unsigned i = 0; i < flat_g.size(); ++i) + gn += flat_g[i]*flat_g[i]; + const double base2_likelihood = likelihood / log(2); + cerr << " log_e likelihood: " << likelihood << endl; + cerr << " log_2 likelihood: " << base2_likelihood << endl; + cerr << " cross entropy: " << (-base2_likelihood / denom) << endl; + cerr << " perplexity: " << pow(2.0, -base2_likelihood / denom) << endl; + cerr << " gradient norm: " << sqrt(gn) << endl; + if (!SGD) { + if (has_l2) { + const double r = ApplyRegularization(reg_strength, + flat_t, + &flat_g); + obj += r; + cerr << " regularization: " << r << endl; + } + lbfgs->Optimize(obj, flat_g, &flat_t); + converged = (lbfgs->HasConverged()); } - lbfgs.Optimize(obj, flat_g, &flat_t); - Unflatten(flat_t, &t); - if (lbfgs.HasConverged()) break; } - cerr << t << endl; +#ifdef HAVE_MPI + mpi::broadcast(world, converged, 0); +#endif } cerr << "TRANSLATION MATRIX:" << endl << t << endl; return 0; diff --git a/utils/agenda.h b/utils/agenda.h deleted file mode 100644 index d4f13696..00000000 --- a/utils/agenda.h +++ /dev/null @@ -1,140 +0,0 @@ -#ifndef AGENDA_H -#define AGENDA_H - -#define DBG_AGENDA(x) x -/* - a priority queue where you expect to queue the same item at different - priorities several times before finally popping it. higher priority = better. - so in best first you'd be using negative cost or e^-cost (probabilities, in - other words). - - this means you have a way to look up a key and see its location in the queue, - so its priority can be adjusted (or, simpler implementation: so when you pop, - you see if you've already popped before at a lower cost, and skip the - subsequent pops). - - it's assumed that you'll never queue an item @ a better priority after it has - already been popped. that is, the agenda will track already completed items. 
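The "simpler implementation" the comment alludes to, where popping skips entries superseded by a better priority, can be sketched independently of this header. Everything below is a generic illustration (no cdec types; modern C++ containers used for brevity):

  // Lazy-deletion best-first queue: re-push on improvement, skip stale pops.
  #include <queue>
  #include <unordered_map>
  #include <utility>

  struct LazyAgenda {
    std::priority_queue<std::pair<double, int> > q;  // (priority, key), higher = better
    std::unordered_map<int, double> best;            // best priority queued per key
    std::unordered_map<int, bool> done;              // keys already popped ("completed")

    void improve(int key, double prio) {
      std::unordered_map<int, double>::iterator it = best.find(key);
      if (it != best.end() && it->second >= prio) return;  // not an improvement
      best[key] = prio;
      q.push(std::make_pair(prio, key));  // stale duplicates stay behind in q
    }
    bool pop(int* key) {
      while (!q.empty()) {
        std::pair<double, int> e = q.top(); q.pop();
        if (done[e.second] || e.first < best[e.second]) continue;  // stale entry
        done[e.second] = true;
        *key = e.second;
        return true;
      }
      return false;
    }
  };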
- maybe in the future i will let you recompute a cheaper way to reach things - after first-pop also, it's assumed that we're always improving prios of - existing items, never making them worse (even though technically this is - possible and sensible if it hasn't been popped yet). - - simple binary max heap for now. there are better practical options w/ - superior cache locaility. movements in the heap need to update a record for - that key of where the key went. i do this by creating canonical key pointers - out of boost object pools (if the key were lightweight e.g. an int, then it - would make sense to use the hash lookup too - - since i'm doing key hashing to start with, i also allow you to attach some - arbitrary data (value) payload beyond key+priority. - - hash map from key to done (has been popped) -> set where doneness is marked in key item? - - a slightly different way to make an adjustable heap would be to use - tree-structured parent/children links intrusively (or mapped by key) in the - key, rather than indices in a compact binary-tree heap - - */ - -#include "best.h" -#include "intern_pool.h" -#include "d_ary_heap.h" -#include "lvalue_pmap.h" -#include -#include - -/* -template -struct priority_traits { - typedef typename P::priority_type priority_type; -}; -*/ - -typedef best_t agenda_best_t; -typedef unsigned agenda_location_t; - -PMAP_MEMBER_INDIRECT(LocationMap,agenda_location_t,location) -PMAP_MEMBER_INDIRECT(PriorityMap,agenda_best_t,priority) - -struct Less { - typedef bool result_type; - template - bool operator()(A const& a,B const& b) const { return a,class HashKey=boost::hash,class EqKey=std::equal_to, class Pool=boost::object_pool > -struct Agenda : intern_pool { - typedef intern_pool Intern; // inherited because I want to use construct() - /* this is less generic than it could be, because I want to use a single hash mapping to intern to canonical mutable object pointers, where the property maps are just lvalue accessors */ - typedef typename KeyF::result_type Key; - typedef Item * Handle; - typedef LocationMap LocMap; - typedef PriorityMap PrioMap; - LocMap locmap; - PrioMap priomap; // note: priomap[item] is set by caller before giving us the item; then tracks best (for canonicalized item) thereafter - - Better better; - //NOT NEEDED: initialize function object state (there is none) - - typedef Item *ItemC; //canonicalized pointer - typedef Item *ItemP; - static const std::size_t heap_arity=4; // might be fastest possible (depends on key size probably - cache locality is bad w/ arity=2) - typedef std::vector HeapStorage; - typedef d_ary_heap_indirect Heap; - Heap q; - - // please don't call q.push etc. directly. - void add(ItemP i) { - bool fresh=interneq(i); - DBG_AGENDA(assert(fresh && !q.contains(i))); - q.push(i); - } - bool improve(ItemP i) { - ItemP c=i; - bool fresh=interneq(c); - if (fresh) { - add(c); - return true; - } - DBG_AGENDA(assert(q.contains(c))); - return q.maybe_improve(priomap[i]); - } - inline bool empty() { - return q.empty(); - } - // no need to destroy the canon. item because we want to remember the best cost and reject more expensive ways of using it). - ItemC pop() { - ItemC r=q.top(); - q.pop(); - return r; - } - void pop_discard() { - q.pop(); - } - - ItemC top() { - DBG_AGENDA(assert(!empty())); - return q.top(); - } - - agenda_best_t best() const { - return q.best(); //TODO: cache/track the global best? 
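// Aside (not part of the original header): postpone(), defined just below,
// encodes an early-stopping pattern -- items that cannot beat best() are
// queued cheaply and their expensive evaluation is deferred. A schematic,
// generic caller; Rescore is a hypothetical callable supplied by the user:
template <class AgendaT, class Item, class Rescore>
void process(AgendaT& a, Item* i, Rescore expensive_rescore) {
  if (!a.postpone(i)) {    // false: i is at least as good as a.best()
    expensive_rescore(i);  // pay the costly evaluation only when competitive
    a.improve(i);          // (re)queue i at its refined priority
  }
}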
- } - - agenda_best_t second_best() const { - return q.second_best(); - } - - // add only if worse than queue current best, otherwise evaluate immediately (e.g. for early stopping w/ expensive to compute additional cost). return true if postponed (added) - bool postpone(ItemP i) { - if (better(priomap[i],best())) return false; - return improve(i); - } - - Agenda(unsigned reserve=1000000,LocMap const& lm=LocMap(),PrioMap const& pm=PrioMap(),EqKey const& eq=EqKey(),Better const& better=Better()) : locmap(lm), priomap(pm), better(better), q(priomap,locmap,better,reserve) { } -}; - -#endif diff --git a/utils/best.h b/utils/best.h deleted file mode 100644 index ed15e0be..00000000 --- a/utils/best.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef UTILS__BEST_H -#define UTILS__BEST_H - -#include "max_plus.h" - -typedef MaxPlus best_t; - -inline bool better(best_t const& a,best_t const& b) { - return a.v_>b.v_; // intentionally reversed, so default min-heap, sort, etc. put best first. -} - -inline bool operator <(best_t const& a,best_t const& b) { - return a.v_>b.v_; // intentionally reversed, so default min-heap, sort, etc. put best first. -} -struct BetterP { - inline bool operator ()(best_t const& a,best_t const& b) const { - return a.v_>b.v_; // intentionally reversed, so default min-heap, sort, etc. put best first. - } -}; - -inline void maybe_improve(best_t &a,best_t const& b) { - if (a.v_>b.v_) - a.v_=b.v_; -} - -template -inline void maybe_improve(best_t &a,O const& b) { - if (a.v_>b.v_) - a.v_=b.v_; -} - -#endif diff --git a/utils/corpus_tools.cc b/utils/corpus_tools.cc new file mode 100644 index 00000000..a0542b6e --- /dev/null +++ b/utils/corpus_tools.cc @@ -0,0 +1,62 @@ +#include "corpus_tools.h" + +#include + +#include "tdict.h" +#include "filelib.h" +#include "verbose.h" + +using namespace std; + +void CorpusTools::ReadFromFile(const string& filename, + vector >* src, + set* src_vocab, + vector >* trg, + set* trg_vocab, + int rank, + int size) { + assert(rank >= 0); + assert(size > 0); + assert(rank < size); + if (src) src->clear(); + if (src_vocab) src_vocab->clear(); + if (trg) trg->clear(); + if (trg_vocab) trg_vocab->clear(); + const int expected_fields = 1 + (trg == NULL ? 
0 : 1); + if (!SILENT) cerr << "Reading from " << filename << " ...\n"; + ReadFile rf(filename); + istream& in = *rf.stream(); + string line; + int lc = 0; + static const WordID kDIV = TD::Convert("|||"); + vector tmp; + while(getline(in, line)) { + const bool skip = (lc % size != rank); + ++lc; + if (skip) continue; + TD::ConvertSentence(line, &tmp); + src->push_back(vector()); + vector* d = &src->back(); + set* v = src_vocab; + int s = 0; + for (unsigned i = 0; i < tmp.size(); ++i) { + if (tmp[i] == kDIV) { + ++s; + if (s > 1) { cerr << "Unexpected format in line " << lc << ": " << line << endl; abort(); } + assert(trg); + trg->push_back(vector()); + d = &trg->back(); + v = trg_vocab; + } else { + d->push_back(tmp[i]); + if (v) v->insert(tmp[i]); + } + } + ++s; + if (expected_fields != s) { + cerr << "Wrong number of fields in line " << lc << ": " << line << endl; abort(); + } + } +} + + diff --git a/utils/corpus_tools.h b/utils/corpus_tools.h new file mode 100644 index 00000000..97bdaa94 --- /dev/null +++ b/utils/corpus_tools.h @@ -0,0 +1,19 @@ +#ifndef _CORPUS_TOOLS_H_ +#define _CORPUS_TOOLS_H_ + +#include +#include +#include +#include "wordid.h" + +struct CorpusTools { + static void ReadFromFile(const std::string& filename, + std::vector >* src, + std::set* src_vocab = NULL, + std::vector >* trg = NULL, + std::set* trg_vocab = NULL, + int rank = 0, + int size = 1); +}; + +#endif diff --git a/utils/d_ary_heap.h b/utils/d_ary_heap.h deleted file mode 100644 index 1270638a..00000000 --- a/utils/d_ary_heap.h +++ /dev/null @@ -1,568 +0,0 @@ -#ifndef D_ARY_HEAP_H -#define D_ARY_HEAP_H - -#include "show.h" -#define DDARY(x) - -#define D_ARY_PUSH_GRAEHL 0 // untested -#define D_ARY_POP_GRAEHL 0 // untested -#define D_ARY_DOWN_GRAEHL 0 // untested -#define D_ARY_UP_GRAEHL 0 // untested -#define D_ARY_APPEND_ALWAYS_PUSH 1 // heapify (0) is untested. otherwise switch between push and heapify depending on size (cache effects, existing items vs. # appended ones) - -#define D_ARY_TRACK_OUT_OF_HEAP 0 // shouldn't need to track, because in contains() false positives looking up stale or random loc map values are impossible - we just check key. note: if you enable this, you must init location to D_ARY_HEAP_NULL_INDEX yourself until it's been added or popped -#define D_ARY_VERIFY_HEAP 1 -// This is a very expensive test so it should be disabled even when NDEBUG is not defined - -# undef D_ARY_HEAP_NULL_INDEX -# define D_ARY_HEAP_NULL_INDEX (-1) // you may init location to this. - -/* adapted from boost/graph/detail/d_ary_heap.hpp - - local modifications: - - clear, heapify, append range/container, Size type template arg, reserve constructor arg - - hole+move rather than swap. note: swap would be more efficient for heavyweight keys, until move ctors exist - - don't set locmap to -1 when removing from heap (waste of time) - - // unlike arity=2 case, you don't gain anything by having indices start at 1, with 0-based child indices - // root @1, A=2, children indices m={0,1}: parent(i)=i/2, child(i,m)=2*i+m - // root @0: parent(i)=(i-1)/A child(i,n)=i*A+n+1 - can't improve on this except child(i,m)=i*A+m - (integer division, a/b=floor(a/b), so (i-1)/A = ceil(i/A)-1, or greatest int less than (i/A)) - - actually, no need to adjust child index, since child is called only once and inline - - e.g. for A=3 gorn address in tree -> index - - () = root -> 0 - (1) -> 1 - (2) -> 2 - (3) (A) -> 3 - (1,1) -> (1*A+1) = 4 - (1,2) -> (1*A+2) = 5 - (1,3) -> (1*A+3) = 6 - (2,1) -> (2*A+1) = 7 - etc. 
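As a concrete check of the 0-based index arithmetic above, the following standalone snippet asserts the A = 3 addresses from the table (parent(i) = (i-1)/A, child(i, m) = i*A + m + 1 with m counted from 0):

  // Standalone check of the 0-based d-ary index arithmetic described above (A = 3).
  #include <cassert>
  #include <cstddef>

  static const std::size_t A = 3;
  inline std::size_t parent(std::size_t i) { return (i - 1) / A; }
  inline std::size_t child(std::size_t i, std::size_t m) { return i * A + m + 1; }

  int main() {
    assert(child(0, 0) == 1 && child(0, 2) == 3);  // (1),(2),(3) under the root
    assert(child(1, 0) == 4 && child(1, 2) == 6);  // (1,1)..(1,3)
    assert(child(2, 0) == 7);                      // (2,1)
    assert(parent(7) == 2 && parent(4) == 1 && parent(3) == 0);
    return 0;
  }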
- -//TODO: block-align siblings! assume data[0] is 16 or 32-byte aligned ... then we want root @ index (blocksize-1). see http://www.lamarca.org/anthony/pubs/heaps.pdf pg8. for pow2(e.g. 4)-ary heap, it may be reasonable to use root @index A-1. however, suppose the key size is not padded to a power of 2 (e.g. 12 bytes), then we would need internal gaps at times. would want to use compile const template based inlineable alignment math for this? possibly use a container like vector that lets you specify padding relative to some address multiple for v[0]. - - optimal D: see http://www.lamarca.org/anthony/pubs/heaps.pdf pg 9. depedns on relative cost of swap,compare, but in all cases except swap=free, 2 is worse than 3-4. for expensive swap (3x compare), 4 still as good as 5. so just use 4. boost benchmarking djikstra agrees; 4 is best. - - cache-aligned 4-heap speedup over regular 2-heap is 10-80% (for huge heaps, the speedup is more) - - splay/skew heaps are worse than 2heap or aligned 4heap in practice. - - //TODO: switch from heapify (Floyd's method) to repeated push past some size limit (in bytes) due to cache effect - - #define D_ARY_BYTES_OUT_OF_CACHE 0x1000000 - - //TODO: assuming locmap is an lvalue pmap, we can be more efficient. on the other hand, if it's an intrusive property map to an interned mutable object, there's no difference in performance, and that's what i'm going to do in my first uses. plus, if keys are indices and the map is a vector, it's barely any overhead. - - */ - -// -//======================================================================= -// Copyright 2009 Trustees of Indiana University -// Authors: Jeremiah J. Willcock, Andrew Lumsdaine -// -// Distributed under the Boost Software License, Version 1.0. (See -// accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) -//======================================================================= -// - -#include -#include -#include -#include -#include -#include -#include -#include - - - // D-ary heap using an indirect compare operator (use identity_property_map - // as DistanceMap to get a direct compare operator). This heap appears to be - // commonly used for Dijkstra's algorithm for its good practical performance - // on some platforms; asymptotically, it's not optimal; it has an O(lg N) decrease-key - // operation, which is (amortized) constant time on a relaxed heap or fibonacci heap. The - // implementation is mostly based on the binary heap page on Wikipedia and - // online sources that state that the operations are the same for d-ary - // heaps. This code is not based on the old Boost d-ary heap code. - // - // - d_ary_heap_indirect is a model of UpdatableQueue as is needed for - // dijkstra_shortest_paths. - // - // - Value must model Assignable. - // - Arity must be at least 2 (optimal value appears to be 4, both in my and - // third-party experiments). - // - IndexInHeapMap must be a ReadWritePropertyMap from Value to - // Container::size_type (to store the index of each stored value within the - // heap for decrease-key aka update). - // - DistanceMap must be a ReadablePropertyMap from Value to something - // (typedef'ed as distance_type). - // - Compare must be a BinaryPredicate used as a less-than operator on - // distance_type. - // - Container must be a random-access, contiguous container (in practice, - // the operations used probably require that it is std::vector). 
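The property-map indirection in those requirements reduces to: the container holds values, a distance map supplies each value's priority, and an index-in-heap map makes update/decrease-key an O(1) lookup. A minimal self-contained model of that contract (plain vectors stand in for the Boost property maps; values are small non-negative ints; arity 2 for brevity):

  // dist plays the DistanceMap, pos plays the IndexInHeapMap; every element
  // move must also update pos, or update() below will look in the wrong slot.
  #include <algorithm>
  #include <cstddef>
  #include <vector>

  struct IndirectMinHeap {
    std::vector<int> data;          // the heap itself
    std::vector<double>& dist;      // value -> priority
    std::vector<std::size_t>& pos;  // value -> index in data

    IndirectMinHeap(std::vector<double>& d, std::vector<std::size_t>& p) : dist(d), pos(p) {}

    void sift_up(std::size_t i) {
      while (i > 0) {
        const std::size_t par = (i - 1) / 2;
        if (dist[data[i]] >= dist[data[par]]) break;
        std::swap(data[i], data[par]);
        pos[data[i]] = i;
        pos[data[par]] = par;
        i = par;
      }
    }
    void push(int v) {
      data.push_back(v);
      pos[v] = data.size() - 1;
      sift_up(data.size() - 1);
    }
    void update(int v) { sift_up(pos[v]); }  // decrease-key: caller lowered dist[v]
  };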
- // - template , - typename Container = std::vector, - typename Size = typename Container::size_type, - typename Equal = std::equal_to > - class d_ary_heap_indirect { - BOOST_STATIC_ASSERT (Arity >= 2); - public: - typedef Container container_type; - typedef Size size_type; - typedef Value value_type; - typedef typename Container::const_iterator const_iterator; - typedef const_iterator iterator; - // The distances being compared using better and that are stored in the - // distance map - typedef typename boost::property_traits::value_type distance_type; - d_ary_heap_indirect(DistanceMap const& distance, - IndexInHeapPropertyMap const& index_in_heap, - const Better& better = Better(), - size_type container_reserve = 100000, - Equal const& equal = Equal() - ) - : better(better), data(), distance(distance), - index_in_heap(index_in_heap),equal(equal) { - data.reserve(container_reserve); - } - /* Implicit copy constructor */ - /* Implicit assignment operator */ - - template - void append_heapify(C const& c) { - data.reserve(data.size()+c.size()); - append_heapify(c.begin(),c.end()); - } - - template - void append_heapify(I begin,I end) { - data.insert(data.end(),begin,end); - heapify(); - } - - template - void append_push(C const& c) { - data.reserve(data.size()+c.size()); - append_push(c.begin(),c.end()); - } - - // past some threshold, this should be faster than append_heapify. also, if there are many existing elements it will be faster. - template - void append_push(I begin,I end) { - for (;begin!=end;++begin) - push(*begin); - } - - template - void append(C const& c) { - if (D_ARY_APPEND_ALWAYS_PUSH || data.size()>=c.size()/2) - append_push(c); - else - append_heapify(c); - } - - // past some threshold, this should be faster than append_heapify. also, if there are many existing elements it will be faster. - template - void append(I begin,I end) { - if (D_ARY_APPEND_ALWAYS_PUSH || data.size()>=0x10000) - append_push(begin,end); - else - append_heapify(begin,end); - } - - // could allow mutation of data directly, e.g. push_back 1 at a time - but then they could forget to heapify() - - //from bottom of heap tree up, turn that subtree into a heap by adjusting the root down - // for n=size, array elements indexed by floor(n/2) + 1, floor(n/2) + 2, ... , n are all leaves for the tree, thus each is an one-element heap already - // warning: this is many fewer instructions but, at some point (when heap doesn't fit in Lx cache) it will become slower than repeated push(). 
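For reference, the heapify() that follows is Floyd's O(n) bottom-up construction; written out on a plain array it looks like this (a generic min-heap sketch under those assumptions, not the member function itself): every leaf is already a one-element heap, so sift down from the last internal node to the root.

  // Generic Floyd heapify on a plain array; O(n) total work.
  #include <algorithm>
  #include <cstddef>
  #include <vector>

  template <std::size_t Arity>
  void floyd_heapify(std::vector<double>& a) {
    if (a.size() < 2) return;
    for (std::size_t i = (a.size() - 2) / Arity + 1; i-- > 0; ) {  // last parent .. root
      std::size_t hole = i;
      const double v = a[hole];
      for (;;) {
        const std::size_t first = hole * Arity + 1;  // first child
        if (first >= a.size()) break;
        std::size_t best = first;
        const std::size_t last = std::min(first + Arity, a.size());
        for (std::size_t c = first + 1; c < last; ++c)
          if (a[c] < a[best]) best = c;              // min-heap: smallest child wins
        if (a[best] >= v) break;                     // heap property restored
        a[hole] = a[best];                           // pull the child up into the hole
        hole = best;
      }
      a[hole] = v;                                   // write the saved value once
    }
  }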
- void heapify() { - for (size_type i=parent(data.size()-1);i>0;--i) // starting from parent of last node, ending at first child of root (i==1) - preserve_heap_property_down(i); - } - - void reserve(size_type s) { - data.reserve(s); - } - - size_type size() const { - return data.size(); - } - - bool empty() const { - return data.empty(); - } - - const_iterator begin() const { - return data.begin(); - } - - const_iterator end() const { - return data.end(); - } - - void clear() { -#if D_ARY_TRACK_OUT_OF_HEAP - using boost::put; - for (typename Container::iterator i=data.begin(),e=data.end();i!=e;++i) - put(index_in_heap,*i,(size_type)D_ARY_HEAP_NULL_INDEX); -#endif - data.clear(); - } - - void push(const Value& v) { - if (D_ARY_PUSH_GRAEHL) { - size_type i = data.size(); - data.push_back(Value()); // (hoping default construct is cheap, construct-copy inline) - preserve_heap_property_up(v,i); // we don't have to recopy v, or init index_in_heap - } else { - size_type index = data.size(); - data.push_back(v); - using boost::put; - put(index_in_heap, v, index); - preserve_heap_property_up(index); - } - verify_heap(); - } - - Value& top() { - return data[0]; - } - - const Value& top() const { - return data[0]; - } - - void pop() { - using boost::put; - if(D_ARY_TRACK_OUT_OF_HEAP) - put(index_in_heap, data[0], (size_type)D_ARY_HEAP_NULL_INDEX); - if (data.size() != 1) { - if (D_ARY_POP_GRAEHL) { - preserve_heap_property_down(data.back(),0,data.size()-1); - data.pop_back(); - } else { - data[0] = data.back(); - put(index_in_heap, data[0], 0); - data.pop_back(); - preserve_heap_property_down(); - } - verify_heap(); - } else { - data.pop_back(); - } - } - - // This function assumes the key has been improved - // (distance has become smaller, so it may need to rise toward top(). - // i.e. decrease-key in a min-heap - void update(const Value& v) { - using boost::get; - size_type index = get(index_in_heap, v); - preserve_heap_property_up(v,index); - verify_heap(); - } - - // return true if improved. - bool maybe_improve(const Value& v,distance_type dbetter) { - using boost::get; - if (better(dbetter,get(distance,v))) { - preserve_heap_property_up_dist(v,dbetter); - return true; - } - return false; - } - - distance_type best(distance_type null=0) const { - return empty() ? null : get(distance,data[0]); - } - distance_type second_best(distance_type null=0) const { - if (data.size()<2) return null; - int m=std::min(data.size(),Arity+1); -// if (m>=Arity) m=Arity+1; - distance_type b=get(distance,data[1]); - for (int i=2;i=0 && i=0 check to catch uninit. 
data - } -#include "warning_pop.h" - - inline bool contains(const Value& v) const { - using boost::get; - return contains(v,get(index_in_heap, v)); - } - - void push_or_update(const Value& v) { /* insert if not present, else update */ - using boost::get; - size_type index = get(index_in_heap, v); - if (D_ARY_PUSH_GRAEHL) { - if (contains(v,index)) - preserve_heap_property_up(v,index); - else - push(v); - } else { - if (!contains(v,index)) { - index = data.size(); - data.push_back(v); - using boost::put; - put(index_in_heap, v, index); - } - preserve_heap_property_up(index); - } - verify_heap(); - } - - private: - Better better; - Container data; - DistanceMap distance; - IndexInHeapPropertyMap index_in_heap; - Equal equal; - - // Get the parent of a given node in the heap - static inline size_type parent(size_type index) { - return (index - 1) / Arity; - } - - // Get the child_idx'th child of a given node; 0 <= child_idx < Arity - static inline size_type child(size_type index, std::size_t child_idx) { - return index * Arity + child_idx + 1; - } - - // Swap two elements in the heap by index, updating index_in_heap - inline void swap_heap_elements(size_type index_a, size_type index_b) { - using std::swap; - Value value_a = data[index_a]; - Value value_b = data[index_b]; - data[index_a] = value_b; - data[index_b] = value_a; - using boost::put; - put(index_in_heap, value_a, index_b); - put(index_in_heap, value_b, index_a); - } - - inline void move_heap_element(Value const& v,size_type ito) { - using boost::put; - put(index_in_heap,v,ito); - data[ito]=v; //todo: move assign? - } - - // Verify that the array forms a heap; commented out by default - void verify_heap() const { - // This is a very expensive test so it should be disabled even when - // NDEBUG is not defined -#if D_ARY_VERIFY_HEAP - using boost::get; - for (size_t i = 1; i < data.size(); ++i) { - if (better(get(distance,data[i]), get(distance,data[parent(i)]))) { - assert (!"Element is smaller than its parent"); - } - } -#endif - } - - // we have a copy of the key, so we don't need to do that stupid find # of levels to move then move. 
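// (Editor's illustration of the "hole" idea referred to here, standalone and
//  binary for brevity: instead of swapping at each level -- three assignments
//  per step -- parents slide down into a hole and the moved value is written
//  exactly once at the end:
//
//    void sift_up_hole(std::vector<int>& a, std::size_t i) {
//      int v = a[i];                       // conceptual hole opens at i
//      while (i > 0 && v < a[(i - 1) / 2]) {
//        a[i] = a[(i - 1) / 2];            // parent drops into the hole
//        i = (i - 1) / 2;                  // the hole rises one level
//      }
//      a[i] = v;                           // fill the hole once
//    }
//
//  The real implementation below additionally keeps index_in_heap current as
//  elements move. In the original comment's terms:)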
we act as though data[index]=currently_being_moved, but in fact it's an uninitialized "hole", which we fill at the very end - inline void preserve_heap_property_up(Value const& currently_being_moved,size_type index) { - using boost::get; - preserve_heap_property_up(currently_being_moved,index,get(distance,currently_being_moved)); - } - - inline void preserve_heap_property_up_set_dist(Value const& currently_being_moved,distance_type dbetter) { - using boost::get; - using boost::put; - put(distance,currently_being_moved,dbetter); - preserve_heap_property_up(currently_being_moved,get(index_in_heap,currently_being_moved),dbetter); - verify_heap(); - } - - void preserve_heap_property_up(Value const& currently_being_moved,size_type index,distance_type currently_being_moved_dist) { - using boost::put; - using boost::get; - if (D_ARY_UP_GRAEHL) { - for (;;) { - if (index == 0) break; // Stop at root - size_type parent_index = parent(index); - Value const& parent_value = data[parent_index]; - if (better(currently_being_moved_dist, get(distance, parent_value))) { - move_heap_element(parent_value,index); - index = parent_index; - } else { - break; // Heap property satisfied - } - } - //finish "swap chain" by filling hole w/ currently_being_moved - move_heap_element(currently_being_moved,index); // note: it's ok not to return early on index==0 at start, even if self-assignment isn't supported by Value - because currently_being_moved is a copy. - } else { - put(index_in_heap,currently_being_moved,index); - put(distance,currently_being_moved,currently_being_moved_dist); - preserve_heap_property_up(index); - } - } - - // Starting at a node, move up the tree swapping elements to preserve the - // heap property. doesn't actually use swap; uses hole - void preserve_heap_property_up(size_type index) { - using boost::get; - if (index == 0) return; // Do nothing on root - if (D_ARY_UP_GRAEHL) { - Value copyi=data[index]; - preserve_heap_property_up(copyi,index); - return; - } - size_type orig_index = index; - size_type num_levels_moved = 0; - // The first loop just saves swaps that need to be done in order to avoid - // aliasing issues in its search; there is a second loop that does the - // necessary swap operations - Value currently_being_moved = data[index]; - distance_type currently_being_moved_dist = - get(distance, currently_being_moved); - for (;;) { - if (index == 0) break; // Stop at root - size_type parent_index = parent(index); - Value parent_value = data[parent_index]; - if (better(currently_being_moved_dist, get(distance, parent_value))) { - ++num_levels_moved; - index = parent_index; - continue; - } else { - break; // Heap property satisfied - } - } - // Actually do the moves -- move num_levels_moved elements down in the - // tree, then put currently_being_moved at the top - index = orig_index; - using boost::put; - for (size_type i = 0; i < num_levels_moved; ++i) { - size_type parent_index = parent(index); - Value parent_value = data[parent_index]; - put(index_in_heap, parent_value, index); - data[index] = parent_value; - index = parent_index; - } - data[index] = currently_being_moved; - put(index_in_heap, currently_being_moved, index); - verify_heap(); - } - - - // From the root, swap elements (each one with its smallest child) if there - // are any parent-child pairs that violate the heap property. v is placed at data[i], but then pushed down (note: data[i] won't be read explicitly; it will instead be overwritten by percolation). 
this also means that v must be a copy of data[i] if it was already at i. - // e.g. v=data.back(), i=0, sz=data.size()-1 for pop(), implicitly swapping data[i], data.back(), and doing data.pop_back(), then adjusting from 0 down w/ swaps. updates index_in_heap for v. - inline void preserve_heap_property_down(Value const& currently_being_moved,size_type i,size_type heap_size) { - using boost::get; - distance_type currently_being_moved_dist=get(distance,currently_being_moved); - Value* data_ptr = &data[0]; - size_type index = 0; // hole at index - currently_being_moved to be put here when we find the final hole spot - for (;;) { - size_type first_child_index = child(index, 0); - if (first_child_index >= heap_size) break; /* No children */ - Value* child_base_ptr = data_ptr + first_child_index; // using index of first_child_index+smallest_child_index because we hope optimizer will be smart enough to const-unroll a loop below if we do this. i think the optimizer would have gotten it even without our help (i.e. store root-relative index) - - // begin find best child index/distance - size_type smallest_child_index = 0; // don't add to base first_child_index every time we update which is smallest. - distance_type smallest_child_dist = get(distance, child_base_ptr[smallest_child_index]); -#undef D_ARY_MAYBE_IMPROVE_CHILD_I -#define D_ARY_MAYBE_IMPROVE_CHILD_I \ - distance_type i_dist = get(distance, child_base_ptr[i]); \ - if (better(i_dist, smallest_child_dist)) { \ - smallest_child_index = i; \ - smallest_child_dist = i_dist; \ - } - if (first_child_index + Arity <= heap_size) { - // avoid repeated heap_size boundcheck (should test if this is really a speedup - instruction cache tradeoff - could use upperbound = min(Arity,heap_size-first_child_index) instead. but this optimizes to a fixed number of iterations (compile time known) so probably worth it - for (size_t i = 1; i < Arity; ++i) { - D_ARY_MAYBE_IMPROVE_CHILD_I - } - } else { - for (size_t i = 1,e=heap_size - first_child_index; i < e; ++i) { - D_ARY_MAYBE_IMPROVE_CHILD_I - } - } - //end: know best child - - if (better(smallest_child_dist, currently_being_moved_dist)) { - // instead of swapping, move. - move_heap_element(child_base_ptr[smallest_child_index],index); // move up - index=first_child_index+smallest_child_index; // descend - hole is now here - } else { - move_heap_element(currently_being_moved,index); // finish "swap chain" by filling hole - break; - } - } - verify_heap(); - } - - inline void preserve_heap_property_down(size_type i) { - preserve_heap_property_down(data[i],i,data.size()); - } - - void preserve_heap_property_down() { - using boost::get; - if (data.empty()) return; - if (D_ARY_DOWN_GRAEHL) { // this *should* be more efficient because i avoid swaps. - Value copy0=data[0]; - preserve_heap_property_down(copy0,0,data.size()); - return; - } - size_type index = 0; - Value currently_being_moved = data[0]; - distance_type currently_being_moved_dist = - get(distance, currently_being_moved); - size_type heap_size = data.size(); - Value* data_ptr = &data[0]; - for (;;) { - size_type first_child_index = child(index, 0); - if (first_child_index >= heap_size) break; /* No children */ - Value* child_base_ptr = data_ptr + first_child_index; - size_type smallest_child_index = 0; - distance_type smallest_child_dist = get(distance, child_base_ptr[smallest_child_index]); - if (first_child_index + Arity <= heap_size) { - for (size_t i = 1; i < Arity; ++i) { // can be unrolled completely. 
- - D_ARY_MAYBE_IMPROVE_CHILD_I - } - } else { - for (size_t i = 1,e=heap_size - first_child_index; i < e; ++i) { - D_ARY_MAYBE_IMPROVE_CHILD_I - } - } - if (better(smallest_child_dist, currently_being_moved_dist)) { - swap_heap_elements(smallest_child_index + first_child_index, index); - index = smallest_child_index + first_child_index; - continue; - } else { - break; // Heap property satisfied - } - } - verify_heap(); - } - - }; - -#endif diff --git a/utils/ftoa.h b/utils/ftoa.h deleted file mode 100644 index 3dba528d..00000000 --- a/utils/ftoa.h +++ /dev/null @@ -1,403 +0,0 @@ -#ifndef FTOA_H -#define FTOA_H - - -//TODO: for fractional digits/non-sci, determine the right amount of left padding (more if the whole number is indeed <1, to keep the significant digits), less if sci notation and/or mantissa has sig. digits (don't want N before . and N after!) - -#ifndef FTOA_ROUNDTRIP -# define FTOA_ROUNDTRIP 1 -#endif - -#ifndef FTOA_DEBUG -# define FTOA_DEBUG 0 -#endif - -#ifndef FTOA_USE_SPRINTF -#define FTOA_USE_SPRINTF 0 -#endif - -#if FTOA_DEBUG -# define FTOAassert(x) assert(x) -# define DBFTOA(x) std::cerr<<"\nFTOA " <<__func__<<"("<<__LINE__<<"): " #x "="< -#include -#include -#include -#include -#include -#include "utoa.h" -#include "nan.h" - -template -struct ftoa_traits { -}; - -//eP10, -// sigd decimal places normally printed, roundtripd needed so that round-trip float->string->float is identity - -#define DEFINE_FTOA_TRAITS(FLOATT,INTT,sigd,roundtripd,small,large,used,P10) \ -template <> \ -struct ftoa_traits { \ - typedef INTT int_t; \ - typedef u ## INTT uint_t; \ - typedef FLOATT float_t; \ - enum { digits10=std::numeric_limits::digits10, chars_block=P10, usedig=used, sigdig=sigd, roundtripdig=roundtripd, bufsize=roundtripdig+7 }; \ - static const double pow10_block = 1e ## P10; \ - static const float_t small_f = small; \ - static const float_t large_f = large; \ - static inline int sprintf(char *buf,double f) { return std::sprintf(buf,"%." #used "g",f); } \ - static inline int sprintf_sci(char *buf,double f) { return std::sprintf(buf,"%." #used "e",f); } \ - static inline int sprintf_nonsci(char *buf,double f) { return std::sprintf(buf,"%." #used "f",f); } \ - static inline uint_t fracblock(double frac) { FTOAassert(frac>=0 && frac<1); double f=frac*pow10_block;uint_t i=(uint_t)f;FTOAassert(i=0 && frac<1); double f=frac*pow10_block;uint_t i=(uint_t)(f+.5);FTOAassert(ilarge; } \ - static inline bool use_sci(float_t f) { return use_sci_abs(std::fabs(f)); } \ -}; -//TODO: decide on computations in double (would hurt long double) or in native float type - any advantage? more precision is usually better. - -//10^22 = 0x1.0f0cf064dd592p73 is the largest exactly representable power of 10 in the binary64 format. but round down to 18 so int64_t can hold it. - -#if FTOA_ROUNDTRIP -#define DEFINE_FTOA_TRAITS_ROUNDTRIP(FLOATT,INTT,sigd,roundtripd,small,large) DEFINE_FTOA_TRAITS(FLOATT,INTT,sigd,roundtripd,small,large,roundtripd,roundtripd) -#else -#define DEFINE_FTOA_TRAITS_ROUNDTRIP(FLOATT,INTT,sigd,roundtripd,small,large) DEFINE_FTOA_TRAITS(FLOATT,INTT,sigd,roundtripd,small,large,sigd,sigd) -#endif - -DEFINE_FTOA_TRAITS_ROUNDTRIP(double,int64_t,15,17,1e-5,1e8) -//i've heard that 1e10 is fine for float. but we only have 1e9 (9 decimal places) in int32. 
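An editor's aside on the digit counts wired into these traits (15/17 for
double above, 6/9 for float on the line below): 17 and 9 are the standard
IEEE-754 round-trip widths, which a few lines of standard C++ can verify.
This check is a standalone sketch, not part of the header:

    #include <cassert>
    #include <cstdio>
    #include <cstdlib>

    int main() {
      double x = 0.1 + 0.2;                 // not exactly representable in binary64
      char buf[32];
      std::sprintf(buf, "%.17g", x);        // 17 significant digits round-trip a double
      assert(std::strtod(buf, NULL) == x);  // parsing recovers the identical value
      float f = 0.1f;
      std::sprintf(buf, "%.9g", f);         // 9 significant digits round-trip a float
      assert(std::strtof(buf, NULL) == f);
      return 0;
    }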
-DEFINE_FTOA_TRAITS_ROUNDTRIP(float,int32_t,6,9,1e-3,1e8) - - -template -inline void ftoa_error(F f,char const* msg="") { - using namespace std; - cerr<<"ftoa error: "< -char *prepend_pos_frac_digits(char *p,F f) { - FTOAassert(f<1 && f >0); - typedef ftoa_traits FT; - //repeat if very small??? nah, require sci notation to take care of it. - typename FT::uint_t i=FT::rounded_fracblock(f); - DBFTOA2(f,i); - if (i>0) { - unsigned n_skipped; - char *d=utoa_drop_trailing_0(p,i,n_skipped); - char *b=p-FT::chars_block+n_skipped; - FTOAassert(b<=d); - left_pad(b,d,'0'); - return b; - } else { - return p; - } -} - -template -char *append_pos_frac_digits(char *p,F f) { // '0' right-padded, nul terminated, return position of nul. [p,ret) are the digits - if (f==0) { - *p++='0'; - return p; - } - FTOAassert(f<1 && f >0); - typedef ftoa_traits FT; - //repeat if very small??? nah, require sci notation to take care of it. - typename FT::uint_t i=FT::rounded_fracblock(f); - DBFTOA2(f,i); - if (i>0) { - char *e=p+FT::chars_block; - utoa_left_pad(p,e,i,'0'); - *e=0; - return e; - } else { - *p=0; - return p; - } -} - -template -inline char *prepend_pos_frac(char *p,F f) { - FTOAassert(f<1 && f>=0); - if (f==0) { - *--p='0'; - return p; - } - p=prepend_pos_frac_digits(p,f); - *--p='.'; - if (DECIMAL_FOR_WHOLE>0) - *--p='0'; - return p; -} - -template -inline char *append_pos_frac(char *p,F f) { - DBFTOA(f); - if (DECIMAL_FOR_WHOLE>0) - *p++='0'; - *p++='.'; - return append_pos_frac_digits(p,f); -} - -template -inline char *prepend_frac(char *p,F f,bool positive_sign=false) { - FTOAassert(f<1 && f>-1); - if (f==0) - *--p='0'; - else if (f<0) { - p=prepend_pos_frac(p,-f); - *--p='-'; - } else { - p=prepend_pos_frac(p,f); - if (positive_sign) - *--p='+'; - } - return p; -} - - -template -inline char *append_sign(char *p,F f,bool positive_sign=false) { - if (f<0) { - *p++='-'; - } else if (positive_sign) - *p++='+'; - return p; -} - -template -inline char *append_frac(char *p,F f,bool positive_sign=false) { - FTOAassert(f<1 && f>-1); - if (f==0) { - *p++='0'; - return p; - } else if (f<0) { - *p++='-'; - return append_pos_frac(p,-f); - } - if (positive_sign) { - *p++='+'; - return append_pos_frac(p,f); - } - -} - - -//append_frac, append_pos_sci, append_sci. notice these are all composed according to a pattern (but reversing order of composition in pre vs app). or can implement with copy through buffer - -/* will switch to sci notation if integer part is too big for the int type. but for very small values, will simply display 0 (i.e. //TODO: find out log10 and leftpad 0s then convert rest) */ -template -char *prepend_pos_nonsci(char *p,F f) { - typedef ftoa_traits FT; - typedef typename FT::uint_t uint_t; - DBFTOA(f); - FTOAassert(f>0); - if (f>std::numeric_limits::max()) - return prepend_pos_sci(p,f); - //which is faster - modf is weird and returns negative frac part if f is negative. while we could deal with this using fabs, we instead only handle positive here (put - sign in front and negate, then call us) - ? 
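// (Editor's answer to the question above, with concrete values: std::modf
//  keeps the sign on both parts, e.g.
//    double ip; double fp = std::modf(-1.25, &ip);  // ip == -1.0, fp == -0.25
//  so handling a negative f directly would need fabs() on both pieces.
//  Restricting this function to f > 0 -- callers negate and prepend the '-'
//  themselves -- sidesteps that, whichever branch below is compiled in.)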
-#if 0 - F intpart; - F frac=std::modf(f,&intpart); - uint_t u=intpart; -#else - uint_t u=f; - F frac=f-u; -#endif - DBFTOA2(u,frac); - if (frac == 0) { - if (DECIMAL_FOR_WHOLE>1) - *--p='.'; - } else { - p=prepend_pos_frac_digits(p,frac); - *--p='.'; - } - if (u==0) { - if (DECIMAL_FOR_WHOLE>0) - *--p='0'; - } else - p=utoa(p,u); - return p; -} - -// modify p; return true if handled -template -inline bool prepend_0_etc(char *&p,F f,bool positive_sign=false) { - if (f==0) { - *--p='0'; - return true; - } - if (is_nan(f)) { - p-=3; - p[0]='N';p[1]='A';p[2]='N'; - return true; - } - if (is_pos_inf(f)) { - p-=3; - p[0]='I';p[1]='N';p[2]='F'; - if (positive_sign) - *--p='+'; - return true; - } - if (is_neg_inf(f)) { - p-=4; - p[0]='-';p[1]='I';p[2]='N';p[3]='F'; - return true; - } - return false; -} - -template -inline char *prepend_nonsci(char *p,F f,bool positive_sign=false) { - if (prepend_0_etc(p,f,positive_sign)) return p; - if (f<0) { - p=prepend_pos_nonsci(p,-f); - *--p='-'; - } else { - p=prepend_pos_nonsci(p,f); - if (positive_sign) - *--p='+'; - } - return p; -} - -template -inline char *prepend_pos_sci(char *p,F f,bool positive_sign_exp=false) { - FTOAassert(f>0); - typedef ftoa_traits FT; - int e10; - F mant=FT::mantexp10(f,e10); - DBFTOA(f); - DBFTOA2(mant,e10); - FTOAassert(mant<10.00001); - if (mant>=10.) { - ++e10; - mant*=.1; - } else if (mant < 1.) { - --e10; - mant*=10; - } - p=itoa(p,e10,positive_sign_exp); - *--p='e'; - return prepend_pos_nonsci(p,mant); -} - -template -inline char *prepend_sci(char *p,F f,bool positive_sign_mant=false,bool positive_sign_exp=false) { - if (prepend_0_etc(p,f,positive_sign_mant)) return p; - if (f==0) - *--p='0'; - else if (f<0) { - p=prepend_pos_sci(p,-f,positive_sign_exp); - *--p='-'; - } else { - p=prepend_pos_sci(p,f,positive_sign_exp); - if (positive_sign_mant) - *--p='+'; - } - return p; -} - -template -inline char *append_nonsci(char *p,F f,bool positive_sign=false) { - if (positive_sign&&f>=0) *p++='+'; - return p+ftoa_traits::sprintf_nonsci(p,f); -} - -template -inline char *append_sci(char *p,F f,bool positive_sign=false) { - if (positive_sign&&f>=0) *p++='+'; - return p+ftoa_traits::sprintf_sci(p,f); -} - -template -inline char *append_ftoa(char *p,F f,bool positive_sign=false) { - if (positive_sign&&f>=0) *p++='+'; - return p+ftoa_traits::sprintf(p,f); -} - -template -inline char *prepend_ftoa(char *p,F f) -{ - typedef ftoa_traits FT; - return FT::use_sci(f) ? prepend_sci(p,f) : prepend_nonsci(p,f); -} - -template -inline std::string ftos_append(F f) { - typedef ftoa_traits FT; - char buf[FT::bufsize]; - return std::string(buf,append_ftoa(buf,f)); -} - -template -inline std::string ftos_prepend(F f) { - typedef ftoa_traits FT; - char buf[FT::bufsize]; - char *end=buf+FT::bufsize; - return std::string(prepend_ftoa(end,f),end); -} - - -template -inline std::string ftos(F f) { -#if 0 - // trust RVO? no extra copies? - return FTOA_USE_SPRINTF ? ftos_append(f) : ftos_prepend(f); -#else - typedef ftoa_traits FT; - char buf[FT::bufsize]; - if (FTOA_USE_SPRINTF) { - return std::string(buf,append_ftoa(buf,f)); - } else { - char *end=buf+FT::bufsize; - return std::string(prepend_ftoa(end,f),end); - } -#endif -} - -namespace { - const int ftoa_bufsize=30; - char ftoa_outbuf[ftoa_bufsize]; -} - -// not even THREADLOCAL - don't use. 
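To spell out the warning above: static_ftoa below hands back a pointer into
the single shared ftoa_outbuf, so a second call from any thread (or from the
same thread, before the first result is consumed) clobbers the previous
string. A minimal sketch of the safe alternative, assuming only the standard
library (ftoa_safe is a hypothetical name, not part of this header):

    #include <cstdio>
    #include <string>

    // the returned std::string owns its bytes, so concurrent calls cannot interfere
    inline std::string ftoa_safe(float f) {
      char buf[32];                                        // ample for "%.9g" output
      int n = std::snprintf(buf, sizeof(buf), "%.9g", f);
      return std::string(buf, buf + n);
    }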
-inline char *static_ftoa(float f) -{ - if (FTOA_USE_SPRINTF) { - append_ftoa(ftoa_outbuf,f); - return ftoa_outbuf; - } else { - char *end=ftoa_outbuf+ftoa_bufsize; - return prepend_ftoa(end,f); - } -} - - -#endif diff --git a/utils/int_or_pointer.h b/utils/int_or_pointer.h deleted file mode 100644 index 4b6a9e4a..00000000 --- a/utils/int_or_pointer.h +++ /dev/null @@ -1,70 +0,0 @@ -#ifndef INT_OR_POINTER_H -#define INT_OR_POINTER_H - -// if you ever wanted to store a discriminated union of pointer/integer without an extra boolean flag, this will do it, assuming your pointers are never odd. - -// check lsb for expected tag? -#ifndef IOP_CHECK_LSB -# define IOP_CHECK_LSB 1 -#endif -#if IOP_CHECK_LSB -# define iop_assert(x) assert(x) -#else -# define iop_assert(x) -#endif - -#include -#include - -template -struct IntOrPointer { - typedef Pointed pointed_type; - typedef Int integer_type; - typedef Pointed *value_type; - typedef IntOrPointer self_type; - IntOrPointer(int j) { *this=j; } - IntOrPointer(size_t j) { *this=j; } - IntOrPointer(value_type v) { *this=v; } - bool is_integer() const { return i&1; } - bool is_pointer() const { return !(i&1); } - value_type & pointer() { return p; } - const value_type & pointer() const { iop_assert(is_pointer()); return p; } - integer_type integer() const { iop_assert(is_integer()); return i >> 1; } - void set_integer(Int j) { i=2*j+1; } - void set_pointer(value_type p_) { p=p_;iop_assert(is_pointer()); } - void operator=(unsigned j) { i = 2*(integer_type)j+1; } - void operator=(int j) { i = 2*(integer_type)j+1; } - template - void operator=(C j) { i = 2*(integer_type)j+1; } - void operator=(value_type v) { p=v; } - IntOrPointer() {} - IntOrPointer(const self_type &s) : p(s.p) {} - void operator=(const self_type &s) { p=s.p; } - template - bool operator ==(C* v) const { return p==v; } - template - bool operator ==(const C* v) const { return p==v; } - template - bool operator ==(C j) const { return integer() == j; } - bool operator ==(self_type s) const { return p==s.p; } - bool operator !=(self_type s) const { return p!=s.p; } - template void print(O&o) const - { - if (is_integer()) - o << integer(); - else { - o << "0x" << std::hex << (size_t)pointer() << std::dec; - } - } - friend inline std::ostream& operator<<(std::ostream &o,self_type const& s) { - s.print(o); return o; - } -protected: - union { - value_type p; // must be even (guaranteed unless you're pointing at packed chars) - integer_type i; // stored as 2*data+1, so only has half the range (one less bit) of a normal integer_type - }; -}; - - -#endif diff --git a/utils/intern_pool.h b/utils/intern_pool.h deleted file mode 100644 index 7c739add..00000000 --- a/utils/intern_pool.h +++ /dev/null @@ -1,158 +0,0 @@ -#ifndef INTERN_POOL_H -#define INTERN_POOL_H - -#define DEBUG_INTERN_POOL(x) x - -/* to "intern" a string in lisp is to make a symbol from it (a pointer to a canonical copy whose pointer can be equality-compared/hashed directly with other interned things). 
we take an Item that has a key part and some mutable parts (that aren't in its identity), and we hash-by-value the key part to map to a canonical on-heap Item - and we use a boost object pool to allocate them */ - -//FIXME: actually store function object state (assumed stateless so far) - -#include -#include "hash.h" -//#include "null_traits.h" -#include - -template -struct get_key { // default accessor for I = like pair - typedef typename I::first_type const& result_type; - typedef I const& argument_type; - result_type operator()(I const& i) const { - return i.first; - } -}; - -// Arg type should be the non-pointer version. this saves me from using boost type traits to remove_pointer. f may be binary or unary -template -struct compose_indirect { - typedef Arg *argument_type; // we also accept Arg & - KeyF kf; - F f; - typedef typename F::result_type result_type; - result_type operator()(Arg const& p) const { - return f(kf(p)); - } - result_type operator()(Arg & p) const { - return f(kf(p)); - } - result_type operator()(Arg * p) const { - return f(kf(*p)); - } - template - result_type operator()(V const& v) const { - return f(kf(*v)); - } - - result_type operator()(Arg const& a1,Arg const& a2) const { - return f(kf(a1),kf(a2)); - } - result_type operator()(Arg & a1,Arg & a2) const { - return f(kf(a1),kf(a2)); - } - result_type operator()(Arg * a1,Arg * a2) const { - return f(kf(*a1),kf(*a2)); - } - template - result_type operator()(V const& v,W const&w) const { - return f(kf(*v),kf(*w)); - } - - -}; - -template -struct equal_indirect { - typedef Arg *argument_type; // we also accept Arg & - KeyF kf; - F f; - typedef bool result_type; - - result_type operator()(Arg const& a1,Arg const& a2) const { - return f(kf(a1),kf(a2)); - } - result_type operator()(Arg & a1,Arg & a2) const { - return f(kf(a1),kf(a2)); - } - result_type operator()(Arg * a1,Arg * a2) const { - return a1==a2||(a1&&a2&&f(kf(*a1),kf(*a2))); - } - template - result_type operator()(V const& v,W const&w) const { - return v==w||(v&&w&&f(kf(*v),kf(*w))); - } - - -}; - -/* - -template -struct indirect_function { - F f; - explicit indirect_function(F const& f=F()) : f(f) {} - typedef typename F::result_type result_type; - template - result_type operator()(V *p) const { - return f(*p); - } -}; -*/ - -template ,class HashKey=boost::hash,class EqKey=std::equal_to, class Pool=boost::object_pool > -struct intern_pool : Pool { - KeyF key; - typedef typename KeyF::result_type Key; - typedef Item *Handle; - typedef compose_indirect HashDeep; - typedef equal_indirect EqDeep; - typedef HASH_SET Canonical; - typedef typename Canonical::iterator CFind; - typedef std::pair CInsert; - Canonical canonical; - bool interneq(Handle &i) { // returns true if i is newly interned, false if it already existed - CInsert i_new=canonical.insert(i); - i=*i_new.first; - return i_new.second; - } -// inherited: Handle construct(...) - Handle construct_fresh() { return Pool::construct(); } - Handle intern(Handle i) { // (maybe invalidating i, returning a valid canonical handle (pointer) - CInsert i_new=canonical.insert(i); - if (i_new.second) - return i; - else { - free(i); - return *i_new->first; - } - } - void destroy_interned(Handle i) { - DEBUG_INTERN_POOL(assert(canonical.find(i)!=canonical.end())); - canonical.erase(i); - destroy(i); - } - bool destroy_fresh(Handle i) { - DEBUG_INTERN_POOL(assert(canonical.find(i)!=canonical.end()||*canonical.find(i)!=i)); // i is a constructed item not yet interned. 
- destroy(i); - } - void destroy_both(Handle i) { // i must have come from this pool. may be interned, or not. destroy both the noninterned and interned. - if (!destroy_if_interned(i)) destroy(i); - } - // destroy intern(i) if it exists. return true if it existed AND its address was i. otherwise return false (whether or not a value-equal item existed and was destroyed) - bool destroy_if_interned(Handle i) { - CFind f=canonical.find(i); - if (f!=canonical.end()) { - Handle interned=*f; - canonical.erase(f); - destroy(f); - if (f==i) return true; - } - return false; - } - - intern_pool() { - HASH_MAP_EMPTY(canonical,(Handle)0); - } -}; - - - -#endif diff --git a/utils/lvalue_pmap.h b/utils/lvalue_pmap.h deleted file mode 100644 index 5b9403c0..00000000 --- a/utils/lvalue_pmap.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef LVALUE_PMAP_H -#define LVALUE_PMAP_H - -#include - -// i checked: boost provides get and put given [] - but it's not being found by ADL so instead i define them myself - -// lvalue property map pmapname
<P>
that is: P p; valtype &v=p->name; -#define PMAP_MEMBER_INDIRECT(pmapname,valtype,name) template struct pmapname { \ - typedef P key_type; \ - typedef valtype value_type; \ - typedef value_type & reference; \ - typedef boost::lvalue_property_map_tag category; \ - reference operator[](key_type p) const { return p->name; } \ - typedef pmapname
<P>
self_type; \ - friend inline value_type const& get(self_type const&,key_type p) { return p->name; } \ - friend inline void put(self_type &,key_type p,value_type const& v) { p->name = v; } \ -}; - -#define PMAP_MEMBER_INDIRECT_2(pmapname,name) template struct pmapname { \ - typedef P key_type; \ - typedef R value_type; \ - typedef value_type & reference; \ - typedef boost::lvalue_property_map_tag category; \ - reference operator[](key_type p) const { return p->name; } \ - typedef pmapname self_type; \ - friend inline value_type const& get(self_type const&,key_type p) { return p->name; } \ - friend inline void put(self_type &,key_type p,value_type const& v) { p->name = v; } \ -}; - -#endif diff --git a/utils/max_plus.h b/utils/max_plus.h deleted file mode 100644 index 2e56f85e..00000000 --- a/utils/max_plus.h +++ /dev/null @@ -1,201 +0,0 @@ -#ifndef MAX_PLUS_H_ -#define MAX_PLUS_H_ - -#define MAX_PLUS_ORDER 0 -#define MAX_PLUS_DEBUG(x) - -// max-plus algebra. ordering a > b really means that (i.e. default a > around -// x+y := max{x,y} -// x*y := x+y -// 0 := -inf -// 1 := 0 -// additive inverse does not, but mult. does. (inverse()) and x/y := x-y = x+y.inverse() -//WARNING: default order is reversed, on purpose, i.e. alog(p_b). sorry. defaults in libs are to order ascending, but we want best first. - -#include -#include -#include -#include -#include -#include -#include "semiring.h" -#include "show.h" -//#include "logval.h" - -template -class MaxPlus { - public: - void print(std::ostream &o) const { - o<) - template - void operator=(O const& o) { - v_=o.v_; - } - template - MaxPlus(O const& o) : v_(o.v_) { } - - typedef MaxPlus Self; - MaxPlus() : v_(LOGVAL_LOG0) {} - explicit MaxPlus(double x) : v_(std::log(x)) {} - MaxPlus(init_1) : v_(0) { } - MaxPlus(init_0) : v_(LOGVAL_LOG0) { } - MaxPlus(int x) : v_(std::log(x)) {} - MaxPlus(unsigned x) : v_(std::log(x)) { } - MaxPlus(double lnx,bool sign) : v_(lnx) { MAX_PLUS_DEBUG(assert(!sign)); } - MaxPlus(double lnx,init_lnx) : v_(lnx) {} - static Self exp(T lnx) { return MaxPlus(lnx,false); } - - // maybe the below are faster than == 1 and == 0. i don't know. - bool is_1() const { return v_==0; } - bool is_0() const { return v_==LOGVAL_LOG0; } - - static Self One() { return Self(init_1()); } - static Self Zero() { return Self(init_0()); } - static Self e() { return Self(1,false); } - void logeq(const T& v) { v_ = v; } - bool signbit() const { return false; } - - Self& logpluseq(const Self& a) { - if (a.is_0()) return *this; - if (a.v_ < v_) { - v_ = v_ + log1p(std::exp(a.v_ - v_)); - } else { - v_ = a.v_ + log1p(std::exp(v_ - a.v_)); - } - return *this; - } - - Self& besteq(const Self& a) { - if (a.v_ < v_) - v_=a.v_; - return *this; - } - - Self& operator+=(const Self& a) { - if (a.v_ < v_) - v_=a.v_; - return *this; - } - - Self& operator*=(const Self& a) { - v_ += a.v_; - return *this; - } - - Self& operator/=(const Self& a) { - v_ -= a.v_; - return *this; - } - - // Self(fabs(log(x)),x.s_) - friend Self abslog(Self x) { - if (x.v_<0) x.v_=-x.v_; - return x; - } - - Self& poweq(const T& power) { - v_ *= power; - return *this; - } - - Self inverse() const { - return Self(-v_,false); - } - - Self pow(const T& power) const { - Self res = *this; - res.poweq(power); - return res; - } - - Self root(const T& root) const { - return pow(1/root); - } - -// copy elision - as opposed to explicit copy of Self const& o1, we should be able to construct Logval r=a+(b+c) as a single result in place in r. 
todo: return std::move(o1) - C++0x - friend inline Self operator+(Self a,Self const& b) { - a+=b; - return a; - } - friend inline Self operator*(Self a,Self const& b) { - a*=b; - return a; - } - friend inline Self operator/(Self a,Self const& b) { - a/=b; - return a; - } - friend inline T log(Self const& a) { - return a.v_; - } - friend inline T pow(Self const& a,T const& e) { - return a.pow(e); - } - - // intentionally not defining an operator < or operator > - because you may want to default (for library convenience) a v_; - } - friend inline bool operator==(Self const& lhs, Self const& rhs) { - return lhs.v_ == rhs.v_; - } - friend inline bool operator!=(Self const& lhs, Self const& rhs) { - return lhs.v_ != rhs.v_; - } - std::size_t hash() const { - using namespace boost; - return hash_value(v_); - } - friend inline std::size_t hash_value(Self const& x) { - return x.hash(); - } - -/* - operator T() const { - return std::exp(v_); - } -*/ - T as_float() const { - return std::exp(v_); - } - - T v_; -}; - -template -struct semiring_traits > : default_semiring_traits > { - static const bool has_logplus=true; - static const bool has_besteq=true; -#if MAX_PLUS_ORDER - static const bool have_order=true; -#endif -}; - -#if MAX_PLUS_ORDER -template -bool operator<(const MaxPlus& lhs, const MaxPlus& rhs) { - return (lhs.v_ < rhs.v_); -} - -template -bool operator<=(const MaxPlus& lhs, const MaxPlus& rhs) { - return (lhs.v_ <= rhs.v_); -} - -template -bool operator>(const MaxPlus& lhs, const MaxPlus& rhs) { - return (lhs.v_ > rhs.v_); -} - -template -bool operator>=(const MaxPlus& lhs, const MaxPlus& rhs) { - return (lhs.v_ >= rhs.v_); -} -#endif - -#endif diff --git a/utils/maybe_update_bound.h b/utils/maybe_update_bound.h deleted file mode 100644 index d57215d0..00000000 --- a/utils/maybe_update_bound.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef MAYBE_UPDATE_BOUND_H -#define MAYBE_UPDATE_BOUND_H - -template -inline void maybe_increase_max(To &to,const From &from) { - if (to -inline void maybe_decrease_min(To &to,const From &from) { - if (from - -template struct nan_static_assert; -template <> struct nan_static_assert { }; - -// is_iec559 i.e. only IEEE 754 float has x != x <=> x is nan -template -inline bool is_nan(T x) { -// static_cast(sizeof(nan_static_assert::has_quiet_NaN>)); - return std::numeric_limits::has_quiet_NaN && (x != x); -} - -template -inline bool is_inf(T x) { -// static_cast(sizeof(nan_static_assert::has_infinity>)); - return x == std::numeric_limits::infinity() || x == -std::numeric_limits::infinity(); -} - -template -inline bool is_pos_inf(T x) { -// static_cast(sizeof(nan_static_assert::has_infinity>)); - return x == std::numeric_limits::infinity(); -} - -template -inline bool is_neg_inf(T x) { -// static_cast(sizeof(nan_static_assert::has_infinity>)); - return x == -std::numeric_limits::infinity(); -} - -//c99 isfinite macro shoudl be much faster -template -inline bool is_finite(T x) { - return !is_nan(x) && !is_inf(x); -} - - -#endif diff --git a/utils/string_to.h b/utils/string_to.h deleted file mode 100644 index c78a5394..00000000 --- a/utils/string_to.h +++ /dev/null @@ -1,314 +0,0 @@ -#ifndef STRING_TO_H -#define STRING_TO_H - -/* - may not be any faster than boost::lexical_cast in later incarnations (see http://accu.org/index.php/journals/1375) - but is slightly simpler. no wide char or locale. 
- - X string_to(string); - string to_string(X); - X& string_into(string,X &); // note: returns the same ref you passed in, for convenience of use - - default implementation via stringstreams (quite slow, I'm sure) - - fast implementation for string, int<->string, unsigned<->string, float<->string, double<->string - -*/ - -#ifndef USE_FTOA -#define USE_FTOA 1 -#endif -#ifndef HAVE_STRTOUL -# define HAVE_STRTOUL 1 -#endif - -#include -#include -#include -#include - -#include "have_64_bits.h" -#include "utoa.h" -#if USE_FTOA -# include "ftoa.h" -#endif - -namespace { -// for faster numeric to/from string. TODO: separate into optional header -#include -#include -#include // access to evil (fast) C isspace etc. -#include //strtoul -} - -inline void throw_string_to(std::string const& msg,char const* prefix="string_to: ") { - throw std::runtime_error(prefix+msg); -} - -template -bool try_stream_into(I & i,To &to,bool complete=true) -{ - i >> to; - if (i.fail()) return false; - if (complete) { - char c; - return !(i >> c); - } - return true; -} - -template -bool try_string_into(Str const& str,To &to,bool complete=true) -{ - std::istringstream i(str); - return try_stream_into(i,to,complete); -} - -template inline -Data & string_into(const Str &str,Data &data) -{ - if (!try_string_into(str,data)) - throw std::runtime_error(std::string("Couldn't convert (string_into): ")+str); - return data; -} - - -template inline -Data string_to(const Str &str) -{ - Data ret; - string_into(str,ret); - return ret; -} - -template inline -std::string to_string(D const &d) -{ - std::ostringstream o; - o << d; - return o.str(); -} - -inline std::string to_string(unsigned x) { - return utos(x); -} - -inline std::string to_string(int x) { - return itos(x); -} - -inline long strtol_complete(char const* s,int base=10) { - char *e; - if (*s) { - long r=strtol(s,&e,base); - char c=*e; - if (!c || isspace(c)) //simplifying assumption: we're happy if there's other stuff in the string, so long as the number ends in a space or eos. TODO: loop consuming spaces until end? - return r; - } - throw_string_to(s,"Couldn't convert to integer: "); -} - -// returns -INT_MAX or INT_MAX if number is too large/small -inline int strtoi_complete_bounded(char const* s,int base=10) { - long l=strtol_complete(s,base); - if (l::min()) - return std::numeric_limits::min(); - if (l>std::numeric_limits::max()) - return std::numeric_limits::max(); - return l; -} -#define RANGE_STR(x) #x -#ifdef INT_MIN -# define INTRANGE_STR "[" RANGE_STR(INT_MIN) "," RANGE_STR(INT_MAX) "]" -#else -# define INTRANGE_STR "[-2137483648,2147483647]" -#endif - - // throw if out of int range -inline int strtoi_complete_exact(char const* s,int base=10) { - long l=strtol_complete(s,base); - if (l::min() || l>std::numeric_limits::max()) - throw_string_to(s,"Out of range for int " INTRANGE_STR ": "); - return l; -} - -#if HAVE_LONGER_LONG -inline int& string_into(std::string const& s,int &x) { - x=strtoi_complete_exact(s.c_str()); - return x; -} -inline int& string_into(char const* s,int &x) { - x=strtoi_complete_exact(s); - return x; -} -#endif - -inline long& string_into(std::string const& s,long &x) { - x=strtol_complete(s.c_str()); - return x; -} -inline long& string_into(char const* s,long &x) { - x=strtol_complete(s); - return x; -} - - -//FIXME: preprocessor separation for tokens int<->unsigned int, long<->unsigned long, strtol<->strtoul ? 
massive code duplication -inline unsigned long strtoul_complete(char const* s,int base=10) { - char *e; - if (*s) { -#if HAVE_STRTOUL - unsigned long r=strtoul(s,&e,base); -#else -// unsigned long r=strtol(s,&e,base); //FIXME: not usually safe - unsigned long r; - sscanf(s,"%ul",&r); -#endif - char c=*e; - if (!c || isspace(c)) //simplifying assumption: we're happy if there's other stuff in the string, so long as the number ends in a space or eos. TODO: loop consuming spaces until end? - return r; - } - throw_string_to(s,"Couldn't convert to integer: "); -} - -inline unsigned strtou_complete_bounded(char const* s,int base=10) { - unsigned long l=strtoul_complete(s,base); - if (l::min()) - return std::numeric_limits::min(); - if (l>std::numeric_limits::max()) - return std::numeric_limits::max(); - return l; -} - -#ifdef UINT_MIN -# define UINTRANGE_STR "[" RANGE_STR(UINT_MIN) "," RANGE_STR(UINT_MAX) "]" -#else -# define UINTRANGE_STR "[0,4,294,967,295]" -#endif - - // throw if out of int range -inline unsigned strtou_complete_exact(char const* s,int base=10) { - unsigned long l=strtoul_complete(s,base); - if (l::min() || l>std::numeric_limits::max()) - throw_string_to(s,"Out of range for uint " UINTRANGE_STR ": "); - return l; -} - -#if HAVE_LONGER_LONG -inline unsigned& string_into(std::string const& s,unsigned &x) { - x=strtou_complete_exact(s.c_str()); - return x; -} -inline unsigned& string_into(char const* s,unsigned &x) { - x=strtou_complete_exact(s); - return x; -} -#endif - -inline unsigned long& string_into(std::string const& s,unsigned long &x) { - x=strtoul_complete(s.c_str()); - return x; -} -inline unsigned long& string_into(char const* s,unsigned long &x) { - x=strtoul_complete(s); - return x; -} - -//FIXME: end code duplication - - -/* 9 decimal places needed to avoid rounding error in float->string->float. 17 for double->string->double - in terms of usable decimal places, there are 6 for float and 15 for double - */ -inline std::string to_string_roundtrip(float x) { - char buf[17]; - return std::string(buf,buf+sprintf(buf,"%.9g",x)); -} -inline std::string to_string(float x) { -#if USE_FTOA - return ftos(x); -#else - char buf[15]; - return std::string(buf,buf+sprintf(buf,"%.7g",x)); -#endif -} -inline std::string to_string_roundtrip(double x) { - char buf[32]; - return std::string(buf,buf+sprintf(buf,"%.17g",x)); -} -inline std::string to_string(double x) { -#if USE_FTOA - return ftos(x); -#else - char buf[30]; - return std::string(buf,buf+sprintf(buf,"%.15g",x)); -#endif -} - -inline double& string_into(char const* s,double &x) { - x=std::atof(s); - return x; -} -inline float& string_into(char const* s,float &x) { - x=std::atof(s); - return x; -} - -inline double& string_into(std::string const& s,double &x) { - x=std::atof(s.c_str()); - return x; -} -inline float& string_into(std::string const& s,float &x) { - x=std::atof(s.c_str()); - return x; -} - - -template -bool try_string_into(Str const& str,Str &to,bool complete=true) -{ - str=to; - return true; -} - -inline std::string const& to_string(std::string const& d) -{ - return d; -} - -template -Str const& string_to(Str const &s) -{ - return s; -} - -template -Str & string_into(Str const &s,Str &d) -{ - return d=s; -} - -/* - -template inline -void substring_into(const Str &str,size_type pos,size_type n,Data &data) -{ -// std::istringstream i(str,pos,n); // doesn't exist! 
- std::istringstream i(str.substr(pos,n)); - if (!(i>>*data)) - throw std::runtime_error("Couldn't convert (string_into): "+str); -} - -template inline -Data string_to(const Str &str,size_type pos,size_type n) -{ - Data ret; - substring_into(str,pos,n,ret); - return ret; -} - -*/ - - - -#endif -- cgit v1.2.3 From cc01f3b7c9b87928be91e8a89f233a07a183ac2e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 29 Feb 2012 01:16:34 -0500 Subject: corpus tools --- utils/Makefile.am | 1 + 1 file changed, 1 insertion(+) (limited to 'utils') diff --git a/utils/Makefile.am b/utils/Makefile.am index 6e0678de..bb067ed9 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -24,6 +24,7 @@ noinst_LIBRARIES = libutils.a libutils_a_SOURCES = \ alignment_pharaoh.cc \ b64tools.cc \ + corpus_tools.cc \ dict.cc \ tdict.cc \ fdict.cc \ -- cgit v1.2.3 From a872f46ce1212703b8bed562c894ea1a932c0746 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 29 Feb 2012 07:00:49 +0000 Subject: mpi fixes --- training/lbl_model.cc | 54 +++++++++++++++++++++++++++++++++++++++++---------- utils/corpus_tools.cc | 16 +++++++++------ 2 files changed, 54 insertions(+), 16 deletions(-) (limited to 'utils') diff --git a/training/lbl_model.cc b/training/lbl_model.cc index def5075a..a46ce33c 100644 --- a/training/lbl_model.cc +++ b/training/lbl_model.cc @@ -15,6 +15,7 @@ #ifdef HAVE_MPI #include #include +#include namespace mpi = boost::mpi; #endif #include @@ -34,12 +35,26 @@ namespace mpi = boost::mpi; namespace po = boost::program_options; using namespace std; -#define kDIMENSIONS 100 +#define kDIMENSIONS 10 typedef Eigen::Matrix RVector; typedef Eigen::Matrix RTVector; typedef Eigen::Matrix TMatrix; vector r_src, r_trg; +#if HAVE_MPI +namespace boost { +namespace serialization { + +template +void serialize(Archive & ar, RVector & v, const unsigned int version) { + for (unsigned i = 0; i < kDIMENSIONS; ++i) + ar & v[i]; +} + +} // namespace serialization +} // namespace boost +#endif + bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() @@ -224,7 +239,7 @@ int main(int argc, char** argv) { srand(seed); } - TMatrix t; + TMatrix t = TMatrix::Zero(); if (rank == 0) { t = TMatrix::Random() / 50.0; for (unsigned i = 1; i < r_trg.size(); ++i) { @@ -241,16 +256,18 @@ int main(int argc, char** argv) { TMatrix g = TMatrix::Zero(); vector exp_src; vector z_src; - vector flat_g, flat_t; + vector flat_g, flat_t, rcv_grad; Flatten(t, &flat_t); bool converged = false; - // TODO broadcast embeddings - for (int iter = 0; !converged && iter < ITERATIONS; ++iter) { -#ifdef HAVE_MPI - mpi::broadcast(world, &flat_t[0], flat_t.size(), 0); +#if HAVE_MPI + mpi::broadcast(world, &flat_t[0], flat_t.size(), 0); + mpi::broadcast(world, r_trg, 0); + mpi::broadcast(world, r_src, 0); #endif + cerr << "rank=" << rank << ": " << r_trg[0][4] << endl; + for (int iter = 0; !converged && iter < ITERATIONS; ++iter) { + if (rank == 0) cerr << "ITERATION " << (iter + 1) << endl; Unflatten(flat_t, &t); - cerr << "ITERATION " << (iter + 1) << endl; double likelihood = 0; double denom = 0.0; lc = 0; @@ -350,7 +367,22 @@ int main(int argc, char** argv) { if (!SGD) { Flatten(g, &flat_g); obj = -likelihood; - // TODO - reduce gradient +#if HAVE_MPI + rcv_grad.resize(flat_g.size(), 0.0); + mpi::reduce(world, &flat_g[0], flat_g.size(), &rcv_grad[0], plus(), 0); + swap(flat_g, rcv_grad); + rcv_grad.clear(); + + double to = 0; + mpi::reduce(world, obj, to, plus(), 0); + obj = to; + 
double tlh = 0; + mpi::reduce(world, likelihood, tlh, plus(), 0); + likelihood = tlh; + double td = 0; + mpi::reduce(world, denom, td, plus(), 0); + denom = td; +#endif } if (rank == 0) { @@ -376,10 +408,12 @@ int main(int argc, char** argv) { } } #ifdef HAVE_MPI + mpi::broadcast(world, &flat_t[0], flat_t.size(), 0); mpi::broadcast(world, converged, 0); #endif } - cerr << "TRANSLATION MATRIX:" << endl << t << endl; + if (rank == 0) + cerr << "TRANSLATION MATRIX:" << endl << t << endl; return 0; } diff --git a/utils/corpus_tools.cc b/utils/corpus_tools.cc index a0542b6e..d17785af 100644 --- a/utils/corpus_tools.cc +++ b/utils/corpus_tools.cc @@ -33,10 +33,12 @@ void CorpusTools::ReadFromFile(const string& filename, while(getline(in, line)) { const bool skip = (lc % size != rank); ++lc; - if (skip) continue; TD::ConvertSentence(line, &tmp); - src->push_back(vector()); - vector* d = &src->back(); + vector* d = NULL; + if (!skip) { + src->push_back(vector()); + d = &src->back(); + } set* v = src_vocab; int s = 0; for (unsigned i = 0; i < tmp.size(); ++i) { @@ -44,11 +46,13 @@ void CorpusTools::ReadFromFile(const string& filename, ++s; if (s > 1) { cerr << "Unexpected format in line " << lc << ": " << line << endl; abort(); } assert(trg); - trg->push_back(vector()); - d = &trg->back(); + if (!skip) { + trg->push_back(vector()); + d = &trg->back(); + } v = trg_vocab; } else { - d->push_back(tmp[i]); + if (d) d->push_back(tmp[i]); if (v) v->insert(tmp[i]); } } -- cgit v1.2.3 From 378ba2373374015c8de4b360a30227f072616c6c Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 1 Mar 2012 23:09:32 -0500 Subject: compile fix on old versions of gcc with MPI enabled --- utils/fast_sparse_vector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h index 17fa47bf..d11be48f 100644 --- a/utils/fast_sparse_vector.h +++ b/utils/fast_sparse_vector.h @@ -363,7 +363,7 @@ class FastSparseVector { } ar & eff_size; while (it != this->end()) { - const std::pair wire_pair(FD::Convert(it->first), it->second); + const std::pair wire_pair(FD::Convert(it->first), it->second); ar & wire_pair; ++it; } -- cgit v1.2.3 From 2579dd24d3833823527e688196276c2fab381b37 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 3 Mar 2012 17:16:58 -0500 Subject: pyp lm, fixed hyperparameters inference --- gi/pf/align-lexonly-pyp.cc | 2 +- gi/pf/align-lexonly.cc | 2 +- gi/pf/brat.cc | 2 +- gi/pf/conditional_pseg.h | 4 +- gi/pf/learn_cfg.cc | 4 +- gi/pf/pfbrat.cc | 2 +- gi/pf/pyp_lm.cc | 70 ++++++++++++++++++++++++++++--- phrasinator/gibbs_train_plm.cc | 2 +- utils/ccrp.h | 95 ++++++++++++++++++------------------------ utils/ccrp_nt.h | 52 +++++++++++------------ utils/ccrp_onetable.h | 70 +++++++++++++++---------------- utils/mfcr.h | 58 +++++++++++++------------- 12 files changed, 203 insertions(+), 160 deletions(-) (limited to 'utils') diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc index e24cb457..4ce7cf62 100644 --- a/gi/pf/align-lexonly-pyp.cc +++ b/gi/pf/align-lexonly-pyp.cc @@ -104,7 +104,7 @@ struct HierarchicalWordBase { } void Summary() const { - cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << " (d=" << r.d() << ",\\alpha=" << r.alpha() << ')' << endl; + cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << " (d=" << r.discount() << ",\\alpha=" << r.alpha() << ')' << endl; for (MFCR >::const_iterator it = r.begin(); it != r.end(); ++it) cerr << " " << it->second.total_dish_count_ << " (on " << 
it->second.table_counts_.size() << " tables)" << TD::GetString(it->first) << endl; } diff --git a/gi/pf/align-lexonly.cc b/gi/pf/align-lexonly.cc index 8c1d689f..dbc9dc07 100644 --- a/gi/pf/align-lexonly.cc +++ b/gi/pf/align-lexonly.cc @@ -105,7 +105,7 @@ struct HierarchicalWordBase { } void Summary() const { - cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << " (\\alpha=" << r.concentration() << ')' << endl; + cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << " (\\alpha=" << r.alpha() << ')' << endl; for (CCRP_NoTable >::const_iterator it = r.begin(); it != r.end(); ++it) cerr << " " << it->second << '\t' << TD::GetString(it->first) << endl; } diff --git a/gi/pf/brat.cc b/gi/pf/brat.cc index 7b60ef23..c2c52760 100644 --- a/gi/pf/brat.cc +++ b/gi/pf/brat.cc @@ -191,7 +191,7 @@ struct UniphraseLM { void ResampleHyperparameters(MT19937* rng) { phrases_.resample_hyperparameters(rng); gen_.resample_hyperparameters(rng); - cerr << " " << phrases_.concentration(); + cerr << " " << phrases_.alpha(); } CCRP_NoTable > phrases_; diff --git a/gi/pf/conditional_pseg.h b/gi/pf/conditional_pseg.h index 2e9e38fc..f9841cbf 100644 --- a/gi/pf/conditional_pseg.h +++ b/gi/pf/conditional_pseg.h @@ -22,7 +22,7 @@ struct MConditionalTranslationModel { void Summary() const { std::cerr << "Number of conditioning contexts: " << r.size() << std::endl; for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - std::cerr << TD::GetString(it->first) << " \t(d=" << it->second.d() << ",\\alpha = " << it->second.alpha() << ") --------------------------" << std::endl; + std::cerr << TD::GetString(it->first) << " \t(d=" << it->second.discount() << ",\\alpha = " << it->second.alpha() << ") --------------------------" << std::endl; for (MFCR::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) std::cerr << " " << -1 << '\t' << i2->first << std::endl; } @@ -95,7 +95,7 @@ struct ConditionalTranslationModel { void Summary() const { std::cerr << "Number of conditioning contexts: " << r.size() << std::endl; for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - std::cerr << TD::GetString(it->first) << " \t(\\alpha = " << it->second.concentration() << ") --------------------------" << std::endl; + std::cerr << TD::GetString(it->first) << " \t(\\alpha = " << it->second.alpha() << ") --------------------------" << std::endl; for (CCRP_NoTable::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) std::cerr << " " << i2->second << '\t' << i2->first << std::endl; } diff --git a/gi/pf/learn_cfg.cc b/gi/pf/learn_cfg.cc index b2ca029a..5b748311 100644 --- a/gi/pf/learn_cfg.cc +++ b/gi/pf/learn_cfg.cc @@ -183,9 +183,9 @@ struct HieroLMModel { nts[i].resample_hyperparameters(rng); if (kHIERARCHICAL_PRIOR) { q0.resample_hyperparameters(rng); - cerr << "[base d=" << q0.discount() << ", alpha=" << q0.discount() << "]"; + cerr << "[base d=" << q0.discount() << ", alpha=" << q0.alpha() << "]"; } - cerr << " d=" << nts[0].discount() << ", alpha=" << nts[0].concentration() << endl; + cerr << " d=" << nts[0].discount() << ", alpha=" << nts[0].alpha() << endl; } const BaseRuleModel base; diff --git a/gi/pf/pfbrat.cc b/gi/pf/pfbrat.cc index 7b60ef23..c2c52760 100644 --- a/gi/pf/pfbrat.cc +++ b/gi/pf/pfbrat.cc @@ -191,7 +191,7 @@ struct UniphraseLM { void ResampleHyperparameters(MT19937* rng) { phrases_.resample_hyperparameters(rng); gen_.resample_hyperparameters(rng); - cerr << " " << phrases_.concentration(); + cerr << " " << phrases_.alpha(); } CCRP_NoTable > 
phrases_; diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc index 2837e33c..0d85536c 100644 --- a/gi/pf/pyp_lm.cc +++ b/gi/pf/pyp_lm.cc @@ -50,16 +50,19 @@ template struct PYPLM; // uniform base distribution template<> struct PYPLM<0> { - PYPLM(unsigned vs) : p0(1.0 / vs) {} - void increment(WordID w, const vector& context, MT19937* rng) const {} - void decrement(WordID w, const vector& context, MT19937* rng) const {} + PYPLM(unsigned vs) : p0(1.0 / vs), draws() {} + void increment(WordID w, const vector& context, MT19937* rng) { ++draws; } + void decrement(WordID w, const vector& context, MT19937* rng) { --draws; assert(draws >= 0); } double prob(WordID w, const vector& context) const { return p0; } + void resample_hyperparameters(MT19937* rng, const unsigned nloop, const unsigned niterations) {} + double log_likelihood() const { return draws * log(p0); } const double p0; + int draws; }; // represents an N-gram LM template struct PYPLM { - PYPLM(unsigned vs) : backoff(vs) {} + PYPLM(unsigned vs) : backoff(vs), d(0.8), alpha(1.0) {} void increment(WordID w, const vector& context, MT19937* rng) { const double bo = backoff.prob(w, context); static vector lookup(N-1); @@ -67,7 +70,7 @@ template struct PYPLM { lookup[i] = context[context.size() - 1 - i]; typename unordered_map, CCRP, boost::hash > >::iterator it = p.find(lookup); if (it == p.end()) - it = p.insert(make_pair(lookup, CCRP(1,1,1,1))).first; + it = p.insert(make_pair(lookup, CCRP(d,alpha))).first; if (it->second.increment(w, bo, rng)) backoff.increment(w, context, rng); } @@ -89,7 +92,58 @@ template struct PYPLM { if (it == p.end()) return bo; return it->second.prob(w, bo); } + + double log_likelihood() const { + return log_likelihood(d, alpha) + backoff.log_likelihood(); + } + + double log_likelihood(const double& dd, const double& aa) const { + if (aa <= -dd) return -std::numeric_limits::infinity(); + double llh = Md::log_beta_density(dd, 1, 1) + Md::log_gamma_density(aa, 1, 1); + typename unordered_map, CCRP, boost::hash > >::const_iterator it; + for (it = p.begin(); it != p.end(); ++it) + llh += it->second.log_crp_prob(dd, aa); + return llh; + } + + struct DiscountResampler { + DiscountResampler(const PYPLM& m) : m_(m) {} + const PYPLM& m_; + double operator()(const double& proposed_discount) const { + return m_.log_likelihood(proposed_discount, m_.alpha); + } + }; + + struct AlphaResampler { + AlphaResampler(const PYPLM& m) : m_(m) {} + const PYPLM& m_; + double operator()(const double& proposed_alpha) const { + return m_.log_likelihood(m_.d, proposed_alpha); + } + }; + + void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { + DiscountResampler dr(*this); + AlphaResampler ar(*this); + for (int iter = 0; iter < nloop; ++iter) { + alpha = slice_sampler1d(ar, alpha, *rng, 0.0, + std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); + d = slice_sampler1d(dr, d, *rng, std::numeric_limits::min(), + 1.0, 0.0, niterations, 100*niterations); + } + alpha = slice_sampler1d(ar, alpha, *rng, 0.0, + std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); + typename unordered_map, CCRP, boost::hash > >::iterator it; + cerr << "PYPLM<" << N << ">(d=" << d << ",a=" << alpha << ") = " << log_likelihood(d, alpha) << endl; + for (it = p.begin(); it != p.end(); ++it) { + it->second.set_discount(d); + it->second.set_alpha(alpha); + } + backoff.resample_hyperparameters(rng, nloop, niterations); + } + PYPLM backoff; + double d, alpha; unordered_map, CCRP, boost::hash > > p; 
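  // (Editor's note on the structure above: p gives every distinct
  //  (N-1)-word context its own Pitman-Yor restaurant, and all restaurants
  //  of this order share a single (d, alpha) pair, resampled jointly in
  //  resample_hyperparameters(). Whenever increment() seats a customer at a
  //  brand-new table, the same count is pushed down into the order-(N-1)
  //  backoff model, so unseen or sparse contexts fall back smoothly toward
  //  shorter histories; PYPLM<0> grounds the recursion with a uniform
  //  distribution over the vocabulary.)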
}; @@ -109,7 +163,7 @@ int main(int argc, char** argv) { cerr << "Reading corpus...\n"; CorpusTools::ReadFromFile(conf["input"].as(), &corpuse, &vocabe); cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; -#define kORDER 5 +#define kORDER 3 PYPLM lm(vocabe.size()); vector ctx(kORDER - 1, TD::Convert("")); int mci = corpuse.size() * 99 / 100; @@ -126,6 +180,10 @@ int main(int argc, char** argv) { if (SS > 0) lm.decrement(kEOS, ctx, &rng); lm.increment(kEOS, ctx, &rng); } + if (SS % 10 == 9) { + cerr << " [LLH=" << lm.log_likelihood() << "]" << endl; + if (SS % 20 == 19) lm.resample_hyperparameters(&rng); + } else { cerr << '.' << flush; } } double llh = 0; unsigned cnt = 0; diff --git a/phrasinator/gibbs_train_plm.cc b/phrasinator/gibbs_train_plm.cc index 66b46011..54861dcb 100644 --- a/phrasinator/gibbs_train_plm.cc +++ b/phrasinator/gibbs_train_plm.cc @@ -252,7 +252,7 @@ struct UniphraseLM { void ResampleHyperparameters(MT19937* rng) { phrases_.resample_hyperparameters(rng); gen_.resample_hyperparameters(rng); - cerr << " d=" << phrases_.discount() << ",c=" << phrases_.concentration(); + cerr << " d=" << phrases_.discount() << ",a=" << phrases_.alpha(); } CCRP > phrases_; diff --git a/utils/ccrp.h b/utils/ccrp.h index 1a9e3ed5..d9a38089 100644 --- a/utils/ccrp.h +++ b/utils/ccrp.h @@ -17,35 +17,37 @@ template > class CCRP { public: - CCRP(double disc, double conc) : + CCRP(double disc, double alpha) : num_tables_(), num_customers_(), discount_(disc), - concentration_(conc), + alpha_(alpha), discount_prior_alpha_(std::numeric_limits::quiet_NaN()), discount_prior_beta_(std::numeric_limits::quiet_NaN()), - concentration_prior_shape_(std::numeric_limits::quiet_NaN()), - concentration_prior_rate_(std::numeric_limits::quiet_NaN()) {} + alpha_prior_shape_(std::numeric_limits::quiet_NaN()), + alpha_prior_rate_(std::numeric_limits::quiet_NaN()) {} CCRP(double d_alpha, double d_beta, double c_shape, double c_rate, double d = 0.9, double c = 1.0) : num_tables_(), num_customers_(), discount_(d), - concentration_(c), + alpha_(c), discount_prior_alpha_(d_alpha), discount_prior_beta_(d_beta), - concentration_prior_shape_(c_shape), - concentration_prior_rate_(c_rate) {} + alpha_prior_shape_(c_shape), + alpha_prior_rate_(c_rate) {} double discount() const { return discount_; } - double concentration() const { return concentration_; } + double alpha() const { return alpha_; } + void set_discount(double d) { discount_ = d; } + void set_alpha(double a) { alpha_ = a; } bool has_discount_prior() const { return !std::isnan(discount_prior_alpha_); } - bool has_concentration_prior() const { - return !std::isnan(concentration_prior_shape_); + bool has_alpha_prior() const { + return !std::isnan(alpha_prior_shape_); } void clear() { @@ -79,7 +81,7 @@ class CCRP { DishLocations& loc = dish_locs_[dish]; bool share_table = false; if (loc.total_dish_count_) { - const double p_empty = (concentration_ + num_tables_ * discount_) * p0; + const double p_empty = (alpha_ + num_tables_ * discount_) * p0; const double p_share = (loc.total_dish_count_ - loc.table_counts_.size() * discount_); share_table = rng->SelectSample(p_empty, p_share); } @@ -113,7 +115,7 @@ class CCRP { DishLocations& loc = dish_locs_[dish]; bool share_table = false; if (loc.total_dish_count_) { - const T p_empty = T(concentration_ + num_tables_ * discount_) * p0; + const T p_empty = T(alpha_ + num_tables_ * discount_) * p0; const T p_share = T(loc.total_dish_count_ - loc.table_counts_.size() * discount_); 
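      // (Editor's note: these two unnormalized masses implement the
      //  Pitman-Yor seating rule. With c_w customers at t_w tables for this
      //  dish, c customers at T tables overall, discount d and strength
      //  alpha, p_empty = (alpha + d*T) * p0 is the mass for opening a new
      //  table and p_share = c_w - d*t_w is the mass for joining an existing
      //  one, which together give the predictive probability computed in
      //  prob():
      //    P(w) = (c_w - d*t_w + (alpha + d*T) * p0) / (c + alpha).
      //  E.g. c = 10, T = 4, c_w = 3, t_w = 1, d = 0.5, alpha = 1, p0 = 0.001
      //  gives (3 - 0.5 + 3*0.001) / 11 ~= 0.2275.)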
share_table = rng->SelectSample(p_empty, p_share); } @@ -180,63 +182,46 @@ class CCRP { double prob(const Dish& dish, const double& p0) const { const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - const double r = num_tables_ * discount_ + concentration_; + const double r = num_tables_ * discount_ + alpha_; if (it == dish_locs_.end()) { - return r * p0 / (num_customers_ + concentration_); + return r * p0 / (num_customers_ + alpha_); } else { return (it->second.total_dish_count_ - discount_ * it->second.table_counts_.size() + r * p0) / - (num_customers_ + concentration_); + (num_customers_ + alpha_); } } template T probT(const Dish& dish, const T& p0) const { const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - const T r = T(num_tables_ * discount_ + concentration_); + const T r = T(num_tables_ * discount_ + alpha_); if (it == dish_locs_.end()) { - return r * p0 / T(num_customers_ + concentration_); + return r * p0 / T(num_customers_ + alpha_); } else { return (T(it->second.total_dish_count_ - discount_ * it->second.table_counts_.size()) + r * p0) / - T(num_customers_ + concentration_); + T(num_customers_ + alpha_); } } double log_crp_prob() const { - return log_crp_prob(discount_, concentration_); - } - - static double log_beta_density(const double& x, const double& alpha, const double& beta) { - assert(x > 0.0); - assert(x < 1.0); - assert(alpha > 0.0); - assert(beta > 0.0); - const double lp = (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta); - return lp; - } - - static double log_gamma_density(const double& x, const double& shape, const double& rate) { - assert(x >= 0.0); - assert(shape > 0.0); - assert(rate > 0.0); - const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape); - return lp; + return log_crp_prob(discount_, alpha_); } // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process // does not include P_0's - double log_crp_prob(const double& discount, const double& concentration) const { + double log_crp_prob(const double& discount, const double& alpha) const { double lp = 0.0; if (has_discount_prior()) - lp = log_beta_density(discount, discount_prior_alpha_, discount_prior_beta_); - if (has_concentration_prior()) - lp += log_gamma_density(concentration, concentration_prior_shape_, concentration_prior_rate_); + lp = Md::log_beta_density(discount, discount_prior_alpha_, discount_prior_beta_); + if (has_alpha_prior()) + lp += Md::log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_); assert(lp <= 0.0); if (num_customers_) { if (discount > 0.0) { const double r = lgamma(1.0 - discount); - lp += lgamma(concentration) - lgamma(concentration + num_customers_) - + num_tables_ * log(discount) + lgamma(concentration / discount + num_tables_) - - lgamma(concentration / discount); + lp += lgamma(alpha) - lgamma(alpha + num_customers_) + + num_tables_ * log(discount) + lgamma(alpha / discount + num_tables_) + - lgamma(alpha / discount); assert(std::isfinite(lp)); for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); it != dish_locs_.end(); ++it) { @@ -254,12 +239,12 @@ class CCRP { } void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { - assert(has_discount_prior() || has_concentration_prior()); + assert(has_discount_prior() || has_alpha_prior()); DiscountResampler dr(*this); ConcentrationResampler cr(*this); for (int iter = 0; iter < nloop; ++iter) { - if 
(has_concentration_prior()) { - concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0, + if (has_alpha_prior()) { + alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); } if (has_discount_prior()) { @@ -267,7 +252,7 @@ class CCRP { 1.0, 0.0, niterations, 100*niterations); } } - concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0, + alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); } @@ -275,15 +260,15 @@ class CCRP { DiscountResampler(const CCRP& crp) : crp_(crp) {} const CCRP& crp_; double operator()(const double& proposed_discount) const { - return crp_.log_crp_prob(proposed_discount, crp_.concentration_); + return crp_.log_crp_prob(proposed_discount, crp_.alpha_); } }; struct ConcentrationResampler { ConcentrationResampler(const CCRP& crp) : crp_(crp) {} const CCRP& crp_; - double operator()(const double& proposed_concentration) const { - return crp_.log_crp_prob(crp_.discount_, proposed_concentration); + double operator()(const double& proposed_alpha) const { + return crp_.log_crp_prob(crp_.discount_, proposed_alpha); } }; @@ -295,7 +280,7 @@ class CCRP { }; void Print(std::ostream* out) const { - std::cerr << "PYP(d=" << discount_ << ",c=" << concentration_ << ") customers=" << num_customers_ << std::endl; + std::cerr << "PYP(d=" << discount_ << ",c=" << alpha_ << ") customers=" << num_customers_ << std::endl; for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); it != dish_locs_.end(); ++it) { (*out) << it->first << " (" << it->second.total_dish_count_ << " on " << it->second.table_counts_.size() << " tables): "; @@ -320,15 +305,15 @@ class CCRP { std::tr1::unordered_map dish_locs_; double discount_; - double concentration_; + double alpha_; // optional beta prior on discount_ (NaN if no prior) double discount_prior_alpha_; double discount_prior_beta_; - // optional gamma prior on concentration_ (NaN if no prior) - double concentration_prior_shape_; - double concentration_prior_rate_; + // optional gamma prior on alpha_ (NaN if no prior) + double alpha_prior_shape_; + double alpha_prior_rate_; }; template diff --git a/utils/ccrp_nt.h b/utils/ccrp_nt.h index 63b6f4c2..79321493 100644 --- a/utils/ccrp_nt.h +++ b/utils/ccrp_nt.h @@ -18,20 +18,20 @@ class CCRP_NoTable { public: explicit CCRP_NoTable(double conc) : num_customers_(), - concentration_(conc), - concentration_prior_shape_(std::numeric_limits::quiet_NaN()), - concentration_prior_rate_(std::numeric_limits::quiet_NaN()) {} + alpha_(conc), + alpha_prior_shape_(std::numeric_limits::quiet_NaN()), + alpha_prior_rate_(std::numeric_limits::quiet_NaN()) {} CCRP_NoTable(double c_shape, double c_rate, double c = 10.0) : num_customers_(), - concentration_(c), - concentration_prior_shape_(c_shape), - concentration_prior_rate_(c_rate) {} + alpha_(c), + alpha_prior_shape_(c_shape), + alpha_prior_rate_(c_rate) {} - double concentration() const { return concentration_; } + double alpha() const { return alpha_; } - bool has_concentration_prior() const { - return !std::isnan(concentration_prior_shape_); + bool has_alpha_prior() const { + return !std::isnan(alpha_prior_shape_); } void clear() { @@ -73,16 +73,16 @@ class CCRP_NoTable { double prob(const Dish& dish, const double& p0) const { const unsigned at_table = num_customers(dish); - return (at_table + p0 * concentration_) / (num_customers_ + concentration_); + return (at_table + p0 * alpha_) / (num_customers_ + 
alpha_); } double logprob(const Dish& dish, const double& logp0) const { const unsigned at_table = num_customers(dish); - return log(at_table + exp(logp0 + log(concentration_))) - log(num_customers_ + concentration_); + return log(at_table + exp(logp0 + log(alpha_))) - log(num_customers_ + alpha_); } double log_crp_prob() const { - return log_crp_prob(concentration_); + return log_crp_prob(alpha_); } static double log_gamma_density(const double& x, const double& shape, const double& rate) { @@ -95,14 +95,14 @@ class CCRP_NoTable { // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process // does not include P_0's - double log_crp_prob(const double& concentration) const { + double log_crp_prob(const double& alpha) const { double lp = 0.0; - if (has_concentration_prior()) - lp += log_gamma_density(concentration, concentration_prior_shape_, concentration_prior_rate_); + if (has_alpha_prior()) + lp += log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_); assert(lp <= 0.0); if (num_customers_) { - lp += lgamma(concentration) - lgamma(concentration + num_customers_) + - custs_.size() * log(concentration); + lp += lgamma(alpha) - lgamma(alpha + num_customers_) + + custs_.size() * log(alpha); assert(std::isfinite(lp)); for (typename std::tr1::unordered_map::const_iterator it = custs_.begin(); it != custs_.end(); ++it) { @@ -114,10 +114,10 @@ class CCRP_NoTable { } void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { - assert(has_concentration_prior()); + assert(has_alpha_prior()); ConcentrationResampler cr(*this); for (int iter = 0; iter < nloop; ++iter) { - concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0, + alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); } } @@ -125,13 +125,13 @@ class CCRP_NoTable { struct ConcentrationResampler { ConcentrationResampler(const CCRP_NoTable& crp) : crp_(crp) {} const CCRP_NoTable& crp_; - double operator()(const double& proposed_concentration) const { - return crp_.log_crp_prob(proposed_concentration); + double operator()(const double& proposed_alpha) const { + return crp_.log_crp_prob(proposed_alpha); } }; void Print(std::ostream* out) const { - (*out) << "DP(alpha=" << concentration_ << ") customers=" << num_customers_ << std::endl; + (*out) << "DP(alpha=" << alpha_ << ") customers=" << num_customers_ << std::endl; int cc = 0; for (typename std::tr1::unordered_map::const_iterator it = custs_.begin(); it != custs_.end(); ++it) { @@ -153,11 +153,11 @@ class CCRP_NoTable { return custs_.end(); } - double concentration_; + double alpha_; - // optional gamma prior on concentration_ (NaN if no prior) - double concentration_prior_shape_; - double concentration_prior_rate_; + // optional gamma prior on alpha_ (NaN if no prior) + double alpha_prior_shape_; + double alpha_prior_rate_; }; template diff --git a/utils/ccrp_onetable.h b/utils/ccrp_onetable.h index b63737d1..1fe01b0e 100644 --- a/utils/ccrp_onetable.h +++ b/utils/ccrp_onetable.h @@ -21,33 +21,33 @@ class CCRP_OneTable { num_tables_(), num_customers_(), discount_(disc), - concentration_(conc), + alpha_(conc), discount_prior_alpha_(std::numeric_limits::quiet_NaN()), discount_prior_beta_(std::numeric_limits::quiet_NaN()), - concentration_prior_shape_(std::numeric_limits::quiet_NaN()), - concentration_prior_rate_(std::numeric_limits::quiet_NaN()) {} + alpha_prior_shape_(std::numeric_limits::quiet_NaN()), + alpha_prior_rate_(std::numeric_limits::quiet_NaN()) 
{} CCRP_OneTable(double d_alpha, double d_beta, double c_shape, double c_rate, double d = 0.9, double c = 1.0) : num_tables_(), num_customers_(), discount_(d), - concentration_(c), + alpha_(c), discount_prior_alpha_(d_alpha), discount_prior_beta_(d_beta), - concentration_prior_shape_(c_shape), - concentration_prior_rate_(c_rate) {} + alpha_prior_shape_(c_shape), + alpha_prior_rate_(c_rate) {} double discount() const { return discount_; } - double concentration() const { return concentration_; } - void set_concentration(double c) { concentration_ = c; } + double alpha() const { return alpha_; } + void set_alpha(double c) { alpha_ = c; } void set_discount(double d) { discount_ = d; } bool has_discount_prior() const { return !std::isnan(discount_prior_alpha_); } - bool has_concentration_prior() const { - return !std::isnan(concentration_prior_shape_); + bool has_alpha_prior() const { + return !std::isnan(alpha_prior_shape_); } void clear() { @@ -108,29 +108,29 @@ class CCRP_OneTable { double prob(const Dish& dish, const double& p0) const { const typename DishMapType::const_iterator it = dish_counts_.find(dish); - const double r = num_tables_ * discount_ + concentration_; + const double r = num_tables_ * discount_ + alpha_; if (it == dish_counts_.end()) { - return r * p0 / (num_customers_ + concentration_); + return r * p0 / (num_customers_ + alpha_); } else { return (it->second - discount_ + r * p0) / - (num_customers_ + concentration_); + (num_customers_ + alpha_); } } template T probT(const Dish& dish, const T& p0) const { const typename DishMapType::const_iterator it = dish_counts_.find(dish); - const T r(num_tables_ * discount_ + concentration_); + const T r(num_tables_ * discount_ + alpha_); if (it == dish_counts_.end()) { - return r * p0 / T(num_customers_ + concentration_); + return r * p0 / T(num_customers_ + alpha_); } else { return (T(it->second - discount_) + r * p0) / - T(num_customers_ + concentration_); + T(num_customers_ + alpha_); } } double log_crp_prob() const { - return log_crp_prob(discount_, concentration_); + return log_crp_prob(discount_, alpha_); } static double log_beta_density(const double& x, const double& alpha, const double& beta) { @@ -152,19 +152,19 @@ class CCRP_OneTable { // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process // does not include P_0's - double log_crp_prob(const double& discount, const double& concentration) const { + double log_crp_prob(const double& discount, const double& alpha) const { double lp = 0.0; if (has_discount_prior()) lp = log_beta_density(discount, discount_prior_alpha_, discount_prior_beta_); - if (has_concentration_prior()) - lp += log_gamma_density(concentration, concentration_prior_shape_, concentration_prior_rate_); + if (has_alpha_prior()) + lp += log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_); assert(lp <= 0.0); if (num_customers_) { if (discount > 0.0) { const double r = lgamma(1.0 - discount); - lp += lgamma(concentration) - lgamma(concentration + num_customers_) - + num_tables_ * log(discount) + lgamma(concentration / discount + num_tables_) - - lgamma(concentration / discount); + lp += lgamma(alpha) - lgamma(alpha + num_customers_) + + num_tables_ * log(discount) + lgamma(alpha / discount + num_tables_) + - lgamma(alpha / discount); assert(std::isfinite(lp)); for (typename DishMapType::const_iterator it = dish_counts_.begin(); it != dish_counts_.end(); ++it) { @@ -180,12 +180,12 @@ class CCRP_OneTable { } void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const 
unsigned niterations = 10) { - assert(has_discount_prior() || has_concentration_prior()); + assert(has_discount_prior() || has_alpha_prior()); DiscountResampler dr(*this); ConcentrationResampler cr(*this); for (int iter = 0; iter < nloop; ++iter) { - if (has_concentration_prior()) { - concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0, + if (has_alpha_prior()) { + alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); } if (has_discount_prior()) { @@ -193,7 +193,7 @@ class CCRP_OneTable { 1.0, 0.0, niterations, 100*niterations); } } - concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0, + alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); } @@ -201,20 +201,20 @@ class CCRP_OneTable { DiscountResampler(const CCRP_OneTable& crp) : crp_(crp) {} const CCRP_OneTable& crp_; double operator()(const double& proposed_discount) const { - return crp_.log_crp_prob(proposed_discount, crp_.concentration_); + return crp_.log_crp_prob(proposed_discount, crp_.alpha_); } }; struct ConcentrationResampler { ConcentrationResampler(const CCRP_OneTable& crp) : crp_(crp) {} const CCRP_OneTable& crp_; - double operator()(const double& proposed_concentration) const { - return crp_.log_crp_prob(crp_.discount_, proposed_concentration); + double operator()(const double& proposed_alpha) const { + return crp_.log_crp_prob(crp_.discount_, proposed_alpha); } }; void Print(std::ostream* out) const { - (*out) << "PYP(d=" << discount_ << ",c=" << concentration_ << ") customers=" << num_customers_ << std::endl; + (*out) << "PYP(d=" << discount_ << ",c=" << alpha_ << ") customers=" << num_customers_ << std::endl; for (typename DishMapType::const_iterator it = dish_counts_.begin(); it != dish_counts_.end(); ++it) { (*out) << " " << it->first << " = " << it->second << std::endl; } @@ -233,15 +233,15 @@ class CCRP_OneTable { DishMapType dish_counts_; double discount_; - double concentration_; + double alpha_; // optional beta prior on discount_ (NaN if no prior) double discount_prior_alpha_; double discount_prior_beta_; - // optional gamma prior on concentration_ (NaN if no prior) - double concentration_prior_shape_; - double concentration_prior_rate_; + // optional gamma prior on alpha_ (NaN if no prior) + double alpha_prior_shape_; + double alpha_prior_rate_; }; template diff --git a/utils/mfcr.h b/utils/mfcr.h index 396d0205..df988f51 100644 --- a/utils/mfcr.h +++ b/utils/mfcr.h @@ -43,29 +43,29 @@ class MFCR { num_floors_(num_floors), num_tables_(), num_customers_(), - d_(d), + discount_(d), alpha_(alpha), - d_prior_alpha_(std::numeric_limits::quiet_NaN()), - d_prior_beta_(std::numeric_limits::quiet_NaN()), + discount_prior_alpha_(std::numeric_limits::quiet_NaN()), + discount_prior_beta_(std::numeric_limits::quiet_NaN()), alpha_prior_shape_(std::numeric_limits::quiet_NaN()), alpha_prior_rate_(std::numeric_limits::quiet_NaN()) {} - MFCR(unsigned num_floors, double d_alpha, double d_beta, double alpha_shape, double alpha_rate, double d = 0.9, double alpha = 10.0) : + MFCR(unsigned num_floors, double discount_alpha, double discount_beta, double alpha_shape, double alpha_rate, double d = 0.9, double alpha = 10.0) : num_floors_(num_floors), num_tables_(), num_customers_(), - d_(d), + discount_(d), alpha_(alpha), - d_prior_alpha_(d_alpha), - d_prior_beta_(d_beta), + discount_prior_alpha_(discount_alpha), + discount_prior_beta_(discount_beta), alpha_prior_shape_(alpha_shape), 
alpha_prior_rate_(alpha_rate) {} - double d() const { return d_; } + double discount() const { return discount_; } double alpha() const { return alpha_; } - bool has_d_prior() const { - return !std::isnan(d_prior_alpha_); + bool has_discount_prior() const { + return !std::isnan(discount_prior_alpha_); } bool has_alpha_prior() const { @@ -122,15 +122,15 @@ class MFCR { int floor = -1; bool share_table = false; if (loc.total_dish_count_) { - const double p_empty = (alpha_ + num_tables_ * d_) * marg_p0; - const double p_share = (loc.total_dish_count_ - loc.table_counts_.size() * d_); + const double p_empty = (alpha_ + num_tables_ * discount_) * marg_p0; + const double p_share = (loc.total_dish_count_ - loc.table_counts_.size() * discount_); share_table = rng->SelectSample(p_empty, p_share); } if (share_table) { - double r = rng->next() * (loc.total_dish_count_ - loc.table_counts_.size() * d_); + double r = rng->next() * (loc.total_dish_count_ - loc.table_counts_.size() * discount_); for (typename std::list::iterator ti = loc.table_counts_.begin(); ti != loc.table_counts_.end(); ++ti) { - r -= ti->count - d_; + r -= ti->count - discount_; if (r <= 0.0) { ++ti->count; floor = ti->floor; @@ -206,25 +206,25 @@ class MFCR { const double marg_p0 = std::inner_product(p0s.begin(), p0s.end(), lambdas.begin(), 0.0); assert(marg_p0 <= 1.0); const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - const double r = num_tables_ * d_ + alpha_; + const double r = num_tables_ * discount_ + alpha_; if (it == dish_locs_.end()) { return r * marg_p0 / (num_customers_ + alpha_); } else { - return (it->second.total_dish_count_ - d_ * it->second.table_counts_.size() + r * marg_p0) / + return (it->second.total_dish_count_ - discount_ * it->second.table_counts_.size() + r * marg_p0) / (num_customers_ + alpha_); } } double log_crp_prob() const { - return log_crp_prob(d_, alpha_); + return log_crp_prob(discount_, alpha_); } // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process // does not include draws from G_w's double log_crp_prob(const double& d, const double& alpha) const { double lp = 0.0; - if (has_d_prior()) - lp = Md::log_beta_density(d, d_prior_alpha_, d_prior_beta_); + if (has_discount_prior()) + lp = Md::log_beta_density(d, discount_prior_alpha_, discount_prior_beta_); if (has_alpha_prior()) lp += Md::log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_); assert(lp <= 0.0); @@ -251,7 +251,7 @@ class MFCR { } void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { - assert(has_d_prior() || has_alpha_prior()); + assert(has_discount_prior() || has_alpha_prior()); DiscountResampler dr(*this); ConcentrationResampler cr(*this); for (int iter = 0; iter < nloop; ++iter) { @@ -259,8 +259,8 @@ class MFCR { alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); } - if (has_d_prior()) { - d_ = slice_sampler1d(dr, d_, *rng, std::numeric_limits::min(), + if (has_discount_prior()) { + discount_ = slice_sampler1d(dr, discount_, *rng, std::numeric_limits::min(), 1.0, 0.0, niterations, 100*niterations); } } @@ -279,8 +279,8 @@ class MFCR { struct ConcentrationResampler { ConcentrationResampler(const MFCR& crp) : crp_(crp) {} const MFCR& crp_; - double operator()(const double& proposed_alpha) const { - return crp_.log_crp_prob(crp_.d_, proposed_alpha); + double operator()(const double& proposediscount_alpha) const { + return crp_.log_crp_prob(crp_.discount_, 
proposediscount_alpha); } }; @@ -292,7 +292,7 @@ class MFCR { }; void Print(std::ostream* out) const { - (*out) << "MFCR(d=" << d_ << ",alpha=" << alpha_ << ") customers=" << num_customers_ << std::endl; + (*out) << "MFCR(d=" << discount_ << ",alpha=" << alpha_ << ") customers=" << num_customers_ << std::endl; for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); it != dish_locs_.end(); ++it) { (*out) << it->first << " (" << it->second.total_dish_count_ << " on " << it->second.table_counts_.size() << " tables): "; @@ -317,12 +317,12 @@ class MFCR { unsigned num_customers_; std::tr1::unordered_map dish_locs_; - double d_; + double discount_; double alpha_; - // optional beta prior on d_ (NaN if no prior) - double d_prior_alpha_; - double d_prior_beta_; + // optional beta prior on discount_ (NaN if no prior) + double discount_prior_alpha_; + double discount_prior_beta_; // optional gamma prior on alpha_ (NaN if no prior) double alpha_prior_shape_; -- cgit v1.2.3 From 29ae46010c3610dda877f2d1a07fe942f79bfc31 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 3 Mar 2012 22:18:58 +0000 Subject: fix include --- utils/ccrp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'utils') diff --git a/utils/ccrp.h b/utils/ccrp.h index d9a38089..61ab5576 100644 --- a/utils/ccrp.h +++ b/utils/ccrp.h @@ -11,6 +11,7 @@ #include #include "sampler.h" #include "slice_sampler.h" +#include "m.h" // Chinese restaurant process (Pitman-Yor parameters) with table tracking. -- cgit v1.2.3 From 5b2daa43c608d648a077d37ed8ab0217f8ce8104 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 4 Mar 2012 21:35:50 -0500 Subject: move crp stuff around --- gi/clda/src/Makefile.am | 11 ------ gi/clda/src/crp_test.cc | 102 ------------------------------------------------ utils/Makefile.am | 5 ++- utils/crp_test.cc | 102 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 106 insertions(+), 114 deletions(-) delete mode 100644 gi/clda/src/crp_test.cc create mode 100644 utils/crp_test.cc (limited to 'utils') diff --git a/gi/clda/src/Makefile.am b/gi/clda/src/Makefile.am index 3aab17da..cdca1f97 100644 --- a/gi/clda/src/Makefile.am +++ b/gi/clda/src/Makefile.am @@ -1,14 +1,3 @@ -if HAVE_GTEST -noinst_PROGRAMS = \ - crp_test - -TESTS = crp_test - -crp_test_SOURCES = crp_test.cc -crp_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) - -endif - bin_PROGRAMS = clda clda_SOURCES = clda.cc diff --git a/gi/clda/src/crp_test.cc b/gi/clda/src/crp_test.cc deleted file mode 100644 index 561cd4dd..00000000 --- a/gi/clda/src/crp_test.cc +++ /dev/null @@ -1,102 +0,0 @@ -#include -#include -#include - -#include - -#include "ccrp.h" -#include "sampler.h" - -const size_t MAX_DOC_LEN_CHARS = 10000000; - -using namespace std; - -class CRPTest : public testing::Test { - public: - CRPTest() {} - protected: - virtual void SetUp() { } - virtual void TearDown() { } - MT19937 rng; -}; - -TEST_F(CRPTest, Dist) { - CCRP crp(0.1, 5); - double un = 0.25; - int tt = 0; - tt += crp.increment("hi", un, &rng); - tt += crp.increment("foo", un, &rng); - tt += crp.increment("bar", un, &rng); - tt += crp.increment("bar", un, &rng); - tt += crp.increment("bar", un, &rng); - tt += crp.increment("bar", un, &rng); - tt += crp.increment("bar", un, &rng); - tt += crp.increment("bar", un, &rng); - tt += crp.increment("bar", un, &rng); - cout << "tt=" << tt << endl; - cout << crp << endl; - cout << " P(bar)=" << crp.prob("bar", un) << endl; - cout << " P(hi)=" << crp.prob("hi", un) << endl; - cout << " P(baz)=" << crp.prob("baz", un) << endl; 
- cout << " P(foo)=" << crp.prob("foo", un) << endl; - double x = crp.prob("bar", un) + crp.prob("hi", un) + crp.prob("baz", un) + crp.prob("foo", un); - cout << " tot=" << x << endl; - EXPECT_FLOAT_EQ(1.0, x); - tt += crp.decrement("hi", &rng); - tt += crp.decrement("bar", &rng); - cout << crp << endl; - tt += crp.decrement("bar", &rng); - cout << crp << endl; - cout << "tt=" << tt << endl; -} - -TEST_F(CRPTest, Exchangability) { - double tot = 0; - double xt = 0; - CCRP crp(0.5, 1.0); - int cust = 10; - vector hist(cust + 1, 0); - for (int i = 0; i < cust; ++i) { crp.increment(1, 1.0, &rng); } - const int samples = 100000; - const bool simulate = true; - for (int k = 0; k < samples; ++k) { - if (!simulate) { - crp.clear(); - for (int i = 0; i < cust; ++i) { crp.increment(1, 1.0, &rng); } - } else { - int da = rng.next() * cust; - bool a = rng.next() < 0.5; - if (a) { - for (int i = 0; i < da; ++i) { crp.increment(1, 1.0, &rng); } - for (int i = 0; i < da; ++i) { crp.decrement(1, &rng); } - xt += 1.0; - } else { - for (int i = 0; i < da; ++i) { crp.decrement(1, &rng); } - for (int i = 0; i < da; ++i) { crp.increment(1, 1.0, &rng); } - } - } - int c = crp.num_tables(1); - ++hist[c]; - tot += c; - } - EXPECT_EQ(cust, crp.num_customers()); - cerr << "P(a) = " << (xt / samples) << endl; - cerr << "E[num tables] = " << (tot / samples) << endl; - double error = fabs((tot / samples) - 5.4); - cerr << " error = " << error << endl; - EXPECT_LT(error, 0.1); // it's possible for this to fail, but - // very, very unlikely - for (int i = 1; i <= cust; ++i) - cerr << i << ' ' << (hist[i]) << endl; -} - -TEST_F(CRPTest, LP) { - CCRP crp(1,1,1,1,0.1,50.0); - crp.increment("foo", 1.0, &rng); - cerr << crp.log_crp_prob() << endl; -} - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/utils/Makefile.am b/utils/Makefile.am index bb067ed9..5153ae20 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -6,13 +6,14 @@ TESTS = ts phmt mfcr_test if HAVE_GTEST noinst_PROGRAMS += \ + crp_test \ dict_test \ m_test \ weights_test \ logval_test \ small_vector_test -TESTS += small_vector_test logval_test weights_test dict_test m_test +TESTS += crp_test small_vector_test logval_test weights_test dict_test m_test endif reconstruct_weights_SOURCES = reconstruct_weights.cc @@ -50,6 +51,8 @@ mfcr_test_SOURCES = mfcr_test.cc mfcr_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) weights_test_SOURCES = weights_test.cc weights_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) +crp_test_SOURCES = weights_test.cc +crp_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) logval_test_SOURCES = logval_test.cc logval_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) small_vector_test_SOURCES = small_vector_test.cc diff --git a/utils/crp_test.cc b/utils/crp_test.cc new file mode 100644 index 00000000..561cd4dd --- /dev/null +++ b/utils/crp_test.cc @@ -0,0 +1,102 @@ +#include +#include +#include + +#include + +#include "ccrp.h" +#include "sampler.h" + +const size_t MAX_DOC_LEN_CHARS = 10000000; + +using namespace std; + +class CRPTest : public testing::Test { + public: + CRPTest() {} + protected: + virtual void SetUp() { } + virtual void TearDown() { } + MT19937 rng; +}; + +TEST_F(CRPTest, Dist) { + CCRP crp(0.1, 5); + double un = 0.25; + int tt = 0; + tt += crp.increment("hi", un, &rng); + tt += crp.increment("foo", un, &rng); + tt += crp.increment("bar", un, &rng); + tt += crp.increment("bar", un, &rng); + tt += crp.increment("bar", un, &rng); + tt += crp.increment("bar", un, &rng); + tt 
+= crp.increment("bar", un, &rng); + tt += crp.increment("bar", un, &rng); + tt += crp.increment("bar", un, &rng); + cout << "tt=" << tt << endl; + cout << crp << endl; + cout << " P(bar)=" << crp.prob("bar", un) << endl; + cout << " P(hi)=" << crp.prob("hi", un) << endl; + cout << " P(baz)=" << crp.prob("baz", un) << endl; + cout << " P(foo)=" << crp.prob("foo", un) << endl; + double x = crp.prob("bar", un) + crp.prob("hi", un) + crp.prob("baz", un) + crp.prob("foo", un); + cout << " tot=" << x << endl; + EXPECT_FLOAT_EQ(1.0, x); + tt += crp.decrement("hi", &rng); + tt += crp.decrement("bar", &rng); + cout << crp << endl; + tt += crp.decrement("bar", &rng); + cout << crp << endl; + cout << "tt=" << tt << endl; +} + +TEST_F(CRPTest, Exchangability) { + double tot = 0; + double xt = 0; + CCRP crp(0.5, 1.0); + int cust = 10; + vector hist(cust + 1, 0); + for (int i = 0; i < cust; ++i) { crp.increment(1, 1.0, &rng); } + const int samples = 100000; + const bool simulate = true; + for (int k = 0; k < samples; ++k) { + if (!simulate) { + crp.clear(); + for (int i = 0; i < cust; ++i) { crp.increment(1, 1.0, &rng); } + } else { + int da = rng.next() * cust; + bool a = rng.next() < 0.5; + if (a) { + for (int i = 0; i < da; ++i) { crp.increment(1, 1.0, &rng); } + for (int i = 0; i < da; ++i) { crp.decrement(1, &rng); } + xt += 1.0; + } else { + for (int i = 0; i < da; ++i) { crp.decrement(1, &rng); } + for (int i = 0; i < da; ++i) { crp.increment(1, 1.0, &rng); } + } + } + int c = crp.num_tables(1); + ++hist[c]; + tot += c; + } + EXPECT_EQ(cust, crp.num_customers()); + cerr << "P(a) = " << (xt / samples) << endl; + cerr << "E[num tables] = " << (tot / samples) << endl; + double error = fabs((tot / samples) - 5.4); + cerr << " error = " << error << endl; + EXPECT_LT(error, 0.1); // it's possible for this to fail, but + // very, very unlikely + for (int i = 1; i <= cust; ++i) + cerr << i << ' ' << (hist[i]) << endl; +} + +TEST_F(CRPTest, LP) { + CCRP crp(1,1,1,1,0.1,50.0); + crp.increment("foo", 1.0, &rng); + cerr << crp.log_crp_prob() << endl; +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} -- cgit v1.2.3 From 15170746be2fc718e8fb026b4468e33cf0c63170 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 4 Mar 2012 21:41:07 -0500 Subject: clean up crp --- utils/Makefile.am | 2 +- utils/ccrp.h | 10 +++++----- utils/fdict.h | 6 ++++-- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'utils') diff --git a/utils/Makefile.am b/utils/Makefile.am index 5153ae20..3ea21835 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -51,7 +51,7 @@ mfcr_test_SOURCES = mfcr_test.cc mfcr_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) weights_test_SOURCES = weights_test.cc weights_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) -crp_test_SOURCES = weights_test.cc +crp_test_SOURCES = crp_test.cc crp_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) logval_test_SOURCES = logval_test.cc logval_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) diff --git a/utils/ccrp.h b/utils/ccrp.h index 61ab5576..68769635 100644 --- a/utils/ccrp.h +++ b/utils/ccrp.h @@ -242,10 +242,10 @@ class CCRP { void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { assert(has_discount_prior() || has_alpha_prior()); DiscountResampler dr(*this); - ConcentrationResampler cr(*this); + StrengthResampler sr(*this); for (int iter = 0; iter < nloop; ++iter) { if (has_alpha_prior()) { - alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, + alpha_ = 
slice_sampler1d(sr, alpha_, *rng, 0.0, std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); } if (has_discount_prior()) { @@ -253,7 +253,7 @@ class CCRP { 1.0, 0.0, niterations, 100*niterations); } } - alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, + alpha_ = slice_sampler1d(sr, alpha_, *rng, 0.0, std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); } @@ -265,8 +265,8 @@ class CCRP { } }; - struct ConcentrationResampler { - ConcentrationResampler(const CCRP& crp) : crp_(crp) {} + struct StrengthResampler { + StrengthResampler(const CCRP& crp) : crp_(crp) {} const CCRP& crp_; double operator()(const double& proposed_alpha) const { return crp_.log_crp_prob(crp_.discount_, proposed_alpha); diff --git a/utils/fdict.h b/utils/fdict.h index f0871b9a..0a2a9456 100644 --- a/utils/fdict.h +++ b/utils/fdict.h @@ -10,7 +10,7 @@ #ifdef HAVE_CMPH #include "perfect_hash.h" -#include "string_to.h" +#include #endif struct FD { @@ -49,7 +49,9 @@ struct FD { #ifdef HAVE_CMPH if (hash_) { static std::string tls; - tls = to_string(w); + std::ostringstream os; + os << w; + tls = os.str(); return tls; } #endif -- cgit v1.2.3 From 0c4ffecf6ccad06b426463d8edc5e0c50935b9c9 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 4 Mar 2012 23:15:51 -0500 Subject: support full range of hyperparameter values for PYP (including strength <= 0) --- utils/ccrp.h | 68 ++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 43 insertions(+), 25 deletions(-) (limited to 'utils') diff --git a/utils/ccrp.h b/utils/ccrp.h index 68769635..c883c027 100644 --- a/utils/ccrp.h +++ b/utils/ccrp.h @@ -19,29 +19,44 @@ template > class CCRP { public: CCRP(double disc, double alpha) : - num_tables_(), - num_customers_(), - discount_(disc), - alpha_(alpha), - discount_prior_alpha_(std::numeric_limits::quiet_NaN()), - discount_prior_beta_(std::numeric_limits::quiet_NaN()), - alpha_prior_shape_(std::numeric_limits::quiet_NaN()), - alpha_prior_rate_(std::numeric_limits::quiet_NaN()) {} + num_tables_(), + num_customers_(), + discount_(disc), + alpha_(alpha), + discount_prior_alpha_(std::numeric_limits::quiet_NaN()), + discount_prior_beta_(std::numeric_limits::quiet_NaN()), + alpha_prior_shape_(std::numeric_limits::quiet_NaN()), + alpha_prior_rate_(std::numeric_limits::quiet_NaN()) { + check_hyperparameters(); + } CCRP(double d_alpha, double d_beta, double c_shape, double c_rate, double d = 0.9, double c = 1.0) : - num_tables_(), - num_customers_(), - discount_(d), - alpha_(c), - discount_prior_alpha_(d_alpha), - discount_prior_beta_(d_beta), - alpha_prior_shape_(c_shape), - alpha_prior_rate_(c_rate) {} + num_tables_(), + num_customers_(), + discount_(d), + alpha_(c), + discount_prior_alpha_(d_alpha), + discount_prior_beta_(d_beta), + alpha_prior_shape_(c_shape), + alpha_prior_rate_(c_rate) { + check_hyperparameters(); + } + + void check_hyperparameters() { + if (discount_ < 0.0 || discount_ >= 1.0) { + std::cerr << "Bad discount: " << discount_ << std::endl; + abort(); + } + if (alpha_ <= -discount_) { + std::cerr << "Bad strength: " << alpha_ << " (discount=" << discount_ << ")" << std::endl; + abort(); + } + } double discount() const { return discount_; } double alpha() const { return alpha_; } - void set_discount(double d) { discount_ = d; } - void set_alpha(double a) { alpha_ = a; } + void set_discount(double d) { discount_ = d; check_hyperparameters(); } + void set_alpha(double a) { alpha_ = a; check_hyperparameters(); } bool has_discount_prior() const { return 
!std::isnan(discount_prior_alpha_); @@ -215,14 +230,15 @@ class CCRP { if (has_discount_prior()) lp = Md::log_beta_density(discount, discount_prior_alpha_, discount_prior_beta_); if (has_alpha_prior()) - lp += Md::log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_); + lp += Md::log_gamma_density(alpha + discount, alpha_prior_shape_, alpha_prior_rate_); assert(lp <= 0.0); if (num_customers_) { if (discount > 0.0) { const double r = lgamma(1.0 - discount); - lp += lgamma(alpha) - lgamma(alpha + num_customers_) - + num_tables_ * log(discount) + lgamma(alpha / discount + num_tables_) - - lgamma(alpha / discount); + if (alpha) + lp += lgamma(alpha) - lgamma(alpha / discount); + lp += - lgamma(alpha + num_customers_) + + num_tables_ * log(discount) + lgamma(alpha / discount + num_tables_); assert(std::isfinite(lp)); for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); it != dish_locs_.end(); ++it) { @@ -245,15 +261,17 @@ class CCRP { StrengthResampler sr(*this); for (int iter = 0; iter < nloop; ++iter) { if (has_alpha_prior()) { - alpha_ = slice_sampler1d(sr, alpha_, *rng, 0.0, + alpha_ = slice_sampler1d(sr, alpha_, *rng, -discount_, std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); } if (has_discount_prior()) { - discount_ = slice_sampler1d(dr, discount_, *rng, std::numeric_limits::min(), + double min_discount = std::numeric_limits::min(); + if (alpha_ < 0.0) min_discount = -alpha_; + discount_ = slice_sampler1d(dr, discount_, *rng, min_discount, 1.0, 0.0, niterations, 100*niterations); } } - alpha_ = slice_sampler1d(sr, alpha_, *rng, 0.0, + alpha_ = slice_sampler1d(sr, alpha_, *rng, -discount_, std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); } -- cgit v1.2.3 From ce58cb44771a5194b71682d1602abe2fef9e6f13 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 5 Mar 2012 14:51:04 -0500 Subject: support strength=0 PYPs, final notation clean-up --- gi/pf/align-lexonly-pyp.cc | 2 +- gi/pf/conditional_pseg.h | 2 +- gi/pf/learn_cfg.cc | 4 +- gi/pf/pyp_lm.cc | 22 ++++----- phrasinator/gibbs_train_plm.cc | 2 +- utils/ccrp.h | 106 ++++++++++++++++++++++------------------- utils/mfcr.h | 105 ++++++++++++++++++++++------------------ 7 files changed, 131 insertions(+), 112 deletions(-) (limited to 'utils') diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc index 4ce7cf62..87f7f6b5 100644 --- a/gi/pf/align-lexonly-pyp.cc +++ b/gi/pf/align-lexonly-pyp.cc @@ -104,7 +104,7 @@ struct HierarchicalWordBase { } void Summary() const { - cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << " (d=" << r.discount() << ",\\alpha=" << r.alpha() << ')' << endl; + cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << " (d=" << r.discount() << ",s=" << r.strength() << ')' << endl; for (MFCR >::const_iterator it = r.begin(); it != r.end(); ++it) cerr << " " << it->second.total_dish_count_ << " (on " << it->second.table_counts_.size() << " tables)" << TD::GetString(it->first) << endl; } diff --git a/gi/pf/conditional_pseg.h b/gi/pf/conditional_pseg.h index f9841cbf..86403d8d 100644 --- a/gi/pf/conditional_pseg.h +++ b/gi/pf/conditional_pseg.h @@ -22,7 +22,7 @@ struct MConditionalTranslationModel { void Summary() const { std::cerr << "Number of conditioning contexts: " << r.size() << std::endl; for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - std::cerr << TD::GetString(it->first) << " \t(d=" << it->second.discount() << ",\\alpha = " << it->second.alpha() << ") --------------------------" << std::endl; + 
std::cerr << TD::GetString(it->first) << " \t(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << std::endl; for (MFCR::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) std::cerr << " " << -1 << '\t' << i2->first << std::endl; } diff --git a/gi/pf/learn_cfg.cc b/gi/pf/learn_cfg.cc index 5b748311..bf157828 100644 --- a/gi/pf/learn_cfg.cc +++ b/gi/pf/learn_cfg.cc @@ -183,9 +183,9 @@ struct HieroLMModel { nts[i].resample_hyperparameters(rng); if (kHIERARCHICAL_PRIOR) { q0.resample_hyperparameters(rng); - cerr << "[base d=" << q0.discount() << ", alpha=" << q0.alpha() << "]"; + cerr << "[base d=" << q0.discount() << ", s=" << q0.strength() << "]"; } - cerr << " d=" << nts[0].discount() << ", alpha=" << nts[0].alpha() << endl; + cerr << " d=" << nts[0].discount() << ", s=" << nts[0].strength() << endl; } const BaseRuleModel base; diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc index e5c44c8b..7ebada13 100644 --- a/gi/pf/pyp_lm.cc +++ b/gi/pf/pyp_lm.cc @@ -78,14 +78,14 @@ template struct PYPLM { backoff(vs, da, db, ss, sr), discount_a(da), discount_b(db), strength_s(ss), strength_r(sr), - d(0.8), alpha(1.0), lookup(N-1) {} + d(0.8), strength(1.0), lookup(N-1) {} void increment(WordID w, const vector& context, MT19937* rng) { const double bo = backoff.prob(w, context); for (unsigned i = 0; i < N-1; ++i) lookup[i] = context[context.size() - 1 - i]; typename unordered_map, CCRP, boost::hash > >::iterator it = p.find(lookup); if (it == p.end()) - it = p.insert(make_pair(lookup, CCRP(d,alpha))).first; + it = p.insert(make_pair(lookup, CCRP(d,strength))).first; if (it->second.increment(w, bo, rng)) backoff.increment(w, context, rng); } @@ -107,7 +107,7 @@ template struct PYPLM { } double log_likelihood() const { - return log_likelihood(d, alpha) + backoff.log_likelihood(); + return log_likelihood(d, strength) + backoff.log_likelihood(); } double log_likelihood(const double& dd, const double& aa) const { @@ -125,15 +125,15 @@ template struct PYPLM { DiscountResampler(const PYPLM& m) : m_(m) {} const PYPLM& m_; double operator()(const double& proposed_discount) const { - return m_.log_likelihood(proposed_discount, m_.alpha); + return m_.log_likelihood(proposed_discount, m_.strength); } }; struct AlphaResampler { AlphaResampler(const PYPLM& m) : m_(m) {} const PYPLM& m_; - double operator()(const double& proposed_alpha) const { - return m_.log_likelihood(m_.d, proposed_alpha); + double operator()(const double& proposed_strength) const { + return m_.log_likelihood(m_.d, proposed_strength); } }; @@ -141,25 +141,25 @@ template struct PYPLM { DiscountResampler dr(*this); AlphaResampler ar(*this); for (int iter = 0; iter < nloop; ++iter) { - alpha = slice_sampler1d(ar, alpha, *rng, 0.0, + strength = slice_sampler1d(ar, strength, *rng, 0.0, std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); d = slice_sampler1d(dr, d, *rng, std::numeric_limits::min(), 1.0, 0.0, niterations, 100*niterations); } - alpha = slice_sampler1d(ar, alpha, *rng, 0.0, + strength = slice_sampler1d(ar, strength, *rng, 0.0, std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); typename unordered_map, CCRP, boost::hash > >::iterator it; - cerr << "PYPLM<" << N << ">(d=" << d << ",a=" << alpha << ") = " << log_likelihood(d, alpha) << endl; + cerr << "PYPLM<" << N << ">(d=" << d << ",a=" << strength << ") = " << log_likelihood(d, strength) << endl; for (it = p.begin(); it != p.end(); ++it) { it->second.set_discount(d); - 
it->second.set_alpha(alpha); + it->second.set_strength(strength); } backoff.resample_hyperparameters(rng, nloop, niterations); } PYPLM backoff; double discount_a, discount_b, strength_s, strength_r; - double d, alpha; + double d, strength; mutable vector lookup; // thread-local unordered_map, CCRP, boost::hash > > p; }; diff --git a/phrasinator/gibbs_train_plm.cc b/phrasinator/gibbs_train_plm.cc index 54861dcb..3b99e1b6 100644 --- a/phrasinator/gibbs_train_plm.cc +++ b/phrasinator/gibbs_train_plm.cc @@ -252,7 +252,7 @@ struct UniphraseLM { void ResampleHyperparameters(MT19937* rng) { phrases_.resample_hyperparameters(rng); gen_.resample_hyperparameters(rng); - cerr << " d=" << phrases_.discount() << ",a=" << phrases_.alpha(); + cerr << " d=" << phrases_.discount() << ",s=" << phrases_.strength(); } CCRP > phrases_; diff --git a/utils/ccrp.h b/utils/ccrp.h index c883c027..5f9db7a6 100644 --- a/utils/ccrp.h +++ b/utils/ccrp.h @@ -18,27 +18,27 @@ template > class CCRP { public: - CCRP(double disc, double alpha) : + CCRP(double disc, double strength) : num_tables_(), num_customers_(), discount_(disc), - alpha_(alpha), - discount_prior_alpha_(std::numeric_limits::quiet_NaN()), + strength_(strength), + discount_prior_strength_(std::numeric_limits::quiet_NaN()), discount_prior_beta_(std::numeric_limits::quiet_NaN()), - alpha_prior_shape_(std::numeric_limits::quiet_NaN()), - alpha_prior_rate_(std::numeric_limits::quiet_NaN()) { + strength_prior_shape_(std::numeric_limits::quiet_NaN()), + strength_prior_rate_(std::numeric_limits::quiet_NaN()) { check_hyperparameters(); } - CCRP(double d_alpha, double d_beta, double c_shape, double c_rate, double d = 0.9, double c = 1.0) : + CCRP(double d_strength, double d_beta, double c_shape, double c_rate, double d = 0.9, double c = 1.0) : num_tables_(), num_customers_(), discount_(d), - alpha_(c), - discount_prior_alpha_(d_alpha), + strength_(c), + discount_prior_strength_(d_strength), discount_prior_beta_(d_beta), - alpha_prior_shape_(c_shape), - alpha_prior_rate_(c_rate) { + strength_prior_shape_(c_shape), + strength_prior_rate_(c_rate) { check_hyperparameters(); } @@ -47,23 +47,23 @@ class CCRP { std::cerr << "Bad discount: " << discount_ << std::endl; abort(); } - if (alpha_ <= -discount_) { - std::cerr << "Bad strength: " << alpha_ << " (discount=" << discount_ << ")" << std::endl; + if (strength_ <= -discount_) { + std::cerr << "Bad strength: " << strength_ << " (discount=" << discount_ << ")" << std::endl; abort(); } } double discount() const { return discount_; } - double alpha() const { return alpha_; } + double strength() const { return strength_; } void set_discount(double d) { discount_ = d; check_hyperparameters(); } - void set_alpha(double a) { alpha_ = a; check_hyperparameters(); } + void set_strength(double a) { strength_ = a; check_hyperparameters(); } bool has_discount_prior() const { - return !std::isnan(discount_prior_alpha_); + return !std::isnan(discount_prior_strength_); } - bool has_alpha_prior() const { - return !std::isnan(alpha_prior_shape_); + bool has_strength_prior() const { + return !std::isnan(strength_prior_shape_); } void clear() { @@ -97,7 +97,7 @@ class CCRP { DishLocations& loc = dish_locs_[dish]; bool share_table = false; if (loc.total_dish_count_) { - const double p_empty = (alpha_ + num_tables_ * discount_) * p0; + const double p_empty = (strength_ + num_tables_ * discount_) * p0; const double p_share = (loc.total_dish_count_ - loc.table_counts_.size() * discount_); share_table = rng->SelectSample(p_empty, p_share); } @@ 
-131,7 +131,7 @@ class CCRP { DishLocations& loc = dish_locs_[dish]; bool share_table = false; if (loc.total_dish_count_) { - const T p_empty = T(alpha_ + num_tables_ * discount_) * p0; + const T p_empty = T(strength_ + num_tables_ * discount_) * p0; const T p_share = T(loc.total_dish_count_ - loc.table_counts_.size() * discount_); share_table = rng->SelectSample(p_empty, p_share); } @@ -198,47 +198,47 @@ class CCRP { double prob(const Dish& dish, const double& p0) const { const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - const double r = num_tables_ * discount_ + alpha_; + const double r = num_tables_ * discount_ + strength_; if (it == dish_locs_.end()) { - return r * p0 / (num_customers_ + alpha_); + return r * p0 / (num_customers_ + strength_); } else { return (it->second.total_dish_count_ - discount_ * it->second.table_counts_.size() + r * p0) / - (num_customers_ + alpha_); + (num_customers_ + strength_); } } template T probT(const Dish& dish, const T& p0) const { const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - const T r = T(num_tables_ * discount_ + alpha_); + const T r = T(num_tables_ * discount_ + strength_); if (it == dish_locs_.end()) { - return r * p0 / T(num_customers_ + alpha_); + return r * p0 / T(num_customers_ + strength_); } else { return (T(it->second.total_dish_count_ - discount_ * it->second.table_counts_.size()) + r * p0) / - T(num_customers_ + alpha_); + T(num_customers_ + strength_); } } double log_crp_prob() const { - return log_crp_prob(discount_, alpha_); + return log_crp_prob(discount_, strength_); } // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process // does not include P_0's - double log_crp_prob(const double& discount, const double& alpha) const { + double log_crp_prob(const double& discount, const double& strength) const { double lp = 0.0; if (has_discount_prior()) - lp = Md::log_beta_density(discount, discount_prior_alpha_, discount_prior_beta_); - if (has_alpha_prior()) - lp += Md::log_gamma_density(alpha + discount, alpha_prior_shape_, alpha_prior_rate_); + lp = Md::log_beta_density(discount, discount_prior_strength_, discount_prior_beta_); + if (has_strength_prior()) + lp += Md::log_gamma_density(strength + discount, strength_prior_shape_, strength_prior_rate_); assert(lp <= 0.0); if (num_customers_) { if (discount > 0.0) { const double r = lgamma(1.0 - discount); - if (alpha) - lp += lgamma(alpha) - lgamma(alpha / discount); - lp += - lgamma(alpha + num_customers_) - + num_tables_ * log(discount) + lgamma(alpha / discount + num_tables_); + if (strength) + lp += lgamma(strength) - lgamma(strength / discount); + lp += - lgamma(strength + num_customers_) + + num_tables_ * log(discount) + lgamma(strength / discount + num_tables_); assert(std::isfinite(lp)); for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); it != dish_locs_.end(); ++it) { @@ -247,8 +247,16 @@ class CCRP { lp += lgamma(*ti - discount) - r; } } + } else if (!discount) { // discount == 0.0 + lp += lgamma(strength) + num_tables_ * log(strength) - lgamma(strength + num_tables_); + assert(std::isfinite(lp)); + for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); + it != dish_locs_.end(); ++it) { + const DishLocations& cur = it->second; + lp += lgamma(cur.table_counts_.size()); + } } else { - assert(!"not implemented yet"); + assert(!"discount less than 0 detected!"); } } assert(std::isfinite(lp)); @@ -256,22 +264,22 @@ class CCRP { } void 
resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { - assert(has_discount_prior() || has_alpha_prior()); + assert(has_discount_prior() || has_strength_prior()); DiscountResampler dr(*this); StrengthResampler sr(*this); for (int iter = 0; iter < nloop; ++iter) { - if (has_alpha_prior()) { - alpha_ = slice_sampler1d(sr, alpha_, *rng, -discount_, + if (has_strength_prior()) { + strength_ = slice_sampler1d(sr, strength_, *rng, -discount_, std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); } if (has_discount_prior()) { double min_discount = std::numeric_limits::min(); - if (alpha_ < 0.0) min_discount = -alpha_; + if (strength_ < 0.0) min_discount = -strength_; discount_ = slice_sampler1d(dr, discount_, *rng, min_discount, 1.0, 0.0, niterations, 100*niterations); } } - alpha_ = slice_sampler1d(sr, alpha_, *rng, -discount_, + strength_ = slice_sampler1d(sr, strength_, *rng, -discount_, std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); } @@ -279,15 +287,15 @@ class CCRP { DiscountResampler(const CCRP& crp) : crp_(crp) {} const CCRP& crp_; double operator()(const double& proposed_discount) const { - return crp_.log_crp_prob(proposed_discount, crp_.alpha_); + return crp_.log_crp_prob(proposed_discount, crp_.strength_); } }; struct StrengthResampler { StrengthResampler(const CCRP& crp) : crp_(crp) {} const CCRP& crp_; - double operator()(const double& proposed_alpha) const { - return crp_.log_crp_prob(crp_.discount_, proposed_alpha); + double operator()(const double& proposed_strength) const { + return crp_.log_crp_prob(crp_.discount_, proposed_strength); } }; @@ -299,7 +307,7 @@ class CCRP { }; void Print(std::ostream* out) const { - std::cerr << "PYP(d=" << discount_ << ",c=" << alpha_ << ") customers=" << num_customers_ << std::endl; + std::cerr << "PYP(d=" << discount_ << ",c=" << strength_ << ") customers=" << num_customers_ << std::endl; for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); it != dish_locs_.end(); ++it) { (*out) << it->first << " (" << it->second.total_dish_count_ << " on " << it->second.table_counts_.size() << " tables): "; @@ -324,15 +332,15 @@ class CCRP { std::tr1::unordered_map dish_locs_; double discount_; - double alpha_; + double strength_; // optional beta prior on discount_ (NaN if no prior) - double discount_prior_alpha_; + double discount_prior_strength_; double discount_prior_beta_; - // optional gamma prior on alpha_ (NaN if no prior) - double alpha_prior_shape_; - double alpha_prior_rate_; + // optional gamma prior on strength_ (NaN if no prior) + double strength_prior_shape_; + double strength_prior_rate_; }; template diff --git a/utils/mfcr.h b/utils/mfcr.h index df988f51..aeaf599d 100644 --- a/utils/mfcr.h +++ b/utils/mfcr.h @@ -39,37 +39,37 @@ template > class MFCR { public: - MFCR(unsigned num_floors, double d, double alpha) : + MFCR(unsigned num_floors, double d, double strength) : num_floors_(num_floors), num_tables_(), num_customers_(), discount_(d), - alpha_(alpha), - discount_prior_alpha_(std::numeric_limits::quiet_NaN()), + strength_(strength), + discount_prior_strength_(std::numeric_limits::quiet_NaN()), discount_prior_beta_(std::numeric_limits::quiet_NaN()), - alpha_prior_shape_(std::numeric_limits::quiet_NaN()), - alpha_prior_rate_(std::numeric_limits::quiet_NaN()) {} + strength_prior_shape_(std::numeric_limits::quiet_NaN()), + strength_prior_rate_(std::numeric_limits::quiet_NaN()) {} - MFCR(unsigned num_floors, double discount_alpha, double 
discount_beta, double alpha_shape, double alpha_rate, double d = 0.9, double alpha = 10.0) : + MFCR(unsigned num_floors, double discount_strength, double discount_beta, double strength_shape, double strength_rate, double d = 0.9, double strength = 10.0) : num_floors_(num_floors), num_tables_(), num_customers_(), discount_(d), - alpha_(alpha), - discount_prior_alpha_(discount_alpha), + strength_(strength), + discount_prior_strength_(discount_strength), discount_prior_beta_(discount_beta), - alpha_prior_shape_(alpha_shape), - alpha_prior_rate_(alpha_rate) {} + strength_prior_shape_(strength_shape), + strength_prior_rate_(strength_rate) {} double discount() const { return discount_; } - double alpha() const { return alpha_; } + double strength() const { return strength_; } bool has_discount_prior() const { - return !std::isnan(discount_prior_alpha_); + return !std::isnan(discount_prior_strength_); } - bool has_alpha_prior() const { - return !std::isnan(alpha_prior_shape_); + bool has_strength_prior() const { + return !std::isnan(strength_prior_shape_); } void clear() { @@ -122,7 +122,7 @@ class MFCR { int floor = -1; bool share_table = false; if (loc.total_dish_count_) { - const double p_empty = (alpha_ + num_tables_ * discount_) * marg_p0; + const double p_empty = (strength_ + num_tables_ * discount_) * marg_p0; const double p_share = (loc.total_dish_count_ - loc.table_counts_.size() * discount_); share_table = rng->SelectSample(p_empty, p_share); } @@ -206,44 +206,53 @@ class MFCR { const double marg_p0 = std::inner_product(p0s.begin(), p0s.end(), lambdas.begin(), 0.0); assert(marg_p0 <= 1.0); const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - const double r = num_tables_ * discount_ + alpha_; + const double r = num_tables_ * discount_ + strength_; if (it == dish_locs_.end()) { - return r * marg_p0 / (num_customers_ + alpha_); + return r * marg_p0 / (num_customers_ + strength_); } else { return (it->second.total_dish_count_ - discount_ * it->second.table_counts_.size() + r * marg_p0) / - (num_customers_ + alpha_); + (num_customers_ + strength_); } } double log_crp_prob() const { - return log_crp_prob(discount_, alpha_); + return log_crp_prob(discount_, strength_); } // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process // does not include draws from G_w's - double log_crp_prob(const double& d, const double& alpha) const { + double log_crp_prob(const double& discount, const double& strength) const { double lp = 0.0; if (has_discount_prior()) - lp = Md::log_beta_density(d, discount_prior_alpha_, discount_prior_beta_); - if (has_alpha_prior()) - lp += Md::log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_); + lp = Md::log_beta_density(discount, discount_prior_strength_, discount_prior_beta_); + if (has_strength_prior()) + lp += Md::log_gamma_density(strength + discount, strength_prior_shape_, strength_prior_rate_); assert(lp <= 0.0); if (num_customers_) { - if (d > 0.0) { - const double r = lgamma(1.0 - d); - lp += lgamma(alpha) - lgamma(alpha + num_customers_) - + num_tables_ * log(d) + lgamma(alpha / d + num_tables_) - - lgamma(alpha / d); + if (discount > 0.0) { + const double r = lgamma(1.0 - discount); + if (strength) + lp += lgamma(strength) - lgamma(strength / discount); + lp += - lgamma(strength + num_customers_) + + num_tables_ * log(discount) + lgamma(strength / discount + num_tables_); assert(std::isfinite(lp)); for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); it != dish_locs_.end(); ++it) { 
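// (per-table term of the PYP partition probability: a table seating n
// customers contributes Gamma(n - discount) / Gamma(1 - discount), hence
// lp += lgamma(ti->count - discount) - r below, where r caches
// lgamma(1.0 - discount) computed once above the loop.)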
const DishLocations& cur = it->second; for (std::list::const_iterator ti = cur.table_counts_.begin(); ti != cur.table_counts_.end(); ++ti) { - lp += lgamma(ti->count - d) - r; + lp += lgamma(ti->count - discount) - r; } } + } else if (!discount) { // discount == 0.0 + lp += lgamma(strength) + num_tables_ * log(strength) - lgamma(strength + num_tables_); + assert(std::isfinite(lp)); + for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); + it != dish_locs_.end(); ++it) { + const DishLocations& cur = it->second; + lp += lgamma(cur.table_counts_.size()); + } } else { - assert(!"not implemented yet"); + assert(!"discount less than 0 detected!"); } } assert(std::isfinite(lp)); @@ -251,20 +260,22 @@ class MFCR { } void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { - assert(has_discount_prior() || has_alpha_prior()); + assert(has_discount_prior() || has_strength_prior()); DiscountResampler dr(*this); - ConcentrationResampler cr(*this); + StrengthResampler sr(*this); for (int iter = 0; iter < nloop; ++iter) { - if (has_alpha_prior()) { - alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, + if (has_strength_prior()) { + strength_ = slice_sampler1d(sr, strength_, *rng, -discount_, std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); } if (has_discount_prior()) { - discount_ = slice_sampler1d(dr, discount_, *rng, std::numeric_limits::min(), + double min_discount = std::numeric_limits::min(); + if (strength_ < 0.0) min_discount = -strength_; + discount_ = slice_sampler1d(dr, discount_, *rng, min_discount, 1.0, 0.0, niterations, 100*niterations); } } - alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, + strength_ = slice_sampler1d(sr, strength_, *rng, -discount_, std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); } @@ -272,15 +283,15 @@ class MFCR { DiscountResampler(const MFCR& crp) : crp_(crp) {} const MFCR& crp_; double operator()(const double& proposed_d) const { - return crp_.log_crp_prob(proposed_d, crp_.alpha_); + return crp_.log_crp_prob(proposed_d, crp_.strength_); } }; - struct ConcentrationResampler { - ConcentrationResampler(const MFCR& crp) : crp_(crp) {} + struct StrengthResampler { + StrengthResampler(const MFCR& crp) : crp_(crp) {} const MFCR& crp_; - double operator()(const double& proposediscount_alpha) const { - return crp_.log_crp_prob(crp_.discount_, proposediscount_alpha); + double operator()(const double& proposediscount_strength) const { + return crp_.log_crp_prob(crp_.discount_, proposediscount_strength); } }; @@ -292,7 +303,7 @@ class MFCR { }; void Print(std::ostream* out) const { - (*out) << "MFCR(d=" << discount_ << ",alpha=" << alpha_ << ") customers=" << num_customers_ << std::endl; + (*out) << "MFCR(d=" << discount_ << ",strength=" << strength_ << ") customers=" << num_customers_ << std::endl; for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); it != dish_locs_.end(); ++it) { (*out) << it->first << " (" << it->second.total_dish_count_ << " on " << it->second.table_counts_.size() << " tables): "; @@ -318,15 +329,15 @@ class MFCR { std::tr1::unordered_map dish_locs_; double discount_; - double alpha_; + double strength_; // optional beta prior on discount_ (NaN if no prior) - double discount_prior_alpha_; + double discount_prior_strength_; double discount_prior_beta_; - // optional gamma prior on alpha_ (NaN if no prior) - double alpha_prior_shape_; - double alpha_prior_rate_; + // optional gamma prior on strength_ (NaN if no prior) + 
double strength_prior_shape_; + double strength_prior_rate_; }; template -- cgit v1.2.3 From 2048ac9943e2695a75b5f0303ca869e66ee32202 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 5 Mar 2012 16:06:45 -0500 Subject: use template parameter inference to figure out what type to use for probability computations, templatatize number of floors in MFCR rather than compile-time set --- gi/pf/align-lexonly-pyp.cc | 20 +++++++------- gi/pf/conditional_pseg.h | 22 +++++++-------- gi/pf/learn_cfg.cc | 8 +++--- utils/ccrp.h | 48 ++------------------------------ utils/mfcr.h | 68 ++++++++++++++++++++++++---------------------- utils/mfcr_test.cc | 10 +++---- 6 files changed, 68 insertions(+), 108 deletions(-) (limited to 'utils') diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc index 87f7f6b5..ac0590e0 100644 --- a/gi/pf/align-lexonly-pyp.cc +++ b/gi/pf/align-lexonly-pyp.cc @@ -68,7 +68,7 @@ struct AlignedSentencePair { struct HierarchicalWordBase { explicit HierarchicalWordBase(const unsigned vocab_e_size) : - base(prob_t::One()), r(1,1,1,25,25), u0(-log(vocab_e_size)), l(1,1.0), v(1, 0.0) {} + base(prob_t::One()), r(1,1,1,1), u0(-log(vocab_e_size)), l(1,prob_t::One()), v(1, prob_t::Zero()) {} void ResampleHyperparameters(MT19937* rng) { r.resample_hyperparameters(rng); @@ -80,14 +80,14 @@ struct HierarchicalWordBase { // return p0 of rule.e_ prob_t operator()(const TRule& rule) const { - v[0] = exp(logp0(rule.e_)); - return prob_t(r.prob(rule.e_, v, l)); + v[0].logeq(logp0(rule.e_)); + return r.prob(rule.e_, v.begin(), l.begin()); } void Increment(const TRule& rule) { - v[0] = exp(logp0(rule.e_)); - if (r.increment(rule.e_, v, l, &*prng).count) { - base *= prob_t(v[0] * l[0]); + v[0].logeq(logp0(rule.e_)); + if (r.increment(rule.e_, v.begin(), l.begin(), &*prng).count) { + base *= v[0] * l[0]; } } @@ -105,15 +105,15 @@ struct HierarchicalWordBase { void Summary() const { cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << " (d=" << r.discount() << ",s=" << r.strength() << ')' << endl; - for (MFCR >::const_iterator it = r.begin(); it != r.end(); ++it) + for (MFCR<1,vector >::const_iterator it = r.begin(); it != r.end(); ++it) cerr << " " << it->second.total_dish_count_ << " (on " << it->second.table_counts_.size() << " tables)" << TD::GetString(it->first) << endl; } prob_t base; - MFCR > r; + MFCR<1,vector > r; const double u0; - const vector l; - mutable vector v; + const vector l; + mutable vector v; }; struct BasicLexicalAlignment { diff --git a/gi/pf/conditional_pseg.h b/gi/pf/conditional_pseg.h index 86403d8d..ef73e332 100644 --- a/gi/pf/conditional_pseg.h +++ b/gi/pf/conditional_pseg.h @@ -17,13 +17,13 @@ template struct MConditionalTranslationModel { explicit MConditionalTranslationModel(ConditionalBaseMeasure& rcp0) : - rp0(rcp0), lambdas(1, 1.0), p0s(1) {} + rp0(rcp0), lambdas(1, prob_t::One()), p0s(1) {} void Summary() const { std::cerr << "Number of conditioning contexts: " << r.size() << std::endl; for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { std::cerr << TD::GetString(it->first) << " \t(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << std::endl; - for (MFCR::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) + for (MFCR<1,TRule>::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) std::cerr << " " << -1 << '\t' << i2->first << std::endl; } } @@ -46,10 +46,10 @@ struct MConditionalTranslationModel { int IncrementRule(const TRule& rule, MT19937* 
rng) { RuleModelHash::iterator it = r.find(rule.f_); if (it == r.end()) { - it = r.insert(make_pair(rule.f_, MFCR(1, 1.0, 1.0, 1.0, 1.0, 1e-9, 4.0))).first; + it = r.insert(make_pair(rule.f_, MFCR<1,TRule>(1.0, 1.0, 1.0, 1.0, 1e-9, 4.0))).first; } - p0s[0] = rp0(rule).as_float(); - TableCount delta = it->second.increment(rule, p0s, lambdas, rng); + p0s[0] = rp0(rule); + TableCount delta = it->second.increment(rule, p0s.begin(), lambdas.begin(), rng); return delta.count; } @@ -57,10 +57,10 @@ struct MConditionalTranslationModel { prob_t p; RuleModelHash::const_iterator it = r.find(rule.f_); if (it == r.end()) { - p.logeq(log(rp0(rule))); + p = rp0(rule); } else { - p0s[0] = rp0(rule).as_float(); - p = prob_t(it->second.prob(rule, p0s, lambdas)); + p0s[0] = rp0(rule); + p = it->second.prob(rule, p0s.begin(), lambdas.begin()); } return p; } @@ -80,11 +80,11 @@ struct MConditionalTranslationModel { const ConditionalBaseMeasure& rp0; typedef std::tr1::unordered_map, - MFCR, + MFCR<1, TRule>, boost::hash > > RuleModelHash; RuleModelHash r; - std::vector lambdas; - mutable std::vector p0s; + std::vector lambdas; + mutable std::vector p0s; }; template diff --git a/gi/pf/learn_cfg.cc b/gi/pf/learn_cfg.cc index bf157828..ed1772bf 100644 --- a/gi/pf/learn_cfg.cc +++ b/gi/pf/learn_cfg.cc @@ -127,20 +127,20 @@ struct HieroLMModel { nts(num_nts, CCRP(1,1,1,1)) {} prob_t Prob(const TRule& r) const { - return nts[nt_id_to_index[-r.lhs_]].probT(r, p0(r)); + return nts[nt_id_to_index[-r.lhs_]].prob(r, p0(r)); } inline prob_t p0(const TRule& r) const { if (kHIERARCHICAL_PRIOR) - return q0.probT(r, base(r)); + return q0.prob(r, base(r)); else return base(r); } int Increment(const TRule& r, MT19937* rng) { - const int delta = nts[nt_id_to_index[-r.lhs_]].incrementT(r, p0(r), rng); + const int delta = nts[nt_id_to_index[-r.lhs_]].increment(r, p0(r), rng); if (kHIERARCHICAL_PRIOR && delta) - q0.incrementT(r, base(r), rng); + q0.increment(r, base(r), rng); return delta; // return x.increment(r); } diff --git a/utils/ccrp.h b/utils/ccrp.h index 5f9db7a6..e24130ac 100644 --- a/utils/ccrp.h +++ b/utils/ccrp.h @@ -92,42 +92,9 @@ class CCRP { return it->total_dish_count_; } - // returns +1 or 0 indicating whether a new table was opened - int increment(const Dish& dish, const double& p0, MT19937* rng) { - DishLocations& loc = dish_locs_[dish]; - bool share_table = false; - if (loc.total_dish_count_) { - const double p_empty = (strength_ + num_tables_ * discount_) * p0; - const double p_share = (loc.total_dish_count_ - loc.table_counts_.size() * discount_); - share_table = rng->SelectSample(p_empty, p_share); - } - if (share_table) { - double r = rng->next() * (loc.total_dish_count_ - loc.table_counts_.size() * discount_); - for (typename std::list::iterator ti = loc.table_counts_.begin(); - ti != loc.table_counts_.end(); ++ti) { - r -= (*ti - discount_); - if (r <= 0.0) { - ++(*ti); - break; - } - } - if (r > 0.0) { - std::cerr << "Serious error: r=" << r << std::endl; - Print(&std::cerr); - assert(r <= 0.0); - } - } else { - loc.table_counts_.push_back(1u); - ++num_tables_; - } - ++loc.total_dish_count_; - ++num_customers_; - return (share_table ? 
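    // (Sketch, not from the patch: how the surviving templated calls are
    // used.)  The non-template increment/prob pair removed here is
    // subsumed by the templated versions kept below; T is deduced from
    // the p0 argument, so T = double reproduces the old arithmetic and
    // T = prob_t runs the same formulas in the log domain:
    //
    //   CCRP<TRule> crp(1,1,1,1,0.5,1.0);
    //   crp.increment(rule, prob_t(1e-300), rng);  // T = prob_t
    //   double p = crp.prob(rule, 0.25);           // T = double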
0 : 1); - } - // returns +1 or 0 indicating whether a new table was opened template - int incrementT(const Dish& dish, const T& p0, MT19937* rng) { + int increment(const Dish& dish, const T& p0, MT19937* rng) { DishLocations& loc = dish_locs_[dish]; bool share_table = false; if (loc.total_dish_count_) { @@ -196,19 +163,8 @@ class CCRP { } } - double prob(const Dish& dish, const double& p0) const { - const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - const double r = num_tables_ * discount_ + strength_; - if (it == dish_locs_.end()) { - return r * p0 / (num_customers_ + strength_); - } else { - return (it->second.total_dish_count_ - discount_ * it->second.table_counts_.size() + r * p0) / - (num_customers_ + strength_); - } - } - template - T probT(const Dish& dish, const T& p0) const { + T prob(const Dish& dish, const T& p0) const { const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); const T r = T(num_tables_ * discount_ + strength_); if (it == dish_locs_.end()) { diff --git a/utils/mfcr.h b/utils/mfcr.h index aeaf599d..6cc0ebf1 100644 --- a/utils/mfcr.h +++ b/utils/mfcr.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include "sampler.h" @@ -35,12 +36,11 @@ std::ostream& operator<<(std::ostream& o, const TableCount& tc) { // referenced therein. // http://www.aclweb.org/anthology/P/P09/P09-2085.pdf // -template > +template > class MFCR { public: - MFCR(unsigned num_floors, double d, double strength) : - num_floors_(num_floors), + MFCR(double d, double strength) : num_tables_(), num_customers_(), discount_(d), @@ -50,8 +50,7 @@ class MFCR { strength_prior_shape_(std::numeric_limits::quiet_NaN()), strength_prior_rate_(std::numeric_limits::quiet_NaN()) {} - MFCR(unsigned num_floors, double discount_strength, double discount_beta, double strength_shape, double strength_rate, double d = 0.9, double strength = 10.0) : - num_floors_(num_floors), + MFCR(double discount_strength, double discount_beta, double strength_shape, double strength_rate, double d = 0.9, double strength = 10.0) : num_tables_(), num_customers_(), discount_(d), @@ -111,22 +110,22 @@ class MFCR { } // returns (delta, floor) indicating whether a new table (delta) was opened and on which floor - TableCount increment(const Dish& dish, const std::vector& p0s, const std::vector& lambdas, MT19937* rng) { - assert(p0s.size() == num_floors_); - assert(lambdas.size() == num_floors_); - + template + TableCount increment(const Dish& dish, InputIterator p0s, InputIterator2 lambdas, MT19937* rng) { DishLocations& loc = dish_locs_[dish]; // marg_p0 = marginal probability of opening a new table on any floor with label dish - const double marg_p0 = std::inner_product(p0s.begin(), p0s.end(), lambdas.begin(), 0.0); - assert(marg_p0 <= 1.0); + typedef typename std::iterator_traits::value_type F; + const F marg_p0 = std::inner_product(p0s, p0s + Floors, lambdas, F(0.0)); + assert(marg_p0 <= F(1.0001)); int floor = -1; bool share_table = false; if (loc.total_dish_count_) { - const double p_empty = (strength_ + num_tables_ * discount_) * marg_p0; - const double p_share = (loc.total_dish_count_ - loc.table_counts_.size() * discount_); + const F p_empty = F(strength_ + num_tables_ * discount_) * marg_p0; + const F p_share = F(loc.total_dish_count_ - loc.table_counts_.size() * discount_); share_table = rng->SelectSample(p_empty, p_share); } if (share_table) { + // this can be done with doubles since P0 (which may be tiny) is not involved double r = rng->next() * 
(loc.total_dish_count_ - loc.table_counts_.size() * discount_); for (typename std::list::iterator ti = loc.table_counts_.begin(); ti != loc.table_counts_.end(); ++ti) { @@ -143,12 +142,18 @@ class MFCR { assert(r <= 0.0); } } else { // sit at currently empty table -- must sample what floor - double r = rng->next() * marg_p0; - for (unsigned i = 0; i < p0s.size(); ++i) { - r -= p0s[i] * lambdas[i]; - if (r <= 0.0) { - floor = i; - break; + if (Floors == 1) { + floor = 0; + } else { + F r = F(rng->next()) * marg_p0; + for (unsigned i = 0; i < Floors; ++i) { + r -= (*p0s) * (*lambdas); + ++p0s; + ++lambdas; + if (r <= F(0.0)) { + floor = i; + break; + } } } assert(floor >= 0); @@ -200,18 +205,18 @@ class MFCR { return TableCount(delta, floor); } - double prob(const Dish& dish, const std::vector& p0s, const std::vector& lambdas) const { - assert(p0s.size() == num_floors_); - assert(lambdas.size() == num_floors_); - const double marg_p0 = std::inner_product(p0s.begin(), p0s.end(), lambdas.begin(), 0.0); - assert(marg_p0 <= 1.0); + template + typename std::iterator_traits::value_type prob(const Dish& dish, InputIterator p0s, InputIterator2 lambdas) const { + typedef typename std::iterator_traits::value_type F; + const F marg_p0 = std::inner_product(p0s, p0s + Floors, lambdas, F(0.0)); + assert(marg_p0 <= F(1.0001)); const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - const double r = num_tables_ * discount_ + strength_; + const F r = F(num_tables_ * discount_ + strength_); if (it == dish_locs_.end()) { - return r * marg_p0 / (num_customers_ + strength_); + return r * marg_p0 / F(num_customers_ + strength_); } else { - return (it->second.total_dish_count_ - discount_ * it->second.table_counts_.size() + r * marg_p0) / - (num_customers_ + strength_); + return (F(it->second.total_dish_count_ - discount_ * it->second.table_counts_.size()) + F(r * marg_p0)) / + F(num_customers_ + strength_); } } @@ -303,7 +308,7 @@ class MFCR { }; void Print(std::ostream* out) const { - (*out) << "MFCR(d=" << discount_ << ",strength=" << strength_ << ") customers=" << num_customers_ << std::endl; + (*out) << "MFCR<" << Floors << ">(d=" << discount_ << ",strength=" << strength_ << ") customers=" << num_customers_ << std::endl; for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); it != dish_locs_.end(); ++it) { (*out) << it->first << " (" << it->second.total_dish_count_ << " on " << it->second.table_counts_.size() << " tables): "; @@ -323,7 +328,6 @@ class MFCR { return dish_locs_.end(); } - unsigned num_floors_; unsigned num_tables_; unsigned num_customers_; std::tr1::unordered_map dish_locs_; @@ -340,8 +344,8 @@ class MFCR { double strength_prior_rate_; }; -template -std::ostream& operator<<(std::ostream& o, const MFCR& c) { +template +std::ostream& operator<<(std::ostream& o, const MFCR& c) { c.Print(&o); return o; } diff --git a/utils/mfcr_test.cc b/utils/mfcr_test.cc index 7c45a37c..cc886335 100644 --- a/utils/mfcr_test.cc +++ b/utils/mfcr_test.cc @@ -9,7 +9,7 @@ using namespace std; void test_exch(MT19937* rng) { - MFCR crp(2, 0.5, 3.0); + MFCR<2, int> crp(0.5, 3.0); vector lambdas(2); vector p0s(2); lambdas[0] = 0.2; @@ -22,23 +22,23 @@ void test_exch(MT19937* rng) { double xt = 0; int cust = 10; vector hist(cust + 1, 0), hist2(cust + 1, 0); - for (int i = 0; i < cust; ++i) { crp.increment(1, p0s, lambdas, rng); } + for (int i = 0; i < cust; ++i) { crp.increment(1, p0s.begin(), lambdas.begin(), rng); } const int samples = 100000; const bool simulate = 
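  // (Usage sketch mirroring this test; assumes the post-patch header.)
  // The floor count is now a compile-time parameter and the per-floor
  // base probabilities / mixture weights are passed as iterators, so the
  // arithmetic type F is inferred from the iterator's value_type:
  //
  //   MFCR<2, int> crp(0.5, 3.0);            // 2 floors, d=0.5, s=3.0
  //   std::vector<double> p0s(2, 0.5);       // per-floor base probs
  //   std::vector<double> lambdas(2);        // floor mixture weights
  //   lambdas[0] = 0.2; lambdas[1] = 0.8;
  //   crp.increment(1, p0s.begin(), lambdas.begin(), rng);
  //   double p = crp.prob(1, p0s.begin(), lambdas.begin());
  //
  // Passing std::vector<prob_t> iterators instead makes the same calls
  // run in the log domain with F = prob_t.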
true; for (int k = 0; k < samples; ++k) { if (!simulate) { crp.clear(); - for (int i = 0; i < cust; ++i) { crp.increment(1, p0s, lambdas, rng); } + for (int i = 0; i < cust; ++i) { crp.increment(1, p0s.begin(), lambdas.begin(), rng); } } else { int da = rng->next() * cust; bool a = rng->next() < 0.45; if (a) { - for (int i = 0; i < da; ++i) { crp.increment(1, p0s, lambdas, rng); } + for (int i = 0; i < da; ++i) { crp.increment(1, p0s.begin(), lambdas.begin(), rng); } for (int i = 0; i < da; ++i) { crp.decrement(1, rng); } xt += 1.0; } else { for (int i = 0; i < da; ++i) { crp.decrement(1, rng); } - for (int i = 0; i < da; ++i) { crp.increment(1, p0s, lambdas, rng); } + for (int i = 0; i < da; ++i) { crp.increment(1, p0s.begin(), lambdas.begin(), rng); } } } int c = crp.num_tables(1); -- cgit v1.2.3 From de34b1493df93169c991a1828f951ca5abc00cae Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 5 Mar 2012 21:36:07 -0500 Subject: tie hyperparameters for translation distributions; support theta < 0 for PYPLM --- gi/pf/align-lexonly-pyp.cc | 13 ++++----- gi/pf/conditional_pseg.h | 68 ++++++++++++++++++++++++++++++++++++---------- gi/pf/pyp_lm.cc | 12 ++++---- utils/ccrp.h | 4 +-- utils/mfcr.h | 19 +++++++++++-- 5 files changed, 84 insertions(+), 32 deletions(-) (limited to 'utils') diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc index ac0590e0..13a3a487 100644 --- a/gi/pf/align-lexonly-pyp.cc +++ b/gi/pf/align-lexonly-pyp.cc @@ -68,14 +68,14 @@ struct AlignedSentencePair { struct HierarchicalWordBase { explicit HierarchicalWordBase(const unsigned vocab_e_size) : - base(prob_t::One()), r(1,1,1,1), u0(-log(vocab_e_size)), l(1,prob_t::One()), v(1, prob_t::Zero()) {} + base(prob_t::One()), r(1,1,1,1,0.66,50.0), u0(-log(vocab_e_size)), l(1,prob_t::One()), v(1, prob_t::Zero()) {} void ResampleHyperparameters(MT19937* rng) { r.resample_hyperparameters(rng); } inline double logp0(const vector& s) const { - return s.size() * u0; + return Md::log_poisson(s.size(), 7.5) + s.size() * u0; } // return p0 of rule.e_ @@ -106,7 +106,7 @@ struct HierarchicalWordBase { void Summary() const { cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << " (d=" << r.discount() << ",s=" << r.strength() << ')' << endl; for (MFCR<1,vector >::const_iterator it = r.begin(); it != r.end(); ++it) - cerr << " " << it->second.total_dish_count_ << " (on " << it->second.table_counts_.size() << " tables)" << TD::GetString(it->first) << endl; + cerr << " " << it->second.total_dish_count_ << " (on " << it->second.table_counts_.size() << " tables) " << TD::GetString(it->first) << endl; } prob_t base; @@ -167,10 +167,9 @@ struct BasicLexicalAlignment { } void ResampleHyperparemeters() { - cerr << " LLH_prev = " << Likelihood() << flush; tmodel.ResampleHyperparameters(&*prng); up0.ResampleHyperparameters(&*prng); - cerr << "\tLLH_post = " << Likelihood() << endl; + cerr << " (base d=" << up0.r.discount() << ",s=" << up0.r.strength() << ")\n"; } void ResampleCorpus(); @@ -218,7 +217,7 @@ void BasicLexicalAlignment::ResampleCorpus() { up0.Increment(r); } } - cerr << " LLH = " << tmodel.Likelihood() << endl; + cerr << " LLH = " << Likelihood() << endl; } void ExtractLetters(const set& v, vector >* l, set* letset = NULL) { @@ -311,7 +310,7 @@ int main(int argc, char** argv) { for (int i = 0; i < samples; ++i) { for (int j = 65; j < 67; ++j) Debug(corpus[j]); cerr << i << "\t" << x.tmodel.r.size() << "\t"; - if (i % 10 == 0) x.ResampleHyperparemeters(); + if (i % 7 == 6) x.ResampleHyperparemeters(); x.ResampleCorpus(); 
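    // (Clarifying note.)  The sampling schedule below discards the first
    // fifth of the iterations as burn-in (i > samples / 5) and then thins,
    // collecting one posterior alignment sample every tenth iteration
    // (i % 10 == 9); hyperparameters are resampled every seventh
    // iteration by the line above.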
if (i > (samples / 5) && (i % 10 == 9)) for (int j = 0; j < corpus.size(); ++j) AddSample(&corpus[j]); } diff --git a/gi/pf/conditional_pseg.h b/gi/pf/conditional_pseg.h index ef73e332..8202778b 100644 --- a/gi/pf/conditional_pseg.h +++ b/gi/pf/conditional_pseg.h @@ -17,21 +17,66 @@ template struct MConditionalTranslationModel { explicit MConditionalTranslationModel(ConditionalBaseMeasure& rcp0) : - rp0(rcp0), lambdas(1, prob_t::One()), p0s(1) {} + rp0(rcp0), d(0.5), strength(1.0), lambdas(1, prob_t::One()), p0s(1) {} void Summary() const { std::cerr << "Number of conditioning contexts: " << r.size() << std::endl; for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { std::cerr << TD::GetString(it->first) << " \t(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << std::endl; for (MFCR<1,TRule>::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) - std::cerr << " " << -1 << '\t' << i2->first << std::endl; + std::cerr << " " << i2->second.total_dish_count_ << '\t' << i2->first << std::endl; } } + double log_likelihood(const double& dd, const double& aa) const { + if (aa <= -dd) return -std::numeric_limits::infinity(); + //double llh = Md::log_beta_density(dd, 10, 3) + Md::log_gamma_density(aa, 1, 1); + double llh = Md::log_beta_density(dd, 1, 1) + + Md::log_gamma_density(dd + aa, 1, 1); + typename std::tr1::unordered_map, MFCR<1,TRule>, boost::hash > >::const_iterator it; + for (it = r.begin(); it != r.end(); ++it) + llh += it->second.log_crp_prob(dd, aa); + return llh; + } + + struct DiscountResampler { + DiscountResampler(const MConditionalTranslationModel& m) : m_(m) {} + const MConditionalTranslationModel& m_; + double operator()(const double& proposed_discount) const { + return m_.log_likelihood(proposed_discount, m_.strength); + } + }; + + struct AlphaResampler { + AlphaResampler(const MConditionalTranslationModel& m) : m_(m) {} + const MConditionalTranslationModel& m_; + double operator()(const double& proposed_strength) const { + return m_.log_likelihood(m_.d, proposed_strength); + } + }; + void ResampleHyperparameters(MT19937* rng) { - for (RuleModelHash::iterator it = r.begin(); it != r.end(); ++it) - it->second.resample_hyperparameters(rng); - } + const unsigned nloop = 5; + const unsigned niterations = 10; + DiscountResampler dr(*this); + AlphaResampler ar(*this); + for (int iter = 0; iter < nloop; ++iter) { + strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits::min(), + std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); + double min_discount = std::numeric_limits::min(); + if (strength < 0.0) min_discount -= strength; + d = slice_sampler1d(dr, d, *rng, min_discount, + 1.0, 0.0, niterations, 100*niterations); + } + strength = slice_sampler1d(ar, strength, *rng, -d, + std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); + typename std::tr1::unordered_map, MFCR<1,TRule>, boost::hash > >::iterator it; + std::cerr << "MConditionalTranslationModel(d=" << d << ",s=" << strength << ") = " << log_likelihood(d, strength) << std::endl; + for (it = r.begin(); it != r.end(); ++it) { + it->second.set_discount(d); + it->second.set_strength(strength); + } + } int DecrementRule(const TRule& rule, MT19937* rng) { RuleModelHash::iterator it = r.find(rule.f_); @@ -46,7 +91,7 @@ struct MConditionalTranslationModel { int IncrementRule(const TRule& rule, MT19937* rng) { RuleModelHash::iterator it = r.find(rule.f_); if (it == r.end()) { - it = 
r.insert(make_pair(rule.f_, MFCR<1,TRule>(1.0, 1.0, 1.0, 1.0, 1e-9, 4.0))).first; + it = r.insert(make_pair(rule.f_, MFCR<1,TRule>(d, strength))).first; } p0s[0] = rp0(rule); TableCount delta = it->second.increment(rule, p0s.begin(), lambdas.begin(), rng); @@ -66,15 +111,7 @@ struct MConditionalTranslationModel { } prob_t Likelihood() const { - prob_t p = prob_t::One(); -#if 0 - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - prob_t q; q.logeq(it->second.log_crp_prob()); - p *= q; - for (CCRP_NoTable::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) - p *= rp0(i2->first); - } -#endif + prob_t p; p.logeq(log_likelihood(d, strength)); return p; } @@ -83,6 +120,7 @@ struct MConditionalTranslationModel { MFCR<1, TRule>, boost::hash > > RuleModelHash; RuleModelHash r; + double d, strength; std::vector lambdas; mutable std::vector p0s; }; diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc index 7ebada13..104f356b 100644 --- a/gi/pf/pyp_lm.cc +++ b/gi/pf/pyp_lm.cc @@ -18,7 +18,7 @@ // I use templates to handle the recursive formalation of the prior, so // the order of the model has to be specified here, at compile time: -#define kORDER 3 +#define kORDER 4 using namespace std; using namespace tr1; @@ -114,7 +114,7 @@ template struct PYPLM { if (aa <= -dd) return -std::numeric_limits::infinity(); //double llh = Md::log_beta_density(dd, 10, 3) + Md::log_gamma_density(aa, 1, 1); double llh = Md::log_beta_density(dd, discount_a, discount_b) + - Md::log_gamma_density(aa, strength_s, strength_r); + Md::log_gamma_density(aa + dd, strength_s, strength_r); typename unordered_map, CCRP, boost::hash > >::const_iterator it; for (it = p.begin(); it != p.end(); ++it) llh += it->second.log_crp_prob(dd, aa); @@ -141,12 +141,14 @@ template struct PYPLM { DiscountResampler dr(*this); AlphaResampler ar(*this); for (int iter = 0; iter < nloop; ++iter) { - strength = slice_sampler1d(ar, strength, *rng, 0.0, + strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits::min(), std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - d = slice_sampler1d(dr, d, *rng, std::numeric_limits::min(), + double min_discount = std::numeric_limits::min(); + if (strength < 0.0) min_discount -= strength; + d = slice_sampler1d(dr, d, *rng, min_discount, 1.0, 0.0, niterations, 100*niterations); } - strength = slice_sampler1d(ar, strength, *rng, 0.0, + strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits::min(), std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); typename unordered_map, CCRP, boost::hash > >::iterator it; cerr << "PYPLM<" << N << ">(d=" << d << ",a=" << strength << ") = " << log_likelihood(d, strength) << endl; diff --git a/utils/ccrp.h b/utils/ccrp.h index e24130ac..439d7e1e 100644 --- a/utils/ccrp.h +++ b/utils/ccrp.h @@ -225,12 +225,12 @@ class CCRP { StrengthResampler sr(*this); for (int iter = 0; iter < nloop; ++iter) { if (has_strength_prior()) { - strength_ = slice_sampler1d(sr, strength_, *rng, -discount_, + strength_ = slice_sampler1d(sr, strength_, *rng, -discount_ + std::numeric_limits::min(), std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); } if (has_discount_prior()) { double min_discount = std::numeric_limits::min(); - if (strength_ < 0.0) min_discount = -strength_; + if (strength_ < 0.0) min_discount -= strength_; discount_ = slice_sampler1d(dr, discount_, *rng, min_discount, 1.0, 0.0, niterations, 100*niterations); } diff --git a/utils/mfcr.h b/utils/mfcr.h index 
6cc0ebf1..886f01ef 100644 --- a/utils/mfcr.h +++ b/utils/mfcr.h @@ -48,7 +48,7 @@ class MFCR { discount_prior_strength_(std::numeric_limits::quiet_NaN()), discount_prior_beta_(std::numeric_limits::quiet_NaN()), strength_prior_shape_(std::numeric_limits::quiet_NaN()), - strength_prior_rate_(std::numeric_limits::quiet_NaN()) {} + strength_prior_rate_(std::numeric_limits::quiet_NaN()) { check_hyperparameters(); } MFCR(double discount_strength, double discount_beta, double strength_shape, double strength_rate, double d = 0.9, double strength = 10.0) : num_tables_(), @@ -58,10 +58,23 @@ class MFCR { discount_prior_strength_(discount_strength), discount_prior_beta_(discount_beta), strength_prior_shape_(strength_shape), - strength_prior_rate_(strength_rate) {} + strength_prior_rate_(strength_rate) { check_hyperparameters(); } + + void check_hyperparameters() { + if (discount_ < 0.0 || discount_ >= 1.0) { + std::cerr << "Bad discount: " << discount_ << std::endl; + abort(); + } + if (strength_ <= -discount_) { + std::cerr << "Bad strength: " << strength_ << " (discount=" << discount_ << ")" << std::endl; + abort(); + } + } double discount() const { return discount_; } double strength() const { return strength_; } + void set_discount(double d) { discount_ = d; check_hyperparameters(); } + void set_strength(double a) { strength_ = a; check_hyperparameters(); } bool has_discount_prior() const { return !std::isnan(discount_prior_strength_); @@ -275,7 +288,7 @@ class MFCR { } if (has_discount_prior()) { double min_discount = std::numeric_limits::min(); - if (strength_ < 0.0) min_discount = -strength_; + if (strength_ < 0.0) min_discount -= strength_; discount_ = slice_sampler1d(dr, discount_, *rng, min_discount, 1.0, 0.0, niterations, 100*niterations); } -- cgit v1.2.3 From 27e0de58bf49a4fc74bbf58718d1b89525a154a6 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 6 Mar 2012 23:20:16 -0500 Subject: a few statistical helpers i'm using to figure some algorithms out --- utils/m.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ utils/m_test.cc | 16 ++++++++++++++++ 2 files changed, 61 insertions(+) (limited to 'utils') diff --git a/utils/m.h b/utils/m.h index 5e45efee..dc881b36 100644 --- a/utils/m.h +++ b/utils/m.h @@ -4,6 +4,10 @@ #include #include #include +#include + +// TODO right now I sometimes assert that x is in the support of the distributions +// should be configurable to return -inf instead template struct M { @@ -59,6 +63,47 @@ struct M { return (alpha-1)*std::log(x)+(beta-1)*std::log(1-x) - log_beta_fn(alpha, beta); } + // support x \in R + static inline F log_laplace_density(const F& x, const F& mu, const F& b) { + assert(b > 0.0); + return -std::log(2*b) - std::fabs(x - mu) / b; + } + + // support x \in R + // this is NOT the "log normal" density, it is the log of the "normal density at x" + static inline F log_gaussian_density(const F& x, const F& mu, const F& var) { + assert(var > 0.0); + return -0.5 * std::log(var * 2 * boost::math::constants::pi()) - (x - mu)*(x - mu) / (2 * var); + } + + // (x1,x2) \in R^2 + // parameterized in terms of two means, a two "variances", a correlation < 1 + static inline F log_bivariate_gaussian_density(const F& x1, const F& x2, + const F& mu1, const F& mu2, + const F& var1, const F& var2, + const F& cor) { + assert(var1 > 0); + assert(var2 > 0); + assert(std::fabs(cor) < 1.0); + const F cor2 = cor*cor; + const F var1var22 = var1 * var2; + const F Z = 0.5 * std::log(var1var22 * (1 - cor2)) + std::log(2 * boost::math::constants::pi()); + return -Z -1.0 
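    // (Reference formula; the expression below implements it.)
    // log N((x1,x2); mu, Sigma) =
    //   -log(2*pi) - 0.5*log(var1*var2*(1 - cor^2))
    //   - [ (x1-mu1)^2/var1 + (x2-mu2)^2/var2
    //       - 2*cor*(x1-mu1)*(x2-mu2)/sqrt(var1*var2) ] / (2*(1 - cor^2))
    // with Z above collecting the two normalizer terms.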
/ (2 * (1 - cor2)) * ((x1 - mu1)*(x1-mu1) / var1 + (x2-mu2)*(x2-mu2) / var2 - 2*cor*(x1 - mu1)*(x2-mu2) / std::sqrt(var1var22)); + } + + // support x \in [a,b] + static inline F log_triangle_density(const F& x, const F& a, const F& b, const F& c) { + assert(a < b); + assert(a <= c); + assert(c <= b); + assert(x >= a); + assert(x <= b); + if (x <= c) + return std::log(2) + std::log(x - a) - std::log(b - a) - std::log(c - a); + else + return std::log(2) + std::log(b - x) - std::log(b - a) - std::log(b - c); + } + // note: this has been adapted so that 0 is in the support of the distribution // support [0, 1, 2 ...) static inline F log_yule_simon(unsigned x, const F& rho) { diff --git a/utils/m_test.cc b/utils/m_test.cc index fca8f895..c4d6a166 100644 --- a/utils/m_test.cc +++ b/utils/m_test.cc @@ -14,6 +14,22 @@ class MTest : public testing::Test { virtual void TearDown() { } }; +TEST_F(MTest, Densities) { + double px1 = Md::log_gaussian_density(1.0, 0.0, 1.0); + double px2 = Md::log_gaussian_density(-1.0, 0.0, 1.0); + double py1 = Md::log_laplace_density(1.0, 0.0, 1.0); + double py2 = Md::log_laplace_density(1.0, 0.0, 1.0); + double pz1 = Md::log_triangle_density(1.0, -2.0, 2.0, 0.0); + double pz2 = Md::log_triangle_density(1.0, -2.0, 2.0, 0.0); + cerr << px1 << " " << py1 << " " << pz2 << endl; + EXPECT_FLOAT_EQ(px1, px2); + EXPECT_FLOAT_EQ(py1, py2); + EXPECT_FLOAT_EQ(pz1, pz2); + double b1 = Md::log_bivariate_gaussian_density(1.0, -1.0, 0.0, 0.0, 1.0, 1.0, -0.8); + double b2 = Md::log_bivariate_gaussian_density(-1.0, 1.0, 0.0, 0.0, 1.0, 1.0, -0.8); + cerr << b1 << " " << b2 << endl; +} + TEST_F(MTest, Poisson) { double prev = 1.0; double tot = 0; -- cgit v1.2.3 From 113317266853abff2e1c0c3e889017d0eee55c93 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 9 Mar 2012 22:23:50 -0500 Subject: moar --- gi/pf/Makefile.am | 3 +- gi/pf/align-lexonly-pyp.cc | 207 ++++++++++------------------------------- gi/pf/align-tl.cc | 18 ++-- gi/pf/backward.cc | 89 ++++++++++++++++++ gi/pf/backward.h | 33 +++++++ gi/pf/base_distributions.h | 8 +- gi/pf/guess-translits.pl | 2 +- gi/pf/nuisance_test.cc | 6 +- gi/pf/pyp_lm.cc | 2 +- gi/pf/pyp_tm.cc | 113 +++++++++++++++++++++++ gi/pf/pyp_tm.h | 34 +++++++ gi/pf/pyp_word_model.cc | 20 ++++ gi/pf/pyp_word_model.h | 58 ++++++++++++ gi/pf/reachability.cc | 8 +- gi/pf/reachability.h | 8 +- gi/pf/transliterations.cc | 223 ++++++++++++++++++++++++++++++++++++++++----- gi/pf/transliterations.h | 3 +- utils/ccrp_nt.h | 17 ++-- 18 files changed, 628 insertions(+), 224 deletions(-) create mode 100644 gi/pf/backward.cc create mode 100644 gi/pf/backward.h create mode 100644 gi/pf/pyp_tm.cc create mode 100644 gi/pf/pyp_tm.h create mode 100644 gi/pf/pyp_word_model.cc create mode 100644 gi/pf/pyp_word_model.h (limited to 'utils') diff --git a/gi/pf/Makefile.am b/gi/pf/Makefile.am index 94364c3d..4ce72ba1 100644 --- a/gi/pf/Makefile.am +++ b/gi/pf/Makefile.am @@ -2,7 +2,7 @@ bin_PROGRAMS = cbgi brat dpnaive pfbrat pfdist itg pfnaive condnaive align-lexon noinst_LIBRARIES = libpf.a -libpf_a_SOURCES = base_distributions.cc reachability.cc cfg_wfst_composer.cc corpus.cc unigrams.cc ngram_base.cc transliterations.cc +libpf_a_SOURCES = base_distributions.cc reachability.cc cfg_wfst_composer.cc corpus.cc unigrams.cc ngram_base.cc transliterations.cc backward.cc pyp_word_model.cc pyp_tm.cc nuisance_test_SOURCES = nuisance_test.cc nuisance_test_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a 
$(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz @@ -10,6 +10,7 @@ nuisance_test_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mtev align_lexonly_SOURCES = align-lexonly.cc align_lexonly_pyp_SOURCES = align-lexonly-pyp.cc +align_lexonly_pyp_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz align_tl_SOURCES = align-tl.cc align_tl_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc index 13a3a487..d68a4b8f 100644 --- a/gi/pf/align-lexonly-pyp.cc +++ b/gi/pf/align-lexonly-pyp.cc @@ -1,27 +1,18 @@ #include -#include #include -#include #include #include -#include "array2d.h" -#include "base_distributions.h" -#include "monotonic_pseg.h" -#include "conditional_pseg.h" -#include "trule.h" #include "tdict.h" #include "stringlib.h" #include "filelib.h" -#include "dict.h" +#include "array2d.h" #include "sampler.h" -#include "mfcr.h" #include "corpus.h" -#include "ngram_base.h" +#include "pyp_tm.h" using namespace std; -using namespace tr1; namespace po = boost::program_options; void InitCommandLine(int argc, char** argv, po::variables_map* conf) { @@ -51,7 +42,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { } } -shared_ptr prng; +MT19937* prng; struct LexicalAlignment { unsigned char src_index; @@ -66,159 +57,59 @@ struct AlignedSentencePair { Array2D posterior; }; -struct HierarchicalWordBase { - explicit HierarchicalWordBase(const unsigned vocab_e_size) : - base(prob_t::One()), r(1,1,1,1,0.66,50.0), u0(-log(vocab_e_size)), l(1,prob_t::One()), v(1, prob_t::Zero()) {} - - void ResampleHyperparameters(MT19937* rng) { - r.resample_hyperparameters(rng); - } - - inline double logp0(const vector& s) const { - return Md::log_poisson(s.size(), 7.5) + s.size() * u0; - } - - // return p0 of rule.e_ - prob_t operator()(const TRule& rule) const { - v[0].logeq(logp0(rule.e_)); - return r.prob(rule.e_, v.begin(), l.begin()); - } - - void Increment(const TRule& rule) { - v[0].logeq(logp0(rule.e_)); - if (r.increment(rule.e_, v.begin(), l.begin(), &*prng).count) { - base *= v[0] * l[0]; - } - } - - void Decrement(const TRule& rule) { - if (r.decrement(rule.e_, &*prng).count) { - base /= prob_t(exp(logp0(rule.e_))); - } - } - - prob_t Likelihood() const { - prob_t p; p.logeq(r.log_crp_prob()); - p *= base; - return p; +struct Aligner { + Aligner(const vector >& lets, int num_letters, vector* c) : + corpus(*c), + model(lets, num_letters), + kNULL(TD::Convert("NULL")) { + assert(lets[kNULL].size() == 0); } - void Summary() const { - cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << " (d=" << r.discount() << ",s=" << r.strength() << ')' << endl; - for (MFCR<1,vector >::const_iterator it = r.begin(); it != r.end(); ++it) - cerr << " " << it->second.total_dish_count_ << " (on " << it->second.table_counts_.size() << " tables) " << TD::GetString(it->first) << endl; - } - - prob_t base; - MFCR<1,vector > r; - const double u0; - const vector l; - mutable vector v; -}; - -struct BasicLexicalAlignment { - explicit BasicLexicalAlignment(const vector >& lets, - const unsigned words_e, - const unsigned letters_e, - vector* corp) : - letters(lets), - corpus(*corp), - //up0(words_e), - //up0("en.chars.1gram", letters_e), - 
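      // (Context note + usage sketch; not part of the patch.)  The
      // commented-out bases being deleted here -- unigram word, character
      // n-gram, uniform -- were alternative p0 models; the hierarchical
      // PYP-over-letters base is the survivor, refactored into
      // PYPWordModel and wrapped by PYPLexicalTranslation (pyp_tm.h).
      // The interface this file now drives, per the headers added below
      // (surrounding setup assumed):
      //
      //   PYPLexicalTranslation model(letters, letset.size());
      //   model.Increment(f, e, prng);          // observe a link f -> e
      //   prob_t p = model.Prob(f, e);          // p(e | f)
      //   model.Decrement(f, e, prng);          // retract the link
      //   model.ResampleHyperparameters(prng);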
//up0("en.words.1gram"), - up0(letters_e), - //up0("en.chars.2gram"), - tmodel(up0) { - } + vector& corpus; + PYPLexicalTranslation model; + const WordID kNULL; - void InstantiateRule(const WordID src, - const WordID trg, - TRule* rule) const { - static const WordID kX = TD::Convert("X") * -1; - rule->lhs_ = kX; - rule->e_ = letters[trg]; - rule->f_ = letters[src]; + void ResampleHyperparameters() { + model.ResampleHyperparameters(prng); } void InitializeRandom() { - const WordID kNULL = TD::Convert("NULL"); cerr << "Initializing with random alignments ...\n"; for (unsigned i = 0; i < corpus.size(); ++i) { AlignedSentencePair& asp = corpus[i]; asp.a.resize(asp.trg.size()); for (unsigned j = 0; j < asp.trg.size(); ++j) { - const unsigned char a_j = prng->next() * (1 + asp.src.size()); + unsigned char& a_j = asp.a[j].src_index; + a_j = prng->next() * (1 + asp.src.size()); const WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL); - TRule r; - InstantiateRule(f_a_j, asp.trg[j], &r); - asp.a[j].is_transliteration = false; - asp.a[j].src_index = a_j; - if (tmodel.IncrementRule(r, &*prng)) - up0.Increment(r); + model.Increment(f_a_j, asp.trg[j], &*prng); } } - cerr << " LLH = " << Likelihood() << endl; - } - - prob_t Likelihood() const { - prob_t p = tmodel.Likelihood(); - p *= up0.Likelihood(); - return p; - } - - void ResampleHyperparemeters() { - tmodel.ResampleHyperparameters(&*prng); - up0.ResampleHyperparameters(&*prng); - cerr << " (base d=" << up0.r.discount() << ",s=" << up0.r.strength() << ")\n"; + cerr << "Corpus intialized randomly. LLH = " << model.Likelihood() << endl; } - void ResampleCorpus(); - - const vector >& letters; // spelling dictionary - vector& corpus; - //PhraseConditionalUninformativeBase up0; - //PhraseConditionalUninformativeUnigramBase up0; - //UnigramWordBase up0; - //HierarchicalUnigramBase up0; - HierarchicalWordBase up0; - //CompletelyUniformBase up0; - //FixedNgramBase up0; - //ConditionalTranslationModel tmodel; - //ConditionalTranslationModel tmodel; - //ConditionalTranslationModel tmodel; - //ConditionalTranslationModel tmodel; - MConditionalTranslationModel tmodel; - //ConditionalTranslationModel tmodel; - //ConditionalTranslationModel tmodel; -}; - -void BasicLexicalAlignment::ResampleCorpus() { - static const WordID kNULL = TD::Convert("NULL"); - for (unsigned i = 0; i < corpus.size(); ++i) { - AlignedSentencePair& asp = corpus[i]; - SampleSet ss; ss.resize(asp.src.size() + 1); - for (unsigned j = 0; j < asp.trg.size(); ++j) { - TRule r; - unsigned char& a_j = asp.a[j].src_index; - WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL); - InstantiateRule(f_a_j, asp.trg[j], &r); - if (tmodel.DecrementRule(r, &*prng)) - up0.Decrement(r); - - for (unsigned prop_a_j = 0; prop_a_j <= asp.src.size(); ++prop_a_j) { - const WordID prop_f = (prop_a_j ? asp.src[prop_a_j - 1] : kNULL); - InstantiateRule(prop_f, asp.trg[j], &r); - ss[prop_a_j] = tmodel.RuleProbability(r); + void ResampleCorpus() { + for (unsigned i = 0; i < corpus.size(); ++i) { + AlignedSentencePair& asp = corpus[i]; + SampleSet ss; ss.resize(asp.src.size() + 1); + for (unsigned j = 0; j < asp.trg.size(); ++j) { + unsigned char& a_j = asp.a[j].src_index; + const WordID e_j = asp.trg[j]; + WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL); + model.Decrement(f_a_j, e_j, prng); + + for (unsigned prop_a_j = 0; prop_a_j <= asp.src.size(); ++prop_a_j) { + const WordID prop_f = (prop_a_j ? asp.src[prop_a_j - 1] : kNULL); + ss[prop_a_j] = model.Prob(prop_f, e_j); + } + a_j = prng->SelectSample(ss); + f_a_j = (a_j ? 
asp.src[a_j - 1] : kNULL); + model.Increment(f_a_j, e_j, prng); } - a_j = prng->SelectSample(ss); - f_a_j = (a_j ? asp.src[a_j - 1] : kNULL); - InstantiateRule(f_a_j, asp.trg[j], &r); - if (tmodel.IncrementRule(r, &*prng)) - up0.Increment(r); } + cerr << "LLH = " << model.Likelihood() << " " << model.UniqueConditioningContexts() << endl; } - cerr << " LLH = " << Likelihood() << endl; -} +}; void ExtractLetters(const set& v, vector >* l, set* letset = NULL) { for (set::const_iterator it = v.begin(); it != v.end(); ++it) { @@ -240,8 +131,10 @@ void ExtractLetters(const set& v, vector >* l, set a(asp.src.size(), asp.trg.size()); - for (unsigned j = 0; j < asp.trg.size(); ++j) + for (unsigned j = 0; j < asp.trg.size(); ++j) { + assert(asp.a[j].src_index <= asp.src.size()); if (asp.a[j].src_index) a(asp.a[j].src_index - 1, j) = true; + } cerr << a << endl; } @@ -275,10 +168,9 @@ int main(int argc, char** argv) { InitCommandLine(argc, argv, &conf); if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); + prng = new MT19937(conf["random_seed"].as()); else - prng.reset(new MT19937); -// MT19937& rng = *prng; + prng = new MT19937; vector > corpuse, corpusf; set vocabe, vocabf; @@ -304,23 +196,18 @@ int main(int argc, char** argv) { ExtractLetters(vocabf, &letters, NULL); letters[TD::Convert("NULL")].clear(); - BasicLexicalAlignment x(letters, vocabe.size(), letset.size(), &corpus); - x.InitializeRandom(); + Aligner aligner(letters, letset.size(), &corpus); + aligner.InitializeRandom(); + const unsigned samples = conf["samples"].as(); for (int i = 0; i < samples; ++i) { for (int j = 65; j < 67; ++j) Debug(corpus[j]); - cerr << i << "\t" << x.tmodel.r.size() << "\t"; - if (i % 7 == 6) x.ResampleHyperparemeters(); - x.ResampleCorpus(); + if (i % 7 == 6) aligner.ResampleHyperparameters(); + aligner.ResampleCorpus(); if (i > (samples / 5) && (i % 10 == 9)) for (int j = 0; j < corpus.size(); ++j) AddSample(&corpus[j]); } for (unsigned i = 0; i < corpus.size(); ++i) WriteAlignments(corpus[i]); - //ModelAndData posterior(x, &corpus, vocabe, vocabf); - x.tmodel.Summary(); - x.up0.Summary(); - - //posterior.Sample(); return 0; } diff --git a/gi/pf/align-tl.cc b/gi/pf/align-tl.cc index fc9b7ca5..cbe8c6c8 100644 --- a/gi/pf/align-tl.cc +++ b/gi/pf/align-tl.cc @@ -6,6 +6,7 @@ #include #include +#include "backward.h" #include "array2d.h" #include "base_distributions.h" #include "monotonic_pseg.h" @@ -30,10 +31,11 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { opts.add_options() ("samples,s",po::value()->default_value(1000),"Number of samples") ("input,i",po::value(),"Read parallel data from") + ("s2t", po::value(), "character level source-to-target prior transliteration probabilities") + ("t2s", po::value(), "character level target-to-source prior transliteration probabilities") ("max_src_chunk", po::value()->default_value(4), "Maximum size of translitered chunk in source") ("max_trg_chunk", po::value()->default_value(4), "Maximum size of translitered chunk in target") - ("min_transliterated_src_length", po::value()->default_value(3), "Minimum length of source words considered for transliteration") - ("filter_ratio", po::value()->default_value(0.66), "Filter ratio: basically, if the lengths differ by less than this ratio, mark the pair as non-transliteratable") + ("expected_src_to_trg_ratio", po::value()->default_value(1.0), "If a word is transliterated, what is the expected length ratio from source to target?") ("random_seed,S",po::value(), "Random seed"); 
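    // A hypothetical invocation (file names invented for illustration):
    //   ./align-tl -i corpus.f-e --s2t s2t.m1 --t2s t2s.m1 \
    //      --max_src_chunk 4 --max_trg_chunk 4 \
    //      --expected_src_to_trg_ratio 1.0 -s 1000 -S 42
    // where the --s2t/--t2s tables are character-level Model 1 files of
    // the kind the guess-translits.pl + training/model1 recipe produces.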
po::options_description clo("Command line options"); clo.add_options() @@ -303,7 +305,7 @@ int main(int argc, char** argv) { corpusf.clear(); corpuse.clear(); vocabf.insert(TD::Convert("NULL")); - vector > letters(TD::NumWords()); + vector > letters(TD::NumWords() + 1); set letset; ExtractLetters(vocabe, &letters, &letset); ExtractLetters(vocabf, &letters, NULL); @@ -312,9 +314,9 @@ int main(int argc, char** argv) { // TODO configure this const int max_src_chunk = conf["max_src_chunk"].as(); const int max_trg_chunk = conf["max_trg_chunk"].as(); - const double filter_rat = conf["filter_ratio"].as(); - const int min_trans_src = conf["min_transliterated_src_length"].as(); - Transliterations tl(max_src_chunk, max_trg_chunk, filter_rat); + const double s2t_rat = conf["expected_src_to_trg_ratio"].as(); + const BackwardEstimator be(conf["s2t"].as(), conf["t2s"].as()); + Transliterations tl(max_src_chunk, max_trg_chunk, s2t_rat, be); cerr << "Initializing transliteration graph structures ...\n"; for (int i = 0; i < corpus.size(); ++i) { @@ -325,8 +327,8 @@ int main(int argc, char** argv) { for (int k = 0; k < trg.size(); ++k) { const vector& trg_let = letters[trg[k]]; tl.Initialize(src[j], src_let, trg[k], trg_let); - if (src_let.size() < min_trans_src) - tl.Forbid(src[j], src_let, trg[k], trg_let); + //if (src_let.size() < min_trans_src) + // tl.Forbid(src[j], src_let, trg[k], trg_let); } } } diff --git a/gi/pf/backward.cc b/gi/pf/backward.cc new file mode 100644 index 00000000..b92629fd --- /dev/null +++ b/gi/pf/backward.cc @@ -0,0 +1,89 @@ +#include "backward.h" + +#include +#include + +#include "array2d.h" +#include "reachability.h" +#include "base_distributions.h" + +using namespace std; + +BackwardEstimator::BackwardEstimator(const string& s2t, + const string& t2s) : m1(new Model1(s2t)), m1inv(new Model1(t2s)) {} + +BackwardEstimator::~BackwardEstimator() { + delete m1; m1 = NULL; + delete m1inv; m1inv = NULL; +} + +float BackwardEstimator::ComputeBackwardProb(const std::vector& src, + const std::vector& trg, + unsigned src_covered, + unsigned trg_covered, + double s2t_ratio) const { + if (src_covered == src.size() || trg_covered == trg.size()) { + assert(src_covered == src.size()); + assert(trg_covered == trg.size()); + return 0; + } + static const WordID kNULL = TD::Convert(""); + const prob_t uniform_alignment(1.0 / (src.size() - src_covered + 1)); + // TODO factor in expected length ratio + prob_t e; e.logeq(Md::log_poisson(trg.size() - trg_covered, (src.size() - src_covered) * s2t_ratio)); // p(trg len remaining | src len remaining) + for (unsigned j = trg_covered; j < trg.size(); ++j) { + prob_t p = (*m1)(kNULL, trg[j]) + prob_t(1e-12); + for (unsigned i = src_covered; i < src.size(); ++i) + p += (*m1)(src[i], trg[j]); + if (p.is_0()) { + cerr << "ERROR: p(" << TD::Convert(trg[j]) << " | " << TD::GetString(src) << ") = 0!\n"; + assert(!"failed"); + } + p *= uniform_alignment; + e *= p; + } + // TODO factor in expected length ratio + const prob_t inv_uniform(1.0 / (trg.size() - trg_covered + 1.0)); + prob_t inv; + inv.logeq(Md::log_poisson(src.size() - src_covered, (trg.size() - trg_covered) / s2t_ratio)); + for (unsigned i = src_covered; i < src.size(); ++i) { + prob_t p = (*m1inv)(kNULL, src[i]) + prob_t(1e-12); + for (unsigned j = trg_covered; j < trg.size(); ++j) + p += (*m1inv)(trg[j], src[i]); + if (p.is_0()) { + cerr << "ERROR: p_inv(" << TD::Convert(src[i]) << " | " << TD::GetString(trg) << ") = 0!\n"; + assert(!"failed"); + } + p *= inv_uniform; + inv *= p; + } + return (log(e) 
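    // The value returned below is the average of the two directed Model 1
    // log-scores, i.e. the log of the geometric mean of p(remaining trg |
    // remaining src) and p(remaining src | remaining trg) -- a symmetric
    // estimate of how well the uncovered suffixes can still be explained.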
+ log(inv)) / 2; +} + +void BackwardEstimator::InitializeGrid(const vector& src, + const vector& trg, + const Reachability& r, + double s2t_ratio, + float* grid) const { + queue > q; + q.push(make_pair(0,0)); + Array2D done(src.size()+1, trg.size()+1, false); + //cerr << TD::GetString(src) << " ||| " << TD::GetString(trg) << endl; + while(!q.empty()) { + const pair n = q.front(); + q.pop(); + if (done(n.first,n.second)) continue; + done(n.first,n.second) = true; + + float lp = ComputeBackwardProb(src, trg, n.first, n.second, s2t_ratio); + if (n.first == 0 && n.second == 0) grid[0] = lp; + //cerr << " " << n.first << "," << n.second << "\t" << lp << endl; + + if (n.first == src.size() || n.second == trg.size()) continue; + const vector >& edges = r.valid_deltas[n.first][n.second]; + for (int i = 0; i < edges.size(); ++i) + q.push(make_pair(n.first + edges[i].first, n.second + edges[i].second)); + } + //static int cc = 0; ++cc; if (cc == 80) exit(1); +} + diff --git a/gi/pf/backward.h b/gi/pf/backward.h new file mode 100644 index 00000000..e67eff0c --- /dev/null +++ b/gi/pf/backward.h @@ -0,0 +1,33 @@ +#ifndef _BACKWARD_H_ +#define _BACKWARD_H_ + +#include +#include +#include "wordid.h" + +struct Reachability; +struct Model1; + +struct BackwardEstimator { + BackwardEstimator(const std::string& s2t, + const std::string& t2s); + ~BackwardEstimator(); + + void InitializeGrid(const std::vector& src, + const std::vector& trg, + const Reachability& r, + double src2trg_ratio, + float* grid) const; + + private: + float ComputeBackwardProb(const std::vector& src, + const std::vector& trg, + unsigned src_covered, + unsigned trg_covered, + double src2trg_ratio) const; + + Model1* m1; + Model1* m1inv; +}; + +#endif diff --git a/gi/pf/base_distributions.h b/gi/pf/base_distributions.h index 0d597c5c..84dacdf2 100644 --- a/gi/pf/base_distributions.h +++ b/gi/pf/base_distributions.h @@ -14,13 +14,7 @@ #include "tdict.h" #include "sampler.h" #include "m.h" - -inline std::ostream& operator<<(std::ostream& os, const std::vector& p) { - os << '['; - for (int i = 0; i < p.size(); ++i) - os << (i==0 ? 
"" : " ") << TD::Convert(p[i]); - return os << ']'; -} +#include "os_phrase.h" struct Model1 { explicit Model1(const std::string& fname) : diff --git a/gi/pf/guess-translits.pl b/gi/pf/guess-translits.pl index aafec13a..d00c2168 100755 --- a/gi/pf/guess-translits.pl +++ b/gi/pf/guess-translits.pl @@ -69,4 +69,4 @@ for my $f (keys %fs) { } } print STDERR "Extracted $num pairs.\n"; -print STDERR "Recommend running:\n ../../training/model1 -t -99999 output.txt\n"; +print STDERR "Recommend running:\n ../../training/model1 -v -d -t -99999 output.txt\n"; diff --git a/gi/pf/nuisance_test.cc b/gi/pf/nuisance_test.cc index 0f44fe95..fc0af9cb 100644 --- a/gi/pf/nuisance_test.cc +++ b/gi/pf/nuisance_test.cc @@ -124,9 +124,9 @@ int main(int argc, char** argv) { WordID y = TD::Convert("remember"); vector src; TD::ConvertSentence("s o u v e n o n s", &src); vector trg; TD::ConvertSentence("r e m e m b e r", &trg); - Transliterations xx; - xx.Initialize(x, src, y, trg); - return 1; +// Transliterations xx; +// xx.Initialize(x, src, y, trg); +// return 1; for (int j = 0; j < ITERS; ++j) { Base b; diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc index 104f356b..52e6be2c 100644 --- a/gi/pf/pyp_lm.cc +++ b/gi/pf/pyp_lm.cc @@ -18,7 +18,7 @@ // I use templates to handle the recursive formalation of the prior, so // the order of the model has to be specified here, at compile time: -#define kORDER 4 +#define kORDER 3 using namespace std; using namespace tr1; diff --git a/gi/pf/pyp_tm.cc b/gi/pf/pyp_tm.cc new file mode 100644 index 00000000..94cbe7c3 --- /dev/null +++ b/gi/pf/pyp_tm.cc @@ -0,0 +1,113 @@ +#include "pyp_tm.h" + +#include +#include +#include + +#include "base_distributions.h" +#include "monotonic_pseg.h" +#include "conditional_pseg.h" +#include "tdict.h" +#include "ccrp.h" +#include "pyp_word_model.h" + +using namespace std; +using namespace std::tr1; + +template +struct ConditionalPYPWordModel { + ConditionalPYPWordModel(Base* b) : base(*b) {} + + void Summary() const { + cerr << "Number of conditioning contexts: " << r.size() << endl; + for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { + cerr << TD::Convert(it->first) << " \tPYP(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << endl; + for (CCRP >::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) + cerr << " " << i2->second.total_dish_count_ << '\t' << TD::GetString(i2->first) << endl; + } + } + + void ResampleHyperparameters(MT19937* rng) { + for (RuleModelHash::iterator it = r.begin(); it != r.end(); ++it) + it->second.resample_hyperparameters(rng); + } + + prob_t Prob(const WordID src, const vector& trglets) const { + RuleModelHash::const_iterator it = r.find(src); + if (it == r.end()) { + return base(trglets); + } else { + return it->second.prob(trglets, base(trglets)); + } + } + + void Increment(const WordID src, const vector& trglets, MT19937* rng) { + RuleModelHash::iterator it = r.find(src); + if (it == r.end()) + it = r.insert(make_pair(src, CCRP >(1,1,1,1,0.5,1.0))).first; + if (it->second.increment(trglets, base(trglets), rng)) + base.Increment(trglets, rng); + } + + void Decrement(const WordID src, const vector& trglets, MT19937* rng) { + RuleModelHash::iterator it = r.find(src); + assert(it != r.end()); + if (it->second.decrement(trglets, rng)) { + base.Decrement(trglets, rng); + if (it->second.num_customers() == 0) + r.erase(it); + } + } + + prob_t Likelihood() const { + prob_t p = prob_t::One(); + for (RuleModelHash::const_iterator it = 
r.begin(); it != r.end(); ++it) { + prob_t q; q.logeq(it->second.log_crp_prob()); + p *= q; + } + return p; + } + + unsigned UniqueConditioningContexts() const { + return r.size(); + } + + Base& base; + typedef unordered_map > > RuleModelHash; + RuleModelHash r; +}; + +PYPLexicalTranslation::PYPLexicalTranslation(const vector >& lets, + const unsigned num_letters) : + letters(lets), + up0(new PYPWordModel(num_letters)), + tmodel(new ConditionalPYPWordModel(up0)), + kX(-TD::Convert("X")) {} + +prob_t PYPLexicalTranslation::Likelihood() const { + prob_t p = up0->Likelihood(); + p *= tmodel->Likelihood(); + return p; +} + +void PYPLexicalTranslation::ResampleHyperparameters(MT19937* rng) { + tmodel->ResampleHyperparameters(rng); + up0->ResampleHyperparameters(rng); +} + +unsigned PYPLexicalTranslation::UniqueConditioningContexts() const { + return tmodel->UniqueConditioningContexts(); +} + +prob_t PYPLexicalTranslation::Prob(WordID src, WordID trg) const { + return tmodel->Prob(src, letters[trg]); +} + +void PYPLexicalTranslation::Increment(WordID src, WordID trg, MT19937* rng) { + tmodel->Increment(src, letters[trg], rng); +} + +void PYPLexicalTranslation::Decrement(WordID src, WordID trg, MT19937* rng) { + tmodel->Decrement(src, letters[trg], rng); +} + diff --git a/gi/pf/pyp_tm.h b/gi/pf/pyp_tm.h new file mode 100644 index 00000000..fa0fb28f --- /dev/null +++ b/gi/pf/pyp_tm.h @@ -0,0 +1,34 @@ +#ifndef PYP_LEX_TRANS +#define PYP_LEX_TRANS + +#include +#include "wordid.h" +#include "prob.h" +#include "sampler.h" + +struct TRule; +struct PYPWordModel; +template struct ConditionalPYPWordModel; + +struct PYPLexicalTranslation { + explicit PYPLexicalTranslation(const std::vector >& lets, + const unsigned num_letters); + + prob_t Likelihood() const; + + void ResampleHyperparameters(MT19937* rng); + prob_t Prob(WordID src, WordID trg) const; // return p(trg | src) + void Summary() const; + void Increment(WordID src, WordID trg, MT19937* rng); + void Decrement(WordID src, WordID trg, MT19937* rng); + unsigned UniqueConditioningContexts() const; + + private: + const std::vector >& letters; // spelling dictionary + PYPWordModel* up0; // base distribuction (model English word) + ConditionalPYPWordModel* tmodel; // translation distributions + // (model English word | French word) + const WordID kX; +}; + +#endif diff --git a/gi/pf/pyp_word_model.cc b/gi/pf/pyp_word_model.cc new file mode 100644 index 00000000..12df4abf --- /dev/null +++ b/gi/pf/pyp_word_model.cc @@ -0,0 +1,20 @@ +#include "pyp_word_model.h" + +#include + +using namespace std; + +void PYPWordModel::ResampleHyperparameters(MT19937* rng) { + r.resample_hyperparameters(rng); + cerr << " PYPWordModel(d=" << r.discount() << ",s=" << r.strength() << ")\n"; +} + +void PYPWordModel::Summary() const { + cerr << "PYPWordModel: generations=" << r.num_customers() + << " PYP(d=" << r.discount() << ",s=" << r.strength() << ')' << endl; + for (CCRP >::const_iterator it = r.begin(); it != r.end(); ++it) + cerr << " " << it->second.total_dish_count_ + << " (on " << it->second.table_counts_.size() << " tables) " + << TD::GetString(it->first) << endl; +} + diff --git a/gi/pf/pyp_word_model.h b/gi/pf/pyp_word_model.h new file mode 100644 index 00000000..800a4fd7 --- /dev/null +++ b/gi/pf/pyp_word_model.h @@ -0,0 +1,58 @@ +#ifndef _PYP_WORD_MODEL_H_ +#define _PYP_WORD_MODEL_H_ + +#include +#include +#include +#include "prob.h" +#include "ccrp.h" +#include "m.h" +#include "tdict.h" +#include "os_phrase.h" + +// PYP(d,s,poisson-uniform) represented as a CRP 
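// (Reference note + usage sketch; not part of the patch.)  The base
// measure is p0(s) = Poisson(|s|; mean_length) * (1/V)^{|s|}: draw a
// length, then each letter uniformly from a V-letter alphabet.
//
//   PYPWordModel m(26);                // V = 26, default mean length 7.5
//   std::vector<WordID> w = /* a letter sequence */;
//   m.Increment(w, rng);               // seat a customer
//   prob_t p = m(w);                   // PYP-smoothed word probability
//   m.Decrement(w, rng);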
+struct PYPWordModel { + explicit PYPWordModel(const unsigned vocab_e_size, const double mean_len = 7.5) : + base(prob_t::One()), r(1,1,1,1,0.66,50.0), u0(-std::log(vocab_e_size)), mean_length(mean_len) {} + + void ResampleHyperparameters(MT19937* rng); + + inline prob_t operator()(const std::vector& s) const { + return r.prob(s, p0(s)); + } + + inline void Increment(const std::vector& s, MT19937* rng) { + if (r.increment(s, p0(s), rng)) + base *= p0(s); + } + + inline void Decrement(const std::vector& s, MT19937 *rng) { + if (r.decrement(s, rng)) + base /= p0(s); + } + + inline prob_t Likelihood() const { + prob_t p; p.logeq(r.log_crp_prob()); + p *= base; + return p; + } + + void Summary() const; + + private: + inline double logp0(const std::vector& s) const { + return Md::log_poisson(s.size(), mean_length) + s.size() * u0; + } + + inline prob_t p0(const std::vector& s) const { + prob_t p; p.logeq(logp0(s)); + return p; + } + + prob_t base; // keeps track of the draws from the base distribution + CCRP > r; + const double u0; // uniform log prob of generating a letter + const double mean_length; // mean length of a word in the base distribution +}; + +#endif diff --git a/gi/pf/reachability.cc b/gi/pf/reachability.cc index c10000f2..7d0d04ac 100644 --- a/gi/pf/reachability.cc +++ b/gi/pf/reachability.cc @@ -12,7 +12,7 @@ struct SState { int prev_trg_covered; }; -void Reachability::ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len, double filter_ratio) { +void Reachability::ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) { typedef boost::multi_array, 2> array_type; array_type a(boost::extents[srclen + 1][trglen + 1]); a[0][0].push_back(SState()); @@ -31,9 +31,9 @@ void Reachability::ComputeReachability(int srclen, int trglen, int src_max_phras } a[0][0].clear(); //cerr << srclen << "," << trglen << ": Final cell contains " << a[srclen][trglen].size() << " back pointers\n"; - size_t min_allowed = (src_max_phrase_len + 1) * (trg_max_phrase_len + 1) * (filter_ratio * filter_ratio); - if (a[srclen][trglen].size() < min_allowed) { - cerr << "Sequence pair with lengths (" << srclen << ',' << trglen << ") violates reachability constraint of min indegree " << min_allowed << " with " << a[srclen][trglen].size() << " in edges\n"; + if (a[srclen][trglen].empty()) { + cerr << "Sequence pair with lengths (" << srclen << ',' << trglen << ") violates reachability constraints\n"; + nodes = 0; return; } diff --git a/gi/pf/reachability.h b/gi/pf/reachability.h index 03967d44..1e22c76a 100644 --- a/gi/pf/reachability.h +++ b/gi/pf/reachability.h @@ -18,19 +18,17 @@ struct Reachability { boost::multi_array node_addresses; // na[src_covered][trg_covered] -- the index of the node in a one-dimensional array (of size "nodes") boost::multi_array >, 2> valid_deltas; // valid_deltas[src_covered][trg_covered] list of valid transitions leaving a particular node - // filter_ratio says if the number of outgoing edges from the first cell is less than - // src_max * trg_max * filter_rat^2 then mark as non reachable - Reachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len, double filter_ratio = 0.0) : + Reachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) : nodes(), edges(boost::extents[srclen][trglen][src_max_phrase_len+1][trg_max_phrase_len+1]), max_src_delta(boost::extents[srclen][trglen]), node_addresses(boost::extents[srclen][trglen]), 
diff --git a/gi/pf/reachability.h b/gi/pf/reachability.h
index 03967d44..1e22c76a 100644
--- a/gi/pf/reachability.h
+++ b/gi/pf/reachability.h
@@ -18,19 +18,17 @@ struct Reachability {
   boost::multi_array<short, 2> node_addresses; // na[src_covered][trg_covered] -- the index of the node in a one-dimensional array (of size "nodes")
   boost::multi_array<std::vector<std::pair<short,short> >, 2> valid_deltas; // valid_deltas[src_covered][trg_covered] list of valid transitions leaving a particular node
 
-  // filter_ratio says if the number of outgoing edges from the first cell is less than
-  //   src_max * trg_max * filter_rat^2 then mark as non reachable
-  Reachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len, double filter_ratio = 0.0) :
+  Reachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) :
       nodes(),
       edges(boost::extents[srclen][trglen][src_max_phrase_len+1][trg_max_phrase_len+1]),
       max_src_delta(boost::extents[srclen][trglen]),
       node_addresses(boost::extents[srclen][trglen]),
       valid_deltas(boost::extents[srclen][trglen]) {
-    ComputeReachability(srclen, trglen, src_max_phrase_len, trg_max_phrase_len, filter_ratio);
+    ComputeReachability(srclen, trglen, src_max_phrase_len, trg_max_phrase_len);
   }
 
  private:
-  void ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len, double filter_ratio);
+  void ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len);
 };
 
 #endif
diff --git a/gi/pf/transliterations.cc b/gi/pf/transliterations.cc
index 8ea4ebd2..2200715e 100644
--- a/gi/pf/transliterations.cc
+++ b/gi/pf/transliterations.cc
@@ -5,14 +5,173 @@
 #include "boost/shared_ptr.hpp"
 
+#include "backward.h"
 #include "filelib.h"
-#include "ccrp.h"
+#include "tdict.h"
+#include "trule.h"
+#include "filelib.h"
+#include "ccrp_nt.h"
 #include "m.h"
 #include "reachability.h"
 
 using namespace std;
 using namespace std::tr1;
 
+struct TruncatedConditionalLengthModel {
+  TruncatedConditionalLengthModel(unsigned max_src_size, unsigned max_trg_size, double expected_src_to_trg_ratio) :
+      plens(max_src_size+1, vector<prob_t>(max_trg_size+1, 0.0)) {
+    for (unsigned i = 1; i <= max_src_size; ++i) {
+      prob_t z = prob_t::Zero();
+      for (unsigned j = 1; j <= max_trg_size; ++j)
+        z += (plens[i][j] = prob_t(0.01 + exp(Md::log_poisson(j, i * expected_src_to_trg_ratio))));
+      for (unsigned j = 1; j <= max_trg_size; ++j)
+        plens[i][j] /= z;
+      //for (unsigned j = 1; j <= max_trg_size; ++j)
+      //  cerr << "P(trg_len=" << j << " | src_len=" << i << ") = " << plens[i][j] << endl;
+    }
+  }
+
+  // return p(tlen | slen) for *chunks* not full words
+  inline const prob_t& operator()(int slen, int tlen) const {
+    return plens[slen][tlen];
+  }
+
+  vector<vector<prob_t> > plens;
+};
+
+struct CondBaseDist {
+  CondBaseDist(unsigned max_src_size, unsigned max_trg_size, double expected_src_to_trg_ratio) :
+      tclm(max_src_size, max_trg_size, expected_src_to_trg_ratio) {}
+
+  prob_t operator()(const vector<WordID>& src, unsigned sf, unsigned st,
+                    const vector<WordID>& trg, unsigned tf, unsigned tt) const {
+    prob_t p = tclm(st - sf, tt - tf);  // target len | source length ~ TCLM(source len)
+    assert(!"not impl");
+    return p;
+  }
+  inline prob_t operator()(const vector<WordID>& src, const vector<WordID>& trg) const {
+    return (*this)(src, 0, src.size(), trg, 0, trg.size());
+  }
+  TruncatedConditionalLengthModel tclm;
+};
+
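TruncatedConditionalLengthModel pins p(target chunk length | source chunk length) to a floored, renormalized Poisson whose mean grows with the source length. The same table in plain doubles, with a local stand-in for Md::log_poisson (prob_t is avoided here only for brevity; the 0.01 floor and the normalization mirror the constructor above):

#include <cmath>
#include <cstdio>
#include <vector>

// log Poisson pmf: x*log(mean) - mean - log(x!), assuming mean > 0
double log_poisson(unsigned x, double mean) {
  return std::log(mean) * x - lgamma(x + 1.0) - mean;
}

int main() {
  const unsigned max_trg = 6;
  const double ratio = 1.5;   // expected source-to-target length ratio (hypothetical)
  const unsigned slen = 2;
  std::vector<double> p(max_trg + 1, 0.0);
  double z = 0.0;
  for (unsigned t = 1; t <= max_trg; ++t)
    z += (p[t] = 0.01 + std::exp(log_poisson(t, slen * ratio)));  // floored Poisson weight
  for (unsigned t = 1; t <= max_trg; ++t)                         // renormalize over 1..max_trg
    std::printf("P(tlen=%u | slen=%u) = %.3f\n", t, slen, p[t] / z);
  return 0;
}

The 0.01 floor keeps every truncated length reachable, so the chunk sampler can never paint itself into a zero-probability corner.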
+// represents transliteration phrase probabilities, e.g.
+//   p( a l - | A l ) , p( o | A w ) , ...
+struct TransliterationChunkConditionalModel {
+  explicit TransliterationChunkConditionalModel(const CondBaseDist& pp0) :
+      d(0.0),
+      strength(1.0),
+      rp0(pp0) {
+  }
+
+  void Summary() const {
+    std::cerr << "Number of conditioning contexts: " << r.size() << std::endl;
+    for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) {
+      std::cerr << TD::GetString(it->first) << "   \t(\\alpha = " << it->second.alpha() << ") --------------------------" << std::endl;
+      for (CCRP_NoTable<TRule>::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2)
+        std::cerr << "    " << i2->second << '\t' << i2->first << std::endl;
+    }
+  }
+
+  int DecrementRule(const TRule& rule) {
+    RuleModelHash::iterator it = r.find(rule.f_);
+    assert(it != r.end());
+    int count = it->second.decrement(rule);
+    if (count) {
+      if (it->second.num_customers() == 0) r.erase(it);
+    }
+    return count;
+  }
+
+  int IncrementRule(const TRule& rule) {
+    RuleModelHash::iterator it = r.find(rule.f_);
+    if (it == r.end()) {
+      it = r.insert(make_pair(rule.f_, CCRP_NoTable<TRule>(strength))).first;
+    }
+    int count = it->second.increment(rule);
+    return count;
+  }
+
+  void IncrementRules(const std::vector<TRulePtr>& rules) {
+    for (int i = 0; i < rules.size(); ++i)
+      IncrementRule(*rules[i]);
+  }
+
+  void DecrementRules(const std::vector<TRulePtr>& rules) {
+    for (int i = 0; i < rules.size(); ++i)
+      DecrementRule(*rules[i]);
+  }
+
+  prob_t RuleProbability(const TRule& rule) const {
+    prob_t p;
+    RuleModelHash::const_iterator it = r.find(rule.f_);
+    if (it == r.end()) {
+      p = rp0(rule.f_, rule.e_);
+    } else {
+      p = it->second.prob(rule, rp0(rule.f_, rule.e_));
+    }
+    return p;
+  }
+
+  double LogLikelihood(const double& dd, const double& aa) const {
+    if (aa <= -dd) return -std::numeric_limits<double>::infinity();
+    //double llh = Md::log_beta_density(dd, 10, 3) + Md::log_gamma_density(aa, 1, 1);
+    double llh = //Md::log_beta_density(dd, 1, 1) +
+                 Md::log_gamma_density(dd + aa, 1, 1);
+    typename std::tr1::unordered_map<std::vector<WordID>, CCRP_NoTable<TRule>, boost::hash<std::vector<WordID> > >::const_iterator it;
+    for (it = r.begin(); it != r.end(); ++it)
+      llh += it->second.log_crp_prob(aa);
+    return llh;
+  }
+
+  struct AlphaResampler {
+    AlphaResampler(const TransliterationChunkConditionalModel& m) : m_(m) {}
+    const TransliterationChunkConditionalModel& m_;
+    double operator()(const double& proposed_strength) const {
+      return m_.LogLikelihood(m_.d, proposed_strength);
+    }
+  };
+
+  void ResampleHyperparameters(MT19937* rng) {
+    typename std::tr1::unordered_map<std::vector<WordID>, CCRP_NoTable<TRule>, boost::hash<std::vector<WordID> > >::iterator it;
+    //const unsigned nloop = 5;
+    const unsigned niterations = 10;
+    //DiscountResampler dr(*this);
+    AlphaResampler ar(*this);
+#if 0
+    for (int iter = 0; iter < nloop; ++iter) {
+      strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits<double>::min(),
+                                 std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
+      double min_discount = std::numeric_limits<double>::min();
+      if (strength < 0.0) min_discount -= strength;
+      d = slice_sampler1d(dr, d, *rng, min_discount,
+                          1.0, 0.0, niterations, 100*niterations);
+    }
+#endif
+    strength = slice_sampler1d(ar, strength, *rng, -d,
+                               std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
+    std::cerr << "CTMModel(alpha=" << strength << ") = " << LogLikelihood(d, strength) << std::endl;
+    for (it = r.begin(); it != r.end(); ++it) {
+#if 0
+      it->second.set_discount(d);
+#endif
+      it->second.set_alpha(strength);
+    }
+  }
+
+  prob_t Likelihood() const {
+    prob_t p; p.logeq(LogLikelihood(d, strength));
+    return p;
+  }
+
+  const CondBaseDist& rp0;
+  typedef std::tr1::unordered_map<std::vector<WordID>,
+                                  CCRP_NoTable<TRule>,
+                                  boost::hash<std::vector<WordID> > > RuleModelHash;
+  RuleModelHash r;
+  double d, strength;
+};
+
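ResampleHyperparameters above slice-samples the CRP strength alpha against LogLikelihood, i.e. a Gamma prior density plus the sum of per-context CRP seating log probabilities. The sketch below evaluates that same kind of posterior for one toy restaurant on a grid instead of slice sampling; the seating and Gamma(shape, rate) formulas are the standard ones, and the counts are hypothetical:

#include <cmath>
#include <cstdio>

// log P(seating | alpha) for a 1-parameter CRP with k dishes:
// k*log(alpha) + sum_k lgamma(c_k) + lgamma(alpha) - lgamma(alpha + n)
double log_crp_seating(double alpha, const int* counts, int k) {
  int n = 0;
  double lp = 0.0;
  for (int i = 0; i < k; ++i) { n += counts[i]; lp += lgamma(counts[i]); }
  return k * std::log(alpha) + lp + lgamma(alpha) - lgamma(alpha + n);
}

// Gamma density in the (shape, rate) parameterization
double log_gamma_density(double x, double shape, double rate) {
  return shape * std::log(rate) + (shape - 1.0) * std::log(x) - rate * x - lgamma(shape);
}

int main() {
  const int counts[] = {5, 2, 1, 1};  // hypothetical dish counts in one restaurant
  double best = -1e300, best_alpha = 0.0;
  for (double a = 0.05; a < 20.0; a *= 1.1) {  // geometric grid over alpha
    const double lp = log_gamma_density(a, 1.0, 1.0) + log_crp_seating(a, counts, 4);
    if (lp > best) { best = lp; best_alpha = a; }
  }
  std::printf("MAP alpha ~= %.2f (log posterior %.3f)\n", best_alpha, best);
  return 0;
}

A slice sampler explores this one-dimensional function the same way, just stochastically, which is why AlphaResampler is nothing more than a closure over LogLikelihood with the discount held fixed.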
 struct GraphStructure {
   GraphStructure() : r() {}
   // leak memory - these are basically static
@@ -20,9 +179,9 @@ struct GraphStructure {
   bool IsReachable() const { return r->nodes > 0; }
 };
 
-struct BackwardEstimates {
-  BackwardEstimates() : gs(), backward() {}
-  explicit BackwardEstimates(const GraphStructure& g) :
+struct ProbabilityEstimates {
+  ProbabilityEstimates() : gs(), backward() {}
+  explicit ProbabilityEstimates(const GraphStructure& g) :
     gs(&g), backward() {
     if (g.r->nodes > 0)
       backward = new float[g.r->nodes];
@@ -36,24 +195,32 @@
   }
 
   // returns a backward estimate
-  double operator()(int src_covered, int trg_covered) const {
+  double Backward(int src_covered, int trg_covered) const {
     if (!backward) return 0;
     int ind = gs->r->node_addresses[src_covered][trg_covered];
     if (ind < 0) return 0;
     return backward[ind];
   }
+
+  prob_t estp;
+  float* backward;
  private:
   const GraphStructure* gs;
-  float* backward;
 };
 
 struct TransliterationsImpl {
-  TransliterationsImpl(int max_src, int max_trg, double fr) :
+  TransliterationsImpl(int max_src, int max_trg, double sr, const BackwardEstimator& b) :
+      cp0(max_src, max_trg, sr),
+      tccm(cp0),
+      be(b),
       kMAX_SRC_CHUNK(max_src),
       kMAX_TRG_CHUNK(max_trg),
-      kFILTER_RATIO(fr),
+      kS2T_RATIO(sr),
       tot_pairs(), tot_mem() {
   }
+  const CondBaseDist cp0;
+  TransliterationChunkConditionalModel tccm;
+  const BackwardEstimator& be;
 
   void Initialize(WordID src, const vector<WordID>& src_lets, WordID trg, const vector<WordID>& trg_lets) {
     const size_t src_len = src_lets.size();
@@ -63,20 +230,29 @@ struct TransliterationsImpl {
     if (src_len >= graphs.size()) graphs.resize(src_len + 1);
     if (trg_len >= graphs[src_len].size()) graphs[src_len].resize(trg_len + 1);
     GraphStructure& gs = graphs[src_len][trg_len];
-    if (!gs.r)
-      gs.r = new Reachability(src_len, trg_len, kMAX_SRC_CHUNK, kMAX_TRG_CHUNK, kFILTER_RATIO);
+    if (!gs.r) {
+      double rat = exp(fabs(log(trg_len / (src_len * kS2T_RATIO))));
+      if (rat > 1.5 || (rat > 2.4 && src_len < 6)) {
+        cerr << " ** Forbidding transliterations of size " << src_len << "," << trg_len << ": " << rat << endl;
+        gs.r = new Reachability(src_len, trg_len, 0, 0);
+      } else {
+        gs.r = new Reachability(src_len, trg_len, kMAX_SRC_CHUNK, kMAX_TRG_CHUNK);
+      }
+    }
+    const Reachability& r = *gs.r;
 
     // init backward estimates
-    if (src >= bes.size()) bes.resize(src + 1);
-    unordered_map<WordID, BackwardEstimates>::iterator it = bes[src].find(trg);
-    if (it != bes[src].end()) return; // already initialized
+    if (src >= ests.size()) ests.resize(src + 1);
+    unordered_map<WordID, ProbabilityEstimates>::iterator it = ests[src].find(trg);
+    if (it != ests[src].end()) return; // already initialized
 
-    it = bes[src].insert(make_pair(trg, BackwardEstimates(gs))).first;
-    BackwardEstimates& b = it->second;
+    it = ests[src].insert(make_pair(trg, ProbabilityEstimates(gs))).first;
+    ProbabilityEstimates& est = it->second;
     if (!gs.r->nodes) return;  // not derivable subject to length constraints
 
-    // TODO
+    be.InitializeGrid(src_lets, trg_lets, r, kS2T_RATIO, est.backward);
+    cerr << TD::GetString(src_lets) << " ||| " << TD::GetString(trg_lets) << " ||| " << (est.backward[0] / trg_lets.size()) << endl;
     tot_pairs++;
     tot_mem += sizeof(float) * gs.r->nodes;
   }
@@ -92,8 +268,11 @@ struct TransliterationsImpl {
     const vector<GraphStructure>& tv = graphs[src.size()];
     assert(trg.size() < tv.size());
     const GraphStructure& gs = tv[trg.size()];
-    // TODO: do prob
-    return prob_t::Zero();
+    if (gs.r->nodes == 0)
+      return prob_t::Zero();
+    const unordered_map<WordID, ProbabilityEstimates>::const_iterator it = ests[s].find(t);
+    assert(it != ests[s].end());
+    return it->second.estp;
   }
 
   void GraphSummary() const {
@@ -126,15 +305,15 @@ struct TransliterationsImpl {
 
   const int kMAX_SRC_CHUNK;
   const int kMAX_TRG_CHUNK;
-  const double kFILTER_RATIO;
+  const double kS2T_RATIO;
   unsigned tot_pairs;
   size_t tot_mem;
   vector<vector<GraphStructure> > graphs; // graphs[src_len][trg_len]
-  vector<unordered_map<WordID, BackwardEstimates> > bes; // bes[src][trg]
+  vector<unordered_map<WordID, ProbabilityEstimates> > ests; // ests[src][trg]
 };
 
-Transliterations::Transliterations(int max_src, int max_trg, double fr) :
-  pimpl_(new TransliterationsImpl(max_src, max_trg, fr)) {}
+Transliterations::Transliterations(int max_src, int max_trg, double sr, const BackwardEstimator& be) :
+  pimpl_(new TransliterationsImpl(max_src, max_trg, sr, be)) {}
 
 Transliterations::~Transliterations() { delete pimpl_; }
 
 void Transliterations::Initialize(WordID src, const vector<WordID>& src_lets, WordID trg, const vector<WordID>& trg_lets) {
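ProbabilityEstimates above keeps one float per reachable node; node_addresses maps a (src_covered, trg_covered) cell to its index in that dense array, with a negative address marking an unreachable cell (hence the `ind < 0` early return in Backward). An illustrative stand-in for that packing scheme follows; the Reachability class itself is not reproduced, and the grid and values are hypothetical:

#include <cstdio>
#include <vector>

int main() {
  const int S = 3, T = 3;
  bool reach[S + 1][T + 1] = {};  // which cells some segmentation can reach
  reach[0][0] = reach[1][1] = reach[2][1] = reach[2][2] = reach[3][3] = true;

  // pack reachable cells into dense indices; -1 means unreachable
  std::vector<std::vector<int> > node_addresses(S + 1, std::vector<int>(T + 1, -1));
  int nodes = 0;
  for (int i = 0; i <= S; ++i)
    for (int j = 0; j <= T; ++j)
      if (reach[i][j]) node_addresses[i][j] = nodes++;

  std::vector<float> backward(nodes, 0.0f);  // one estimate per reachable node
  backward[node_addresses[2][1]] = -3.25f;   // e.g. a backward log-estimate
  const int ind = node_addresses[1][2];      // unreachable: ind < 0, estimate is 0
  std::printf("nodes=%d, backward(2,1)=%.2f, addr(1,2)=%d\n",
              nodes, backward[node_addresses[2][1]], ind);
  return 0;
}

The memory accounting in Initialize (tot_mem += sizeof(float) * gs.r->nodes) is exactly the size of this dense array, which is why pairs rejected by the length-ratio test are given an empty Reachability(..., 0, 0) rather than none at all.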
diff --git a/gi/pf/transliterations.h b/gi/pf/transliterations.h
index ea9f9d3f..49d14684 100644
--- a/gi/pf/transliterations.h
+++ b/gi/pf/transliterations.h
@@ -5,11 +5,12 @@
 #include "wordid.h"
 #include "prob.h"
 
+struct BackwardEstimator;
 struct TransliterationsImpl;
 struct Transliterations {
   // max_src and max_trg indicate how big the transliteration phrases can be
   // see reachability.h for information about filter_ratio
-  explicit Transliterations(int max_src, int max_trg, double filter_ratio);
+  explicit Transliterations(int max_src, int max_trg, double s2t_rat, const BackwardEstimator& be);
   ~Transliterations();
   void Initialize(WordID src, const std::vector<WordID>& src_lets, WordID trg, const std::vector<WordID>& trg_lets);
   void Forbid(WordID src, const std::vector<WordID>& src_lets, WordID trg, const std::vector<WordID>& trg_lets);
diff --git a/utils/ccrp_nt.h b/utils/ccrp_nt.h
index 79321493..6efbfc78 100644
--- a/utils/ccrp_nt.h
+++ b/utils/ccrp_nt.h
@@ -11,6 +11,7 @@
 #include <boost/functional/hash.hpp>
 #include "sampler.h"
 #include "slice_sampler.h"
+#include "m.h"
 
 // Chinese restaurant process (1 parameter)
 template <typename Dish, typename DishHash = boost::hash<Dish> >
 class CCRP_NoTable {
@@ -29,6 +30,7 @@ class CCRP_NoTable {
       alpha_prior_rate_(c_rate) {}
 
   double alpha() const { return alpha_; }
+  void set_alpha(const double& alpha) { alpha_ = alpha; assert(alpha_ > 0.0); }
 
   bool has_alpha_prior() const {
     return !std::isnan(alpha_prior_shape_);
@@ -71,9 +73,10 @@ class CCRP_NoTable {
     return table_diff;
   }
 
-  double prob(const Dish& dish, const double& p0) const {
+  template <typename F>
+  F prob(const Dish& dish, const F& p0) const {
     const unsigned at_table = num_customers(dish);
-    return (at_table + p0 * alpha_) / (num_customers_ + alpha_);
+    return (F(at_table) + p0 * F(alpha_)) / F(num_customers_ + alpha_);
   }
 
   double logprob(const Dish& dish, const double& logp0) const {
@@ -85,20 +88,12 @@ class CCRP_NoTable {
     return log_crp_prob(alpha_);
   }
 
-  static double log_gamma_density(const double& x, const double& shape, const double& rate) {
-    assert(x >= 0.0);
-    assert(shape > 0.0);
-    assert(rate > 0.0);
-    const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape);
-    return lp;
-  }
-
   // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process
   // does not include P_0's
   double log_crp_prob(const double& alpha) const {
     double lp = 0.0;
     if (has_alpha_prior())
-      lp += log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_);
+      lp += Md::log_gamma_density(alpha, alpha_prior_shape_, alpha_prior_rate_);
     assert(lp <= 0.0);
     if (num_customers_) {
       lp += lgamma(alpha) - lgamma(alpha + num_customers_) +
-- 
cgit v1.2.3
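The ccrp_nt.h change in the patch above makes prob() generic in its value type F, so the same cache formula can be evaluated in plain doubles or in a log-space type such as cdec's prob_t; all F needs is construction from counts/doubles plus the +, *, and / operators. A minimal log-space stand-in demonstrating that contract (LogVal here is a sketch, not prob_t):

#include <cmath>
#include <cstdio>

struct LogVal {
  double lv;  // stores the log of the represented value
  LogVal(double x) : lv(std::log(x)) {}
  LogVal operator*(const LogVal& o) const { LogVal r(*this); r.lv += o.lv; return r; }
  LogVal operator/(const LogVal& o) const { LogVal r(*this); r.lv -= o.lv; return r; }
  LogVal operator+(const LogVal& o) const {  // log-sum-exp, stable for tiny values
    LogVal r(*this);
    const double hi = lv > o.lv ? lv : o.lv, lo = lv > o.lv ? o.lv : lv;
    r.lv = hi + std::log(1.0 + std::exp(lo - hi));
    return r;
  }
};

// the cache formula from CCRP_NoTable::prob, templated exactly as in the patch
template <typename F>
F crp_prob(unsigned at_table, unsigned n, double alpha, const F& p0) {
  return (F(at_table) + p0 * F(alpha)) / F(n + alpha);
}

int main() {
  const double p0 = 1e-320;  // a very small base probability
  std::printf("double: %g\n", crp_prob<double>(0, 10, 1.0, p0));
  std::printf("logval: %g (log)\n", crp_prob<LogVal>(0, 10, 1.0, LogVal(p0)).lv);
  return 0;
}

Evaluating in log space matters when many such factors are multiplied into a derivation probability, which is exactly what the transliteration model does with prob_t.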
From ef614a1d968aebbf463ed57876fee395b4c24635 Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Fri, 9 Mar 2012 23:13:09 -0500
Subject: logging after alignment

---
 gi/pf/align-lexonly-pyp.cc | 1 +
 gi/pf/pyp_tm.cc            | 7 +++++--
 gi/pf/pyp_word_model.h     | 2 +-
 utils/ccrp.h               | 1 +
 4 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'utils')

diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc
index d68a4b8f..4a1d1db6 100644
--- a/gi/pf/align-lexonly-pyp.cc
+++ b/gi/pf/align-lexonly-pyp.cc
@@ -208,6 +208,7 @@ int main(int argc, char** argv) {
   }
   for (unsigned i = 0; i < corpus.size(); ++i)
     WriteAlignments(corpus[i]);
+  aligner.model.Summary();
   return 0;
 }
diff --git a/gi/pf/pyp_tm.cc b/gi/pf/pyp_tm.cc
index 94cbe7c3..b5262f47 100644
--- a/gi/pf/pyp_tm.cc
+++ b/gi/pf/pyp_tm.cc
@@ -54,8 +54,6 @@ struct ConditionalPYPWordModel {
     assert(it != r.end());
     if (it->second.decrement(trglets, rng)) {
       base.Decrement(trglets, rng);
-      if (it->second.num_customers() == 0)
-        r.erase(it);
     }
   }
 
@@ -84,6 +82,11 @@ PYPLexicalTranslation::PYPLexicalTranslation(const vector<vector<WordID> >& lets
     tmodel(new ConditionalPYPWordModel<PYPWordModel>(up0)),
     kX(-TD::Convert("X")) {}
 
+void PYPLexicalTranslation::Summary() const {
+  tmodel->Summary();
+  up0->Summary();
+}
+
 prob_t PYPLexicalTranslation::Likelihood() const {
   prob_t p = up0->Likelihood();
   p *= tmodel->Likelihood();
diff --git a/gi/pf/pyp_word_model.h b/gi/pf/pyp_word_model.h
index 800a4fd7..ff366865 100644
--- a/gi/pf/pyp_word_model.h
+++ b/gi/pf/pyp_word_model.h
@@ -12,7 +12,7 @@
 // PYP(d,s,poisson-uniform) represented as a CRP
 struct PYPWordModel {
-  explicit PYPWordModel(const unsigned vocab_e_size, const double mean_len = 7.5) :
+  explicit PYPWordModel(const unsigned vocab_e_size, const double mean_len = 5) :
       base(prob_t::One()), r(1,1,1,1,0.66,50.0), u0(-std::log(vocab_e_size)), mean_length(mean_len) {}
 
   void ResampleHyperparameters(MT19937* rng);
diff --git a/utils/ccrp.h b/utils/ccrp.h
index 439d7e1e..4a8b80e7 100644
--- a/utils/ccrp.h
+++ b/utils/ccrp.h
@@ -221,6 +221,7 @@ class CCRP {
   void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) {
     assert(has_discount_prior() || has_strength_prior());
+    if (num_customers() == 0) return;
    DiscountResampler dr(*this);
     StrengthResampler sr(*this);
     for (int iter = 0; iter < nloop; ++iter) {
-- 
cgit v1.2.3
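The one-line guard added to ccrp.h is worth spelling out: with zero customers the CRP seating probability is identically one, so the hyperparameter "posterior" the resampler explores collapses to the bare prior, and slice-sampling it is wasted work at best. A sketch of the pattern, with the standard (assumed) seating formula; the function bodies here are illustrative, not cdec's CCRP:

#include <cmath>
#include <cstdio>

// log P(seating | alpha); empty restaurant has probability exactly one
double log_crp_prob(double alpha, int n, const int* counts, int k) {
  if (n == 0) return 0.0;
  double lp = k * std::log(alpha) + lgamma(alpha) - lgamma(alpha + n);
  for (int i = 0; i < k; ++i) lp += lgamma(counts[i]);
  return lp;
}

void resample_hyperparameters(double* alpha, int n, const int* counts, int k) {
  if (n == 0) return;  // the guard added in the patch above
  // ... slice-sample *alpha against log_crp_prob(...) plus a prior here ...
  (void)alpha; (void)counts; (void)k;
}

int main() {
  double alpha = 1.0;
  resample_hyperparameters(&alpha, 0, 0, 0);  // no-op: alpha stays at 1.0
  const int counts[] = {3, 1};
  std::printf("alpha=%.1f, log_crp(4 customers)=%.3f\n",
              alpha, log_crp_prob(alpha, 4, counts, 2));
  return 0;
}

The guard also pairs naturally with the pyp_tm.cc change in this same patch, which stops erasing empty per-context restaurants: an empty restaurant may now linger in the hash, and resampling its hyperparameters must be a no-op rather than an error.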