From 0acc92a0eecf04a2c429f6f7685bfcaa68c7ec3a Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 11 Oct 2011 12:06:32 +0100 Subject: check in some experimental particle filtering code, some gitignore fixes --- gi/pf/cbgi.cc | 340 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 340 insertions(+) create mode 100644 gi/pf/cbgi.cc (limited to 'gi/pf/cbgi.cc') diff --git a/gi/pf/cbgi.cc b/gi/pf/cbgi.cc new file mode 100644 index 00000000..20204e8a --- /dev/null +++ b/gi/pf/cbgi.cc @@ -0,0 +1,340 @@ +#include +#include +#include + +#include +#include + +#include "sampler.h" +#include "filelib.h" +#include "hg_io.h" +#include "hg.h" +#include "ccrp_nt.h" +#include "trule.h" +#include "inside_outside.h" + +using namespace std; +using namespace std::tr1; + +double log_poisson(unsigned x, const double& lambda) { + assert(lambda > 0.0); + return log(lambda) * x - lgamma(x + 1) - lambda; +} + +double log_decay(unsigned x, const double& b) { + assert(b > 1.0); + assert(x > 0); + return log(b - 1) - x * log(b); +} + +size_t hash_value(const TRule& r) { + // TODO fix hash function + size_t h = boost::hash_value(r.e_) * boost::hash_value(r.f_) * r.lhs_; + return h; +} + +bool operator==(const TRule& a, const TRule& b) { + return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); +} + +struct SimpleBase { + SimpleBase(unsigned esize, unsigned fsize, unsigned ntsize = 144) : + uniform_e(-log(esize)), + uniform_f(-log(fsize)), + uniform_nt(-log(ntsize)) { + } + + // binomial coefficient + static double choose(unsigned n, unsigned k) { + return exp(lgamma(n + 1) - lgamma(k + 1) - lgamma(n - k + 1)); + } + + // count the number of patterns of terminals and NTs in the rule, given elen and flen + static double log_number_of_patterns(const unsigned flen, const unsigned elen) { + static vector > counts; + if (elen >= counts.size()) counts.resize(elen + 1); + if (flen >= counts[elen].size()) counts[elen].resize(flen + 1); + double& count = counts[elen][flen]; + if (count) return log(count); + const unsigned max_arity = min(elen, flen); + for (unsigned a = 0; a <= max_arity; ++a) + count += choose(elen, a) * choose(flen, a); + return log(count); + } + + // return logp0 of rule | LHS + double operator()(const TRule& rule) const { + const unsigned flen = rule.f_.size(); + const unsigned elen = rule.e_.size(); +#if 0 + double p = 0; + p += log_poisson(flen, 0.5); // flen ~Pois(0.5) + p += log_poisson(elen, flen); // elen | flen ~Pois(flen) + p -= log_number_of_patterns(flen, elen); // pattern | flen,elen ~Uniform + for (unsigned i = 0; i < flen; ++i) { // for each position in f-RHS + if (rule.f_[i] <= 0) // according to pattern + p += uniform_nt; // draw NT ~Uniform + else + p += uniform_f; // draw f terminal ~Uniform + } + p -= lgamma(rule.Arity() + 1); // draw permutation ~Uniform + for (unsigned i = 0; i < elen; ++i) { // for each position in e-RHS + if (rule.e_[i] > 0) // according to pattern + p += uniform_e; // draw e|f term ~Uniform + // TODO this should prob be model 1 + } +#else + double p = 0; + bool is_abstract = rule.f_[0] <= 0; + p += log(0.5); + if (is_abstract) { + if (flen == 2) p += log(0.99); else p += log(0.01); + } else { + p += log_decay(flen, 3); + } + + for (unsigned i = 0; i < flen; ++i) { // for each position in f-RHS + if (rule.f_[i] <= 0) // according to pattern + p += uniform_nt; // draw NT ~Uniform + else + p += uniform_f; // draw f terminal ~Uniform + } +#endif + return p; + } + const double uniform_e; + const double uniform_f; + const double uniform_nt; + vector arities; +}; + +MT19937* rng = NULL; + +template +struct MHSamplerEdgeProb { + MHSamplerEdgeProb(const Hypergraph& hg, + const map >& rdp, + const Base& logp0, + const bool exclude_multiword_terminals) : edge_probs(hg.edges_.size()) { + for (int i = 0; i < edge_probs.size(); ++i) { + const TRule& rule = *hg.edges_[i].rule_; + const map >::const_iterator it = rdp.find(rule.lhs_); + assert(it != rdp.end()); + const CCRP_NoTable& crp = it->second; + edge_probs[i].logeq(crp.logprob(rule, logp0(rule))); + if (exclude_multiword_terminals && rule.f_[0] > 0 && rule.f_.size() > 1) + edge_probs[i] = prob_t::Zero(); + } + } + inline prob_t operator()(const Hypergraph::Edge& e) const { + return edge_probs[e.id_]; + } + prob_t DerivationProb(const vector& d) const { + prob_t p = prob_t::One(); + for (unsigned i = 0; i < d.size(); ++i) + p *= edge_probs[d[i]]; + return p; + } + vector edge_probs; +}; + +template +struct ModelAndData { + ModelAndData() : + base_lh(prob_t::One()), + logp0(10000, 10000), + mh_samples(), + mh_rejects() {} + + void SampleCorpus(const string& hgpath, int i); + void ResampleHyperparameters() { + for (map >::iterator it = rules.begin(); it != rules.end(); ++it) + it->second.resample_hyperparameters(rng); + } + + CCRP_NoTable& RuleCRP(int lhs) { + map >::iterator it = rules.find(lhs); + if (it == rules.end()) { + rules.insert(make_pair(lhs, CCRP_NoTable(1,1))); + it = rules.find(lhs); + } + return it->second; + } + + void IncrementRule(const TRule& rule) { + CCRP_NoTable& crp = RuleCRP(rule.lhs_); + if (crp.increment(rule)) { + prob_t p; p.logeq(logp0(rule)); + base_lh *= p; + } + } + + void DecrementRule(const TRule& rule) { + CCRP_NoTable& crp = RuleCRP(rule.lhs_); + if (crp.decrement(rule)) { + prob_t p; p.logeq(logp0(rule)); + base_lh /= p; + } + } + + void DecrementDerivation(const Hypergraph& hg, const vector& d) { + for (unsigned i = 0; i < d.size(); ++i) { + const TRule& rule = *hg.edges_[d[i]].rule_; + DecrementRule(rule); + } + } + + void IncrementDerivation(const Hypergraph& hg, const vector& d) { + for (unsigned i = 0; i < d.size(); ++i) { + const TRule& rule = *hg.edges_[d[i]].rule_; + IncrementRule(rule); + } + } + + prob_t Likelihood() const { + prob_t p = prob_t::One(); + for (map >::const_iterator it = rules.begin(); it != rules.end(); ++it) { + prob_t q; q.logeq(it->second.log_crp_prob()); + p *= q; + } + p *= base_lh; + return p; + } + + void ResampleDerivation(const Hypergraph& hg, vector* sampled_derivation); + + map > rules; // [lhs] -> distribution over RHSs + prob_t base_lh; + SimpleBase logp0; + vector > samples; // sampled derivations + unsigned int mh_samples; + unsigned int mh_rejects; +}; + +template +void ModelAndData::SampleCorpus(const string& hgpath, int n) { + vector hgs(n); hgs.clear(); + boost::unordered_map acc; + map tot; + for (int i = 0; i < n; ++i) { + ostringstream os; + os << hgpath << '/' << i << ".json.gz"; + if (!FileExists(os.str())) continue; + hgs.push_back(Hypergraph()); + ReadFile rf(os.str()); + HypergraphIO::ReadFromJSON(rf.stream(), &hgs.back()); + } + cerr << "Read " << hgs.size() << " alignment hypergraphs.\n"; + samples.resize(hgs.size()); + const unsigned SAMPLES = 2000; + const unsigned burnin = 3 * SAMPLES / 4; + const unsigned every = 20; + for (unsigned s = 0; s < SAMPLES; ++s) { + if (s % 10 == 0) { + if (s > 0) { cerr << endl; ResampleHyperparameters(); } + cerr << "[" << s << " LLH=" << log(Likelihood()) << " REJECTS=" << ((double)mh_rejects / mh_samples) << " LHS's=" << rules.size() << " base=" << log(base_lh) << "] "; + } + cerr << '.'; + for (unsigned i = 0; i < hgs.size(); ++i) { + ResampleDerivation(hgs[i], &samples[i]); + if (s > burnin && s % every == 0) { + for (unsigned j = 0; j < samples[i].size(); ++j) { + const TRule& rule = *hgs[i].edges_[samples[i][j]].rule_; + ++acc[rule]; + ++tot[rule.lhs_]; + } + } + } + } + cerr << endl; + for (boost::unordered_map::iterator it = acc.begin(); it != acc.end(); ++it) { + cout << it->first << " MyProb=" << log(it->second)-log(tot[it->first.lhs_]) << endl; + } +} + +template +void ModelAndData::ResampleDerivation(const Hypergraph& hg, vector* sampled_deriv) { + vector cur; + cur.swap(*sampled_deriv); + + const prob_t p_cur = Likelihood(); + DecrementDerivation(hg, cur); + if (cur.empty()) { + // first iteration, create restaurants + for (int i = 0; i < hg.edges_.size(); ++i) + RuleCRP(hg.edges_[i].rule_->lhs_); + } + MHSamplerEdgeProb wf(hg, rules, logp0, cur.empty()); +// MHSamplerEdgeProb wf(hg, rules, logp0, false); + const prob_t q_cur = wf.DerivationProb(cur); + vector node_probs; + Inside >(hg, &node_probs, wf); + queue q; + q.push(hg.nodes_.size() - 3); + while(!q.empty()) { + unsigned cur_node_id = q.front(); +// cerr << "NODE=" << cur_node_id << endl; + q.pop(); + const Hypergraph::Node& node = hg.nodes_[cur_node_id]; + const unsigned num_in_edges = node.in_edges_.size(); + unsigned sampled_edge = 0; + if (num_in_edges == 1) { + sampled_edge = node.in_edges_[0]; + } else { + prob_t z; + assert(num_in_edges > 1); + SampleSet ss; + for (unsigned j = 0; j < num_in_edges; ++j) { + const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; + prob_t p = wf.edge_probs[edge.id_]; // edge proposal prob + for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) + p *= node_probs[edge.tail_nodes_[k]]; + ss.add(p); +// cerr << log(ss[j]) << " ||| " << edge.rule_->AsString() << endl; + z += p; + } +// for (unsigned j = 0; j < num_in_edges; ++j) { +// const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; +// cerr << exp(log(ss[j] / z)) << " ||| " << edge.rule_->AsString() << endl; +// } +// cerr << " --- \n"; + sampled_edge = node.in_edges_[rng->SelectSample(ss)]; + } + sampled_deriv->push_back(sampled_edge); + const Hypergraph::Edge& edge = hg.edges_[sampled_edge]; + for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { + q.push(edge.tail_nodes_[j]); + } + } + IncrementDerivation(hg, *sampled_deriv); + +// cerr << "sampled derivation contains " << sampled_deriv->size() << " edges\n"; +// cerr << "DERIV:\n"; +// for (int i = 0; i < sampled_deriv->size(); ++i) { +// cerr << " " << hg.edges_[(*sampled_deriv)[i]].rule_->AsString() << endl; +// } + + if (cur.empty()) return; // accept first sample + + ++mh_samples; + // only need to do MH if proposal is different to current state + if (cur != *sampled_deriv) { + const prob_t q_prop = wf.DerivationProb(*sampled_deriv); + const prob_t p_prop = Likelihood(); + if (!rng->AcceptMetropolisHastings(p_prop, p_cur, q_prop, q_cur)) { + ++mh_rejects; + DecrementDerivation(hg, *sampled_deriv); + IncrementDerivation(hg, cur); + swap(cur, *sampled_deriv); + } + } +} + +int main(int argc, char** argv) { + rng = new MT19937; + ModelAndData m; + m.SampleCorpus("./hgs", 50); + // m.SampleCorpus("./btec/hgs", 5000); + return 0; +} + -- cgit v1.2.3 From 0af7d663194beddcde420349bbd91430e0b2e423 Mon Sep 17 00:00:00 2001 From: Guest_account Guest_account prguest11 Date: Tue, 11 Oct 2011 16:16:53 +0100 Subject: remove implicit conversion-to-double operator from LogVal that caused overflow errors, clean up some pf code --- decoder/aligner.cc | 2 +- decoder/cfg.cc | 2 +- decoder/cfg_format.h | 2 +- decoder/decoder.cc | 10 ++++---- decoder/hg.cc | 4 ++-- decoder/rule_lexer.l | 2 ++ decoder/trule.h | 15 +++++++++++- gi/pf/brat.cc | 11 --------- gi/pf/cbgi.cc | 10 -------- gi/pf/dpnaive.cc | 12 ---------- gi/pf/itg.cc | 11 --------- gi/pf/pfbrat.cc | 11 --------- gi/pf/pfdist.cc | 11 --------- gi/pf/pfnaive.cc | 11 --------- mteval/mbr_kbest.cc | 4 ++-- phrasinator/ccrp_nt.h | 24 +++++++++++++++---- training/mpi_batch_optimize.cc | 2 +- training/mpi_compute_cllh.cc | 51 +++++++++++++++++++---------------------- training/mpi_online_optimize.cc | 4 ++-- utils/logval.h | 10 ++++---- 20 files changed, 78 insertions(+), 131 deletions(-) (limited to 'gi/pf/cbgi.cc') diff --git a/decoder/aligner.cc b/decoder/aligner.cc index 292ee123..53e059fb 100644 --- a/decoder/aligner.cc +++ b/decoder/aligner.cc @@ -165,7 +165,7 @@ inline void WriteProbGrid(const Array2D& m, ostream* pos) { if (m(i,j) == prob_t::Zero()) { os << "\t---X---"; } else { - snprintf(b, 1024, "%0.5f", static_cast(m(i,j))); + snprintf(b, 1024, "%0.5f", m(i,j).as_float()); os << '\t' << b; } } diff --git a/decoder/cfg.cc b/decoder/cfg.cc index 651978d2..cd7e66e9 100755 --- a/decoder/cfg.cc +++ b/decoder/cfg.cc @@ -639,7 +639,7 @@ void CFG::Print(std::ostream &o,CFGFormat const& f) const { o << '['<& src, SparseVector* trg) { for (SparseVector::const_iterator it = src.begin(); it != src.end(); ++it) - trg->set_value(it->first, it->second); + trg->set_value(it->first, it->second.as_float()); } }; @@ -788,10 +788,10 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { const bool show_tree_structure=conf.count("show_tree_structure"); if (!SILENT) forest_stats(forest," Init. forest",show_tree_structure,oracle.show_derivation); if (conf.count("show_expected_length")) { - const PRPair res = - Inside, - PRWeightFunction >(forest); - cerr << " Expected length (words): " << res.r / res.p << "\t" << res << endl; + const PRPair res = + Inside, + PRWeightFunction >(forest); + cerr << " Expected length (words): " << (res.r / res.p).as_float() << "\t" << res << endl; } if (conf.count("show_partition")) { diff --git a/decoder/hg.cc b/decoder/hg.cc index 3ad17f1a..180986d7 100644 --- a/decoder/hg.cc +++ b/decoder/hg.cc @@ -157,14 +157,14 @@ prob_t Hypergraph::ComputeEdgePosteriors(double scale, vector* posts) co const ScaledEdgeProb weight(scale); const ScaledTransitionEventWeightFunction w2(scale); SparseVector pv; - const double inside = InsideOutside, ScaledTransitionEventWeightFunction>(*this, &pv, weight, w2); posts->resize(edges_.size()); for (int i = 0; i < edges_.size(); ++i) (*posts)[i] = prob_t(pv.value(i)); - return prob_t(inside); + return inside; } prob_t Hypergraph::ComputeBestPathThroughEdges(vector* post) const { diff --git a/decoder/rule_lexer.l b/decoder/rule_lexer.l index 9331d8ed..083a5bb1 100644 --- a/decoder/rule_lexer.l +++ b/decoder/rule_lexer.l @@ -220,6 +220,8 @@ NT [^\t \[\],]+ std::cerr << "Line " << lex_line << ": LHS and RHS arity mismatch!\n"; abort(); } + // const bool ignore_grammar_features = false; + // if (ignore_grammar_features) scfglex_num_feats = 0; TRulePtr rp(new TRule(scfglex_lhs, scfglex_src_rhs, scfglex_src_rhs_size, scfglex_trg_rhs, scfglex_trg_rhs_size, scfglex_feat_ids, scfglex_feat_vals, scfglex_num_feats, scfglex_src_arity, scfglex_als, scfglex_num_als)); check_and_update_ctf_stack(rp); TRulePtr coarse_rp = ((ctf_level == 0) ? TRulePtr() : ctf_rule_stack.top()); diff --git a/decoder/trule.h b/decoder/trule.h index 4df4ec90..8eb2a059 100644 --- a/decoder/trule.h +++ b/decoder/trule.h @@ -5,7 +5,9 @@ #include #include #include -#include + +#include "boost/shared_ptr.hpp" +#include "boost/functional/hash.hpp" #include "sparse_vector.h" #include "wordid.h" @@ -162,4 +164,15 @@ class TRule { bool SanityCheck() const; }; +inline size_t hash_value(const TRule& r) { + size_t h = boost::hash_value(r.e_); + boost::hash_combine(h, -r.lhs_); + boost::hash_combine(h, boost::hash_value(r.f_)); + return h; +} + +inline bool operator==(const TRule& a, const TRule& b) { + return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); +} + #endif diff --git a/gi/pf/brat.cc b/gi/pf/brat.cc index 4c6ba3ef..7b60ef23 100644 --- a/gi/pf/brat.cc +++ b/gi/pf/brat.cc @@ -25,17 +25,6 @@ static unsigned kMAX_SRC_PHRASE; static unsigned kMAX_TRG_PHRASE; struct FSTState; -size_t hash_value(const TRule& r) { - size_t h = 2 - r.lhs_; - boost::hash_combine(h, boost::hash_value(r.e_)); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - double log_poisson(unsigned x, const double& lambda) { assert(lambda > 0.0); return log(lambda) * x - lgamma(x + 1) - lambda; diff --git a/gi/pf/cbgi.cc b/gi/pf/cbgi.cc index 20204e8a..97f1ba34 100644 --- a/gi/pf/cbgi.cc +++ b/gi/pf/cbgi.cc @@ -27,16 +27,6 @@ double log_decay(unsigned x, const double& b) { return log(b - 1) - x * log(b); } -size_t hash_value(const TRule& r) { - // TODO fix hash function - size_t h = boost::hash_value(r.e_) * boost::hash_value(r.f_) * r.lhs_; - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - struct SimpleBase { SimpleBase(unsigned esize, unsigned fsize, unsigned ntsize = 144) : uniform_e(-log(esize)), diff --git a/gi/pf/dpnaive.cc b/gi/pf/dpnaive.cc index 582d1be7..608f73d5 100644 --- a/gi/pf/dpnaive.cc +++ b/gi/pf/dpnaive.cc @@ -20,18 +20,6 @@ namespace po = boost::program_options; static unsigned kMAX_SRC_PHRASE; static unsigned kMAX_TRG_PHRASE; -struct FSTState; - -size_t hash_value(const TRule& r) { - size_t h = 2 - r.lhs_; - boost::hash_combine(h, boost::hash_value(r.e_)); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} void InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); diff --git a/gi/pf/itg.cc b/gi/pf/itg.cc index 2c2a86f9..ac3c16a3 100644 --- a/gi/pf/itg.cc +++ b/gi/pf/itg.cc @@ -27,17 +27,6 @@ ostream& operator<<(ostream& os, const vector& p) { return os << ']'; } -size_t hash_value(const TRule& r) { - size_t h = boost::hash_value(r.e_); - boost::hash_combine(h, -r.lhs_); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - double log_poisson(unsigned x, const double& lambda) { assert(lambda > 0.0); return log(lambda) * x - lgamma(x + 1) - lambda; diff --git a/gi/pf/pfbrat.cc b/gi/pf/pfbrat.cc index 4c6ba3ef..7b60ef23 100644 --- a/gi/pf/pfbrat.cc +++ b/gi/pf/pfbrat.cc @@ -25,17 +25,6 @@ static unsigned kMAX_SRC_PHRASE; static unsigned kMAX_TRG_PHRASE; struct FSTState; -size_t hash_value(const TRule& r) { - size_t h = 2 - r.lhs_; - boost::hash_combine(h, boost::hash_value(r.e_)); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - double log_poisson(unsigned x, const double& lambda) { assert(lambda > 0.0); return log(lambda) * x - lgamma(x + 1) - lambda; diff --git a/gi/pf/pfdist.cc b/gi/pf/pfdist.cc index 18dfd03b..81abd61b 100644 --- a/gi/pf/pfdist.cc +++ b/gi/pf/pfdist.cc @@ -24,17 +24,6 @@ namespace po = boost::program_options; shared_ptr prng; -size_t hash_value(const TRule& r) { - size_t h = boost::hash_value(r.e_); - boost::hash_combine(h, -r.lhs_); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - void InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() diff --git a/gi/pf/pfnaive.cc b/gi/pf/pfnaive.cc index 43c604c3..c30e7c4f 100644 --- a/gi/pf/pfnaive.cc +++ b/gi/pf/pfnaive.cc @@ -24,17 +24,6 @@ namespace po = boost::program_options; shared_ptr prng; -size_t hash_value(const TRule& r) { - size_t h = boost::hash_value(r.e_); - boost::hash_combine(h, -r.lhs_); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - void InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc index 2867b36b..64a6a8bf 100644 --- a/mteval/mbr_kbest.cc +++ b/mteval/mbr_kbest.cc @@ -32,7 +32,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { } struct LossComparer { - bool operator()(const pair, double>& a, const pair, double>& b) const { + bool operator()(const pair, prob_t>& a, const pair, prob_t>& b) const { return a.second < b.second; } }; @@ -108,7 +108,7 @@ int main(int argc, char** argv) { ScoreP s = scorer->ScoreCandidate(list[j].first); double loss = 1.0 - s->ComputeScore(); if (type == TER || type == AER) loss = 1.0 - loss; - double weighted_loss = loss * (joints[j] / marginal); + double weighted_loss = loss * (joints[j] / marginal).as_float(); wl_acc += weighted_loss; if ((!output_list) && wl_acc > mbr_loss) break; } diff --git a/phrasinator/ccrp_nt.h b/phrasinator/ccrp_nt.h index 163b643a..811bce73 100644 --- a/phrasinator/ccrp_nt.h +++ b/phrasinator/ccrp_nt.h @@ -50,15 +50,26 @@ class CCRP_NoTable { return it->second; } - void increment(const Dish& dish) { - ++custs_[dish]; + int increment(const Dish& dish) { + int table_diff = 0; + if (++custs_[dish] == 1) + table_diff = 1; ++num_customers_; + return table_diff; } - void decrement(const Dish& dish) { - if ((--custs_[dish]) == 0) + int decrement(const Dish& dish) { + int table_diff = 0; + int nc = --custs_[dish]; + if (nc == 0) { custs_.erase(dish); + table_diff = -1; + } else if (nc < 0) { + std::cerr << "Dish counts dropped below zero for: " << dish << std::endl; + abort(); + } --num_customers_; + return table_diff; } double prob(const Dish& dish, const double& p0) const { @@ -66,6 +77,11 @@ class CCRP_NoTable { return (at_table + p0 * concentration_) / (num_customers_ + concentration_); } + double logprob(const Dish& dish, const double& logp0) const { + const unsigned at_table = num_customers(dish); + return log(at_table + exp(logp0 + log(concentration_))) - log(num_customers_ + concentration_); + } + double log_crp_prob() const { return log_crp_prob(concentration_); } diff --git a/training/mpi_batch_optimize.cc b/training/mpi_batch_optimize.cc index 0ba8c530..046e921c 100644 --- a/training/mpi_batch_optimize.cc +++ b/training/mpi_batch_optimize.cc @@ -92,7 +92,7 @@ struct TrainingObserver : public DecoderObserver { void SetLocalGradientAndObjective(vector* g, double* o) const { *o = acc_obj; for (SparseVector::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it) - (*g)[it->first] = it->second; + (*g)[it->first] = it->second.as_float(); } virtual void NotifyDecodingStart(const SentenceMetadata& smeta) { diff --git a/training/mpi_compute_cllh.cc b/training/mpi_compute_cllh.cc index b496d196..d5caa745 100644 --- a/training/mpi_compute_cllh.cc +++ b/training/mpi_compute_cllh.cc @@ -1,6 +1,4 @@ -#include #include -#include #include #include #include @@ -12,6 +10,7 @@ #include #include +#include "sentence_metadata.h" #include "verbose.h" #include "hg.h" #include "prob.h" @@ -52,7 +51,8 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { return true; } -void ReadTrainingCorpus(const string& fname, int rank, int size, vector* c, vector* ids) { +void ReadInstances(const string& fname, int rank, int size, vector* c) { + assert(fname != "-"); ReadFile rf(fname); istream& in = *rf.stream(); string line; @@ -60,20 +60,16 @@ void ReadTrainingCorpus(const string& fname, int rank, int size, vector* while(in) { getline(in, line); if (!in) break; - if (lc % size == rank) { - c->push_back(line); - ids->push_back(lc); - } + if (lc % size == rank) c->push_back(line); ++lc; } } static const double kMINUS_EPSILON = -1e-6; -struct TrainingObserver : public DecoderObserver { - void Reset() { - acc_obj = 0; - } +struct ConditionalLikelihoodObserver : public DecoderObserver { + + ConditionalLikelihoodObserver() : trg_words(), acc_obj(), cur_obj() {} virtual void NotifyDecodingStart(const SentenceMetadata&) { cur_obj = 0; @@ -120,8 +116,10 @@ struct TrainingObserver : public DecoderObserver { } assert(!isnan(log_ref_z)); acc_obj += (cur_obj - log_ref_z); + trg_words += smeta.GetReference().size(); } + unsigned trg_words; double acc_obj; double cur_obj; int state; @@ -161,35 +159,32 @@ int main(int argc, char** argv) { if (conf.count("weights")) Weights::InitFromFile(conf["weights"].as(), &weights); - // freeze feature set - //const bool freeze_feature_set = conf.count("freeze_feature_set"); - //if (freeze_feature_set) FD::Freeze(); - - vector corpus; vector ids; - ReadTrainingCorpus(conf["training_data"].as(), rank, size, &corpus, &ids); + vector corpus; + ReadInstances(conf["training_data"].as(), rank, size, &corpus); assert(corpus.size() > 0); - assert(corpus.size() == ids.size()); - - TrainingObserver observer; - double objective = 0; - observer.Reset(); if (rank == 0) - cerr << "Each processor is decoding " << corpus.size() << " training examples...\n"; + cerr << "Each processor is decoding ~" << corpus.size() << " training examples...\n"; - for (int i = 0; i < corpus.size(); ++i) { - decoder.SetId(ids[i]); + ConditionalLikelihoodObserver observer; + for (int i = 0; i < corpus.size(); ++i) decoder.Decode(corpus[i], &observer); - } + double objective = 0; + unsigned total_words = 0; #ifdef HAVE_MPI reduce(world, observer.acc_obj, objective, std::plus(), 0); + reduce(world, observer.trg_words, total_words, std::plus(), 0); #else objective = observer.acc_obj; #endif - if (rank == 0) - cout << "OBJECTIVE: " << objective << endl; + if (rank == 0) { + cout << "CONDITIONAL LOG_e LIKELIHOOD: " << objective << endl; + cout << "CONDITIONAL LOG_2 LIKELIHOOD: " << (objective/log(2)) << endl; + cout << " CONDITIONAL ENTROPY: " << (objective/log(2) / total_words) << endl; + cout << " PERPLEXITY: " << pow(2, (objective/log(2) / total_words)) << endl; + } return 0; } diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc index 2ef4a2e7..f87b7274 100644 --- a/training/mpi_online_optimize.cc +++ b/training/mpi_online_optimize.cc @@ -94,7 +94,7 @@ struct TrainingObserver : public DecoderObserver { void SetLocalGradientAndObjective(vector* g, double* o) const { *o = acc_obj; for (SparseVector::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it) - (*g)[it->first] = it->second; + (*g)[it->first] = it->second.as_float(); } virtual void NotifyDecodingStart(const SentenceMetadata& smeta) { @@ -158,7 +158,7 @@ struct TrainingObserver : public DecoderObserver { void GetGradient(SparseVector* g) const { g->clear(); for (SparseVector::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it) - g->set_value(it->first, it->second); + g->set_value(it->first, it->second.as_float()); } int total_complete; diff --git a/utils/logval.h b/utils/logval.h index 6fdc2c42..8a59d0b1 100644 --- a/utils/logval.h +++ b/utils/logval.h @@ -25,12 +25,13 @@ class LogVal { typedef LogVal Self; LogVal() : s_(), v_(LOGVAL_LOG0) {} - explicit LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {} + LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {} + const Self& operator=(double x) { s_ = std::signbit(x); v_ = s_ ? std::log(-x) : std::log(x); return *this; } LogVal(init_minus_1) : s_(true),v_(0) { } LogVal(init_1) : s_(),v_(0) { } LogVal(init_0) : s_(),v_(LOGVAL_LOG0) { } - LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {} - LogVal(unsigned x) : s_(0), v_(std::log(x)) { } + explicit LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {} + explicit LogVal(unsigned x) : s_(0), v_(std::log(x)) { } LogVal(double lnx,bool sign) : s_(sign),v_(lnx) {} LogVal(double lnx,init_lnx) : s_(),v_(lnx) {} static Self exp(T lnx) { return Self(lnx,false); } @@ -141,9 +142,6 @@ class LogVal { return pow(1/root); } - operator T() const { - if (s_) return -std::exp(v_); else return std::exp(v_); - } T as_float() const { if (s_) return -std::exp(v_); else return std::exp(v_); } -- cgit v1.2.3