From 0acc92a0eecf04a2c429f6f7685bfcaa68c7ec3a Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 11 Oct 2011 12:06:32 +0100 Subject: check in some experimental particle filtering code, some gitignore fixes --- gi/pf/pfnaive.cc | 385 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 385 insertions(+) create mode 100644 gi/pf/pfnaive.cc (limited to 'gi/pf/pfnaive.cc') diff --git a/gi/pf/pfnaive.cc b/gi/pf/pfnaive.cc new file mode 100644 index 00000000..43c604c3 --- /dev/null +++ b/gi/pf/pfnaive.cc @@ -0,0 +1,385 @@ +#include +#include +#include + +#include +#include +#include + +#include "base_measures.h" +#include "reachability.h" +#include "viterbi.h" +#include "hg.h" +#include "trule.h" +#include "tdict.h" +#include "filelib.h" +#include "dict.h" +#include "sampler.h" +#include "ccrp_nt.h" +#include "ccrp_onetable.h" + +using namespace std; +using namespace tr1; +namespace po = boost::program_options; + +shared_ptr prng; + +size_t hash_value(const TRule& r) { + size_t h = boost::hash_value(r.e_); + boost::hash_combine(h, -r.lhs_); + boost::hash_combine(h, boost::hash_value(r.f_)); + return h; +} + +bool operator==(const TRule& a, const TRule& b) { + return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); +} + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("samples,s",po::value()->default_value(1000),"Number of samples") + ("particles,p",po::value()->default_value(30),"Number of particles") + ("filter_frequency,f",po::value()->default_value(5),"Number of time steps between filterings") + ("input,i",po::value(),"Read parallel data from") + ("max_src_phrase",po::value()->default_value(5),"Maximum length of source language phrases") + ("max_trg_phrase",po::value()->default_value(5),"Maximum length of target language phrases") + ("model1,m",po::value(),"Model 1 parameters (used in base distribution)") + ("inverse_model1,M",po::value(),"Inverse Model 1 parameters (used in backward estimate)") + ("model1_interpolation_weight",po::value()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") + ("random_seed,S",po::value(), "Random seed"); + po::options_description clo("Command line options"); + clo.add_options() + ("config", po::value(), "Configuration file") + ("help,h", "Print this help message and exit"); + po::options_description dconfig_options, dcmdline_options; + dconfig_options.add(opts); + dcmdline_options.add(opts).add(clo); + + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + if (conf->count("config")) { + ifstream config((*conf)["config"].as().c_str()); + po::store(po::parse_config_file(config, dconfig_options), *conf); + } + po::notify(*conf); + + if (conf->count("help") || (conf->count("input") == 0)) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +void ReadParallelCorpus(const string& filename, + vector >* f, + vector >* e, + set* vocab_f, + set* vocab_e) { + f->clear(); + e->clear(); + vocab_f->clear(); + vocab_e->clear(); + istream* in; + if (filename == "-") + in = &cin; + else + in = new ifstream(filename.c_str()); + assert(*in); + string line; + const WordID kDIV = TD::Convert("|||"); + vector tmp; + while(*in) { + getline(*in, line); + if (line.empty() && !*in) break; + e->push_back(vector()); + f->push_back(vector()); + vector& le = e->back(); + vector& lf = f->back(); + tmp.clear(); + TD::ConvertSentence(line, &tmp); + bool isf = true; + for (unsigned i = 0; i < tmp.size(); ++i) { + const int cur = tmp[i]; + if (isf) { + if (kDIV == cur) { isf = false; } else { + lf.push_back(cur); + vocab_f->insert(cur); + } + } else { + assert(cur != kDIV); + le.push_back(cur); + vocab_e->insert(cur); + } + } + assert(isf == false); + } + if (in != &cin) delete in; +} + +struct MyJointModel { + MyJointModel(PhraseJointBase& rcp0) : + rp0(rcp0), base(prob_t::One()), rules(1,1) {} + + void DecrementRule(const TRule& rule) { + if (rules.decrement(rule)) + base /= rp0(rule); + } + + void IncrementRule(const TRule& rule) { + if (rules.increment(rule)) + base *= rp0(rule); + } + + void IncrementRules(const vector& rules) { + for (int i = 0; i < rules.size(); ++i) + IncrementRule(*rules[i]); + } + + void DecrementRules(const vector& rules) { + for (int i = 0; i < rules.size(); ++i) + DecrementRule(*rules[i]); + } + + prob_t RuleProbability(const TRule& rule) const { + prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule)))); + return p; + } + + prob_t Likelihood() const { + prob_t p = base; + prob_t q; q.logeq(rules.log_crp_prob()); + p *= q; + for (unsigned l = 1; l < src_jumps.size(); ++l) { + if (src_jumps[l].num_customers() > 0) { + prob_t q; + q.logeq(src_jumps[l].log_crp_prob()); + p *= q; + } + } + return p; + } + + const PhraseJointBase& rp0; + prob_t base; + CCRP_NoTable rules; + vector > src_jumps; +}; + +struct BackwardEstimateSym { + BackwardEstimateSym(const Model1& m1, + const Model1& invm1, const vector& src, const vector& trg) : + model1_(m1), invmodel1_(invm1), src_(src), trg_(trg) { + } + const prob_t& operator()(unsigned src_cov, unsigned trg_cov) const { + assert(src_cov <= src_.size()); + assert(trg_cov <= trg_.size()); + prob_t& e = cache_[src_cov][trg_cov]; + if (e.is_0()) { + if (trg_cov == trg_.size()) { e = prob_t::One(); return e; } + vector r(src_.size() + 1); r.clear(); + for (int i = src_cov; i < src_.size(); ++i) + r.push_back(src_[i]); + r.push_back(0); // NULL word + const prob_t uniform_alignment(1.0 / r.size()); + e.logeq(log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) + for (unsigned j = trg_cov; j < trg_.size(); ++j) { + prob_t p; + for (unsigned i = 0; i < r.size(); ++i) + p += model1_(r[i], trg_[j]); + if (p.is_0()) { + cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n"; + abort(); + } + p *= uniform_alignment; + e *= p; + } + r.pop_back(); + const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0)); + prob_t inv; + inv.logeq(log_poisson(r.size(), trg_.size() - trg_cov)); + for (unsigned i = 0; i < r.size(); ++i) { + prob_t p; + for (unsigned j = trg_cov - 1; j < trg_.size(); ++j) + p += invmodel1_(j < trg_cov ? 0 : trg_[j], r[i]); + if (p.is_0()) { + cerr << "ERROR: p_inv(" << TD::Convert(r[i]) << " | " << TD::GetString(trg_) << ") = 0!\n"; + abort(); + } + p *= inv_uniform; + inv *= p; + } + prob_t x = pow(e * inv, 0.5); + e = x; + //cerr << "Forward: " << log(e) << "\tBackward: " << log(inv) << "\t prop: " << log(x) << endl; + } + return e; + } + const Model1& model1_; + const Model1& invmodel1_; + const vector& src_; + const vector& trg_; + mutable unordered_map > cache_; +}; + +struct Particle { + Particle() : weight(prob_t::One()), src_cov(), trg_cov() {} + prob_t weight; + prob_t gamma_last; + vector rules; + int src_cov; + int trg_cov; +}; + +ostream& operator<<(ostream& o, const vector& v) { + for (int i = 0; i < v.size(); ++i) + o << (v[i] ? '1' : '0'); + return o; +} +ostream& operator<<(ostream& o, const Particle& p) { + o << "[src_cov=" << p.src_cov << " trg_cov=" << p.trg_cov << " num_rules=" << p.rules.size() << " w=" << log(p.weight) << ']'; + return o; +} + +void FilterCrapParticlesAndReweight(vector* pps) { + vector& ps = *pps; + SampleSet ss; + for (int i = 0; i < ps.size(); ++i) + ss.add(ps[i].weight); + vector nps; nps.reserve(ps.size()); + const prob_t uniform_weight(1.0 / ps.size()); + for (int i = 0; i < ps.size(); ++i) { + nps.push_back(ps[prng->SelectSample(ss)]); + nps[i].weight = uniform_weight; + } + nps.swap(ps); +} + +int main(int argc, char** argv) { + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + const unsigned kMAX_TRG_PHRASE = conf["max_trg_phrase"].as(); + const unsigned kMAX_SRC_PHRASE = conf["max_src_phrase"].as(); + const unsigned particles = conf["particles"].as(); + const unsigned samples = conf["samples"].as(); + const unsigned rejuv_freq = conf["filter_frequency"].as(); + + if (!conf.count("model1")) { + cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; + return 1; + } + if (conf.count("random_seed")) + prng.reset(new MT19937(conf["random_seed"].as())); + else + prng.reset(new MT19937); + MT19937& rng = *prng; + + vector > corpuse, corpusf; + set vocabe, vocabf; + cerr << "Reading corpus...\n"; + ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); + cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n"; + cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; + assert(corpusf.size() == corpuse.size()); + + const int kLHS = -TD::Convert("X"); + Model1 m1(conf["model1"].as()); + Model1 invm1(conf["inverse_model1"].as()); + +#if 0 + PhraseConditionalBase lp0(m1, conf["model1_interpolation_weight"].as(), vocabe.size()); + MyConditionalModel m(lp0); +#else + PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as(), vocabe.size(), vocabf.size()); + MyJointModel m(lp0); +#endif + + cerr << "Initializing reachability limits...\n"; + vector ps(corpusf.size()); + vector reaches; reaches.reserve(corpusf.size()); + for (int ci = 0; ci < corpusf.size(); ++ci) + reaches.push_back(Reachability(corpusf[ci].size(), + corpuse[ci].size(), + kMAX_SRC_PHRASE, + kMAX_TRG_PHRASE)); + cerr << "Sampling...\n"; + vector tmp_p(10000); // work space + SampleSet pfss; + for (int SS=0; SS < samples; ++SS) { + for (int ci = 0; ci < corpusf.size(); ++ci) { + vector& src = corpusf[ci]; + vector& trg = corpuse[ci]; + m.DecrementRules(ps[ci].rules); + + BackwardEstimateSym be(m1, invm1, src, trg); + const Reachability& r = reaches[ci]; + vector lps(particles); + + bool all_complete = false; + while(!all_complete) { + SampleSet ss; + + // all particles have now been extended a bit, we will reweight them now + if (lps[0].trg_cov > 0) + FilterCrapParticlesAndReweight(&lps); + + // loop over all particles and extend them + bool done_nothing = true; + for (int pi = 0; pi < particles; ++pi) { + Particle& p = lps[pi]; + int tic = 0; + while(p.trg_cov < trg.size() && tic < rejuv_freq) { + ++tic; + done_nothing = false; + ss.clear(); + TRule x; x.lhs_ = kLHS; + prob_t z; + + for (int trg_len = 1; trg_len <= kMAX_TRG_PHRASE; ++trg_len) { + x.e_.push_back(trg[trg_len - 1 + p.trg_cov]); + for (int src_len = 1; src_len <= kMAX_SRC_PHRASE; ++src_len) { + if (!r.edges[p.src_cov][p.trg_cov][src_len][trg_len]) continue; + + int i = p.src_cov; + assert(ss.size() < tmp_p.size()); // if fails increase tmp_p size + Particle& np = tmp_p[ss.size()]; + np = p; + x.f_.clear(); + for (int j = 0; j < src_len; ++j) + x.f_.push_back(src[i + j]); + np.src_cov += x.f_.size(); + np.trg_cov += x.e_.size(); + prob_t rp = m.RuleProbability(x); + np.gamma_last = rp; + const prob_t u = pow(np.gamma_last * pow(be(np.src_cov, np.trg_cov), 1.2), 0.1); + //cerr << "**rule=" << x << endl; + //cerr << " u=" << log(u) << " rule=" << rp << endl; + ss.add(u); + np.rules.push_back(TRulePtr(new TRule(x))); + z += u; + } + } + //cerr << "number of edges to consider: " << ss.size() << endl; + const int sampled = rng.SelectSample(ss); + prob_t q_n = ss[sampled] / z; + p = tmp_p[sampled]; + //m.IncrementRule(*p.rules.back()); + p.weight *= p.gamma_last / q_n; + //cerr << "[w=" << log(p.weight) << "]\tsampled rule: " << p.rules.back()->AsString() << endl; + //cerr << p << endl; + } + } // loop over particles (pi = 0 .. particles) + if (done_nothing) all_complete = true; + } + pfss.clear(); + for (int i = 0; i < lps.size(); ++i) + pfss.add(lps[i].weight); + const int sampled = rng.SelectSample(pfss); + ps[ci] = lps[sampled]; + m.IncrementRules(lps[sampled].rules); + for (int i = 0; i < lps[sampled].rules.size(); ++i) { cerr << "S:\t" << lps[sampled].rules[i]->AsString() << "\n"; } + cerr << "tmp-LLH: " << log(m.Likelihood()) << endl; + } + cerr << "LLH: " << log(m.Likelihood()) << endl; + } + return 0; +} + -- cgit v1.2.3 From 0af7d663194beddcde420349bbd91430e0b2e423 Mon Sep 17 00:00:00 2001 From: Guest_account Guest_account prguest11 Date: Tue, 11 Oct 2011 16:16:53 +0100 Subject: remove implicit conversion-to-double operator from LogVal that caused overflow errors, clean up some pf code --- decoder/aligner.cc | 2 +- decoder/cfg.cc | 2 +- decoder/cfg_format.h | 2 +- decoder/decoder.cc | 10 ++++---- decoder/hg.cc | 4 ++-- decoder/rule_lexer.l | 2 ++ decoder/trule.h | 15 +++++++++++- gi/pf/brat.cc | 11 --------- gi/pf/cbgi.cc | 10 -------- gi/pf/dpnaive.cc | 12 ---------- gi/pf/itg.cc | 11 --------- gi/pf/pfbrat.cc | 11 --------- gi/pf/pfdist.cc | 11 --------- gi/pf/pfnaive.cc | 11 --------- mteval/mbr_kbest.cc | 4 ++-- phrasinator/ccrp_nt.h | 24 +++++++++++++++---- training/mpi_batch_optimize.cc | 2 +- training/mpi_compute_cllh.cc | 51 +++++++++++++++++++---------------------- training/mpi_online_optimize.cc | 4 ++-- utils/logval.h | 10 ++++---- 20 files changed, 78 insertions(+), 131 deletions(-) (limited to 'gi/pf/pfnaive.cc') diff --git a/decoder/aligner.cc b/decoder/aligner.cc index 292ee123..53e059fb 100644 --- a/decoder/aligner.cc +++ b/decoder/aligner.cc @@ -165,7 +165,7 @@ inline void WriteProbGrid(const Array2D& m, ostream* pos) { if (m(i,j) == prob_t::Zero()) { os << "\t---X---"; } else { - snprintf(b, 1024, "%0.5f", static_cast(m(i,j))); + snprintf(b, 1024, "%0.5f", m(i,j).as_float()); os << '\t' << b; } } diff --git a/decoder/cfg.cc b/decoder/cfg.cc index 651978d2..cd7e66e9 100755 --- a/decoder/cfg.cc +++ b/decoder/cfg.cc @@ -639,7 +639,7 @@ void CFG::Print(std::ostream &o,CFGFormat const& f) const { o << '['<& src, SparseVector* trg) { for (SparseVector::const_iterator it = src.begin(); it != src.end(); ++it) - trg->set_value(it->first, it->second); + trg->set_value(it->first, it->second.as_float()); } }; @@ -788,10 +788,10 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { const bool show_tree_structure=conf.count("show_tree_structure"); if (!SILENT) forest_stats(forest," Init. forest",show_tree_structure,oracle.show_derivation); if (conf.count("show_expected_length")) { - const PRPair res = - Inside, - PRWeightFunction >(forest); - cerr << " Expected length (words): " << res.r / res.p << "\t" << res << endl; + const PRPair res = + Inside, + PRWeightFunction >(forest); + cerr << " Expected length (words): " << (res.r / res.p).as_float() << "\t" << res << endl; } if (conf.count("show_partition")) { diff --git a/decoder/hg.cc b/decoder/hg.cc index 3ad17f1a..180986d7 100644 --- a/decoder/hg.cc +++ b/decoder/hg.cc @@ -157,14 +157,14 @@ prob_t Hypergraph::ComputeEdgePosteriors(double scale, vector* posts) co const ScaledEdgeProb weight(scale); const ScaledTransitionEventWeightFunction w2(scale); SparseVector pv; - const double inside = InsideOutside, ScaledTransitionEventWeightFunction>(*this, &pv, weight, w2); posts->resize(edges_.size()); for (int i = 0; i < edges_.size(); ++i) (*posts)[i] = prob_t(pv.value(i)); - return prob_t(inside); + return inside; } prob_t Hypergraph::ComputeBestPathThroughEdges(vector* post) const { diff --git a/decoder/rule_lexer.l b/decoder/rule_lexer.l index 9331d8ed..083a5bb1 100644 --- a/decoder/rule_lexer.l +++ b/decoder/rule_lexer.l @@ -220,6 +220,8 @@ NT [^\t \[\],]+ std::cerr << "Line " << lex_line << ": LHS and RHS arity mismatch!\n"; abort(); } + // const bool ignore_grammar_features = false; + // if (ignore_grammar_features) scfglex_num_feats = 0; TRulePtr rp(new TRule(scfglex_lhs, scfglex_src_rhs, scfglex_src_rhs_size, scfglex_trg_rhs, scfglex_trg_rhs_size, scfglex_feat_ids, scfglex_feat_vals, scfglex_num_feats, scfglex_src_arity, scfglex_als, scfglex_num_als)); check_and_update_ctf_stack(rp); TRulePtr coarse_rp = ((ctf_level == 0) ? TRulePtr() : ctf_rule_stack.top()); diff --git a/decoder/trule.h b/decoder/trule.h index 4df4ec90..8eb2a059 100644 --- a/decoder/trule.h +++ b/decoder/trule.h @@ -5,7 +5,9 @@ #include #include #include -#include + +#include "boost/shared_ptr.hpp" +#include "boost/functional/hash.hpp" #include "sparse_vector.h" #include "wordid.h" @@ -162,4 +164,15 @@ class TRule { bool SanityCheck() const; }; +inline size_t hash_value(const TRule& r) { + size_t h = boost::hash_value(r.e_); + boost::hash_combine(h, -r.lhs_); + boost::hash_combine(h, boost::hash_value(r.f_)); + return h; +} + +inline bool operator==(const TRule& a, const TRule& b) { + return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); +} + #endif diff --git a/gi/pf/brat.cc b/gi/pf/brat.cc index 4c6ba3ef..7b60ef23 100644 --- a/gi/pf/brat.cc +++ b/gi/pf/brat.cc @@ -25,17 +25,6 @@ static unsigned kMAX_SRC_PHRASE; static unsigned kMAX_TRG_PHRASE; struct FSTState; -size_t hash_value(const TRule& r) { - size_t h = 2 - r.lhs_; - boost::hash_combine(h, boost::hash_value(r.e_)); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - double log_poisson(unsigned x, const double& lambda) { assert(lambda > 0.0); return log(lambda) * x - lgamma(x + 1) - lambda; diff --git a/gi/pf/cbgi.cc b/gi/pf/cbgi.cc index 20204e8a..97f1ba34 100644 --- a/gi/pf/cbgi.cc +++ b/gi/pf/cbgi.cc @@ -27,16 +27,6 @@ double log_decay(unsigned x, const double& b) { return log(b - 1) - x * log(b); } -size_t hash_value(const TRule& r) { - // TODO fix hash function - size_t h = boost::hash_value(r.e_) * boost::hash_value(r.f_) * r.lhs_; - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - struct SimpleBase { SimpleBase(unsigned esize, unsigned fsize, unsigned ntsize = 144) : uniform_e(-log(esize)), diff --git a/gi/pf/dpnaive.cc b/gi/pf/dpnaive.cc index 582d1be7..608f73d5 100644 --- a/gi/pf/dpnaive.cc +++ b/gi/pf/dpnaive.cc @@ -20,18 +20,6 @@ namespace po = boost::program_options; static unsigned kMAX_SRC_PHRASE; static unsigned kMAX_TRG_PHRASE; -struct FSTState; - -size_t hash_value(const TRule& r) { - size_t h = 2 - r.lhs_; - boost::hash_combine(h, boost::hash_value(r.e_)); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} void InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); diff --git a/gi/pf/itg.cc b/gi/pf/itg.cc index 2c2a86f9..ac3c16a3 100644 --- a/gi/pf/itg.cc +++ b/gi/pf/itg.cc @@ -27,17 +27,6 @@ ostream& operator<<(ostream& os, const vector& p) { return os << ']'; } -size_t hash_value(const TRule& r) { - size_t h = boost::hash_value(r.e_); - boost::hash_combine(h, -r.lhs_); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - double log_poisson(unsigned x, const double& lambda) { assert(lambda > 0.0); return log(lambda) * x - lgamma(x + 1) - lambda; diff --git a/gi/pf/pfbrat.cc b/gi/pf/pfbrat.cc index 4c6ba3ef..7b60ef23 100644 --- a/gi/pf/pfbrat.cc +++ b/gi/pf/pfbrat.cc @@ -25,17 +25,6 @@ static unsigned kMAX_SRC_PHRASE; static unsigned kMAX_TRG_PHRASE; struct FSTState; -size_t hash_value(const TRule& r) { - size_t h = 2 - r.lhs_; - boost::hash_combine(h, boost::hash_value(r.e_)); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - double log_poisson(unsigned x, const double& lambda) { assert(lambda > 0.0); return log(lambda) * x - lgamma(x + 1) - lambda; diff --git a/gi/pf/pfdist.cc b/gi/pf/pfdist.cc index 18dfd03b..81abd61b 100644 --- a/gi/pf/pfdist.cc +++ b/gi/pf/pfdist.cc @@ -24,17 +24,6 @@ namespace po = boost::program_options; shared_ptr prng; -size_t hash_value(const TRule& r) { - size_t h = boost::hash_value(r.e_); - boost::hash_combine(h, -r.lhs_); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - void InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() diff --git a/gi/pf/pfnaive.cc b/gi/pf/pfnaive.cc index 43c604c3..c30e7c4f 100644 --- a/gi/pf/pfnaive.cc +++ b/gi/pf/pfnaive.cc @@ -24,17 +24,6 @@ namespace po = boost::program_options; shared_ptr prng; -size_t hash_value(const TRule& r) { - size_t h = boost::hash_value(r.e_); - boost::hash_combine(h, -r.lhs_); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - void InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc index 2867b36b..64a6a8bf 100644 --- a/mteval/mbr_kbest.cc +++ b/mteval/mbr_kbest.cc @@ -32,7 +32,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { } struct LossComparer { - bool operator()(const pair, double>& a, const pair, double>& b) const { + bool operator()(const pair, prob_t>& a, const pair, prob_t>& b) const { return a.second < b.second; } }; @@ -108,7 +108,7 @@ int main(int argc, char** argv) { ScoreP s = scorer->ScoreCandidate(list[j].first); double loss = 1.0 - s->ComputeScore(); if (type == TER || type == AER) loss = 1.0 - loss; - double weighted_loss = loss * (joints[j] / marginal); + double weighted_loss = loss * (joints[j] / marginal).as_float(); wl_acc += weighted_loss; if ((!output_list) && wl_acc > mbr_loss) break; } diff --git a/phrasinator/ccrp_nt.h b/phrasinator/ccrp_nt.h index 163b643a..811bce73 100644 --- a/phrasinator/ccrp_nt.h +++ b/phrasinator/ccrp_nt.h @@ -50,15 +50,26 @@ class CCRP_NoTable { return it->second; } - void increment(const Dish& dish) { - ++custs_[dish]; + int increment(const Dish& dish) { + int table_diff = 0; + if (++custs_[dish] == 1) + table_diff = 1; ++num_customers_; + return table_diff; } - void decrement(const Dish& dish) { - if ((--custs_[dish]) == 0) + int decrement(const Dish& dish) { + int table_diff = 0; + int nc = --custs_[dish]; + if (nc == 0) { custs_.erase(dish); + table_diff = -1; + } else if (nc < 0) { + std::cerr << "Dish counts dropped below zero for: " << dish << std::endl; + abort(); + } --num_customers_; + return table_diff; } double prob(const Dish& dish, const double& p0) const { @@ -66,6 +77,11 @@ class CCRP_NoTable { return (at_table + p0 * concentration_) / (num_customers_ + concentration_); } + double logprob(const Dish& dish, const double& logp0) const { + const unsigned at_table = num_customers(dish); + return log(at_table + exp(logp0 + log(concentration_))) - log(num_customers_ + concentration_); + } + double log_crp_prob() const { return log_crp_prob(concentration_); } diff --git a/training/mpi_batch_optimize.cc b/training/mpi_batch_optimize.cc index 0ba8c530..046e921c 100644 --- a/training/mpi_batch_optimize.cc +++ b/training/mpi_batch_optimize.cc @@ -92,7 +92,7 @@ struct TrainingObserver : public DecoderObserver { void SetLocalGradientAndObjective(vector* g, double* o) const { *o = acc_obj; for (SparseVector::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it) - (*g)[it->first] = it->second; + (*g)[it->first] = it->second.as_float(); } virtual void NotifyDecodingStart(const SentenceMetadata& smeta) { diff --git a/training/mpi_compute_cllh.cc b/training/mpi_compute_cllh.cc index b496d196..d5caa745 100644 --- a/training/mpi_compute_cllh.cc +++ b/training/mpi_compute_cllh.cc @@ -1,6 +1,4 @@ -#include #include -#include #include #include #include @@ -12,6 +10,7 @@ #include #include +#include "sentence_metadata.h" #include "verbose.h" #include "hg.h" #include "prob.h" @@ -52,7 +51,8 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { return true; } -void ReadTrainingCorpus(const string& fname, int rank, int size, vector* c, vector* ids) { +void ReadInstances(const string& fname, int rank, int size, vector* c) { + assert(fname != "-"); ReadFile rf(fname); istream& in = *rf.stream(); string line; @@ -60,20 +60,16 @@ void ReadTrainingCorpus(const string& fname, int rank, int size, vector* while(in) { getline(in, line); if (!in) break; - if (lc % size == rank) { - c->push_back(line); - ids->push_back(lc); - } + if (lc % size == rank) c->push_back(line); ++lc; } } static const double kMINUS_EPSILON = -1e-6; -struct TrainingObserver : public DecoderObserver { - void Reset() { - acc_obj = 0; - } +struct ConditionalLikelihoodObserver : public DecoderObserver { + + ConditionalLikelihoodObserver() : trg_words(), acc_obj(), cur_obj() {} virtual void NotifyDecodingStart(const SentenceMetadata&) { cur_obj = 0; @@ -120,8 +116,10 @@ struct TrainingObserver : public DecoderObserver { } assert(!isnan(log_ref_z)); acc_obj += (cur_obj - log_ref_z); + trg_words += smeta.GetReference().size(); } + unsigned trg_words; double acc_obj; double cur_obj; int state; @@ -161,35 +159,32 @@ int main(int argc, char** argv) { if (conf.count("weights")) Weights::InitFromFile(conf["weights"].as(), &weights); - // freeze feature set - //const bool freeze_feature_set = conf.count("freeze_feature_set"); - //if (freeze_feature_set) FD::Freeze(); - - vector corpus; vector ids; - ReadTrainingCorpus(conf["training_data"].as(), rank, size, &corpus, &ids); + vector corpus; + ReadInstances(conf["training_data"].as(), rank, size, &corpus); assert(corpus.size() > 0); - assert(corpus.size() == ids.size()); - - TrainingObserver observer; - double objective = 0; - observer.Reset(); if (rank == 0) - cerr << "Each processor is decoding " << corpus.size() << " training examples...\n"; + cerr << "Each processor is decoding ~" << corpus.size() << " training examples...\n"; - for (int i = 0; i < corpus.size(); ++i) { - decoder.SetId(ids[i]); + ConditionalLikelihoodObserver observer; + for (int i = 0; i < corpus.size(); ++i) decoder.Decode(corpus[i], &observer); - } + double objective = 0; + unsigned total_words = 0; #ifdef HAVE_MPI reduce(world, observer.acc_obj, objective, std::plus(), 0); + reduce(world, observer.trg_words, total_words, std::plus(), 0); #else objective = observer.acc_obj; #endif - if (rank == 0) - cout << "OBJECTIVE: " << objective << endl; + if (rank == 0) { + cout << "CONDITIONAL LOG_e LIKELIHOOD: " << objective << endl; + cout << "CONDITIONAL LOG_2 LIKELIHOOD: " << (objective/log(2)) << endl; + cout << " CONDITIONAL ENTROPY: " << (objective/log(2) / total_words) << endl; + cout << " PERPLEXITY: " << pow(2, (objective/log(2) / total_words)) << endl; + } return 0; } diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc index 2ef4a2e7..f87b7274 100644 --- a/training/mpi_online_optimize.cc +++ b/training/mpi_online_optimize.cc @@ -94,7 +94,7 @@ struct TrainingObserver : public DecoderObserver { void SetLocalGradientAndObjective(vector* g, double* o) const { *o = acc_obj; for (SparseVector::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it) - (*g)[it->first] = it->second; + (*g)[it->first] = it->second.as_float(); } virtual void NotifyDecodingStart(const SentenceMetadata& smeta) { @@ -158,7 +158,7 @@ struct TrainingObserver : public DecoderObserver { void GetGradient(SparseVector* g) const { g->clear(); for (SparseVector::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it) - g->set_value(it->first, it->second); + g->set_value(it->first, it->second.as_float()); } int total_complete; diff --git a/utils/logval.h b/utils/logval.h index 6fdc2c42..8a59d0b1 100644 --- a/utils/logval.h +++ b/utils/logval.h @@ -25,12 +25,13 @@ class LogVal { typedef LogVal Self; LogVal() : s_(), v_(LOGVAL_LOG0) {} - explicit LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {} + LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {} + const Self& operator=(double x) { s_ = std::signbit(x); v_ = s_ ? std::log(-x) : std::log(x); return *this; } LogVal(init_minus_1) : s_(true),v_(0) { } LogVal(init_1) : s_(),v_(0) { } LogVal(init_0) : s_(),v_(LOGVAL_LOG0) { } - LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {} - LogVal(unsigned x) : s_(0), v_(std::log(x)) { } + explicit LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {} + explicit LogVal(unsigned x) : s_(0), v_(std::log(x)) { } LogVal(double lnx,bool sign) : s_(sign),v_(lnx) {} LogVal(double lnx,init_lnx) : s_(),v_(lnx) {} static Self exp(T lnx) { return Self(lnx,false); } @@ -141,9 +142,6 @@ class LogVal { return pow(1/root); } - operator T() const { - if (s_) return -std::exp(v_); else return std::exp(v_); - } T as_float() const { if (s_) return -std::exp(v_); else return std::exp(v_); } -- cgit v1.2.3 From ee84ab027c0be54800cac0c9bff62dd097354f6d Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 12 Oct 2011 14:57:15 +0100 Subject: model lenght properly, clean up --- gi/pf/Makefile.am | 2 +- gi/pf/corpus.cc | 57 ++++++++++++++++++++++++ gi/pf/corpus.h | 19 ++++++++ gi/pf/dpnaive.cc | 95 +++++++++++----------------------------- gi/pf/monotonic_pseg.h | 88 +++++++++++++++++++++++++++++++++++++ gi/pf/pfnaive.cc | 116 +++++-------------------------------------------- utils/logval_test.cc | 14 +++--- 7 files changed, 209 insertions(+), 182 deletions(-) create mode 100644 gi/pf/corpus.cc create mode 100644 gi/pf/corpus.h create mode 100644 gi/pf/monotonic_pseg.h (limited to 'gi/pf/pfnaive.cc') diff --git a/gi/pf/Makefile.am b/gi/pf/Makefile.am index c9764ad5..42758939 100644 --- a/gi/pf/Makefile.am +++ b/gi/pf/Makefile.am @@ -1,7 +1,7 @@ bin_PROGRAMS = cbgi brat dpnaive pfbrat pfdist itg pfnaive noinst_LIBRARIES = libpf.a -libpf_a_SOURCES = base_measures.cc reachability.cc cfg_wfst_composer.cc +libpf_a_SOURCES = base_measures.cc reachability.cc cfg_wfst_composer.cc corpus.cc itg_SOURCES = itg.cc diff --git a/gi/pf/corpus.cc b/gi/pf/corpus.cc new file mode 100644 index 00000000..a408e7cf --- /dev/null +++ b/gi/pf/corpus.cc @@ -0,0 +1,57 @@ +#include "corpus.h" + +#include +#include +#include + +#include "tdict.h" +#include "filelib.h" + +using namespace std; + +namespace corpus { + +void ReadParallelCorpus(const string& filename, + vector >* f, + vector >* e, + set* vocab_f, + set* vocab_e) { + f->clear(); + e->clear(); + vocab_f->clear(); + vocab_e->clear(); + ReadFile rf(filename); + istream* in = rf.stream(); + assert(*in); + string line; + const WordID kDIV = TD::Convert("|||"); + vector tmp; + while(*in) { + getline(*in, line); + if (line.empty() && !*in) break; + e->push_back(vector()); + f->push_back(vector()); + vector& le = e->back(); + vector& lf = f->back(); + tmp.clear(); + TD::ConvertSentence(line, &tmp); + bool isf = true; + for (unsigned i = 0; i < tmp.size(); ++i) { + const int cur = tmp[i]; + if (isf) { + if (kDIV == cur) { isf = false; } else { + lf.push_back(cur); + vocab_f->insert(cur); + } + } else { + assert(cur != kDIV); + le.push_back(cur); + vocab_e->insert(cur); + } + } + assert(isf == false); + } +} + +} + diff --git a/gi/pf/corpus.h b/gi/pf/corpus.h new file mode 100644 index 00000000..e7febdb7 --- /dev/null +++ b/gi/pf/corpus.h @@ -0,0 +1,19 @@ +#ifndef _CORPUS_H_ +#define _CORPUS_H_ + +#include +#include +#include +#include "wordid.h" + +namespace corpus { + +void ReadParallelCorpus(const std::string& filename, + std::vector >* f, + std::vector >* e, + std::set* vocab_f, + std::set* vocab_e); + +} + +#endif diff --git a/gi/pf/dpnaive.cc b/gi/pf/dpnaive.cc index 608f73d5..c926487b 100644 --- a/gi/pf/dpnaive.cc +++ b/gi/pf/dpnaive.cc @@ -7,12 +7,14 @@ #include #include "base_measures.h" +#include "monotonic_pseg.h" #include "trule.h" #include "tdict.h" #include "filelib.h" #include "dict.h" #include "sampler.h" #include "ccrp_nt.h" +#include "corpus.h" using namespace std; using namespace std::tr1; @@ -52,57 +54,12 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { } } -void ReadParallelCorpus(const string& filename, - vector >* f, - vector >* e, - set* vocab_e, - set* vocab_f) { - f->clear(); - e->clear(); - vocab_f->clear(); - vocab_e->clear(); - istream* in; - if (filename == "-") - in = &cin; - else - in = new ifstream(filename.c_str()); - assert(*in); - string line; - const WordID kDIV = TD::Convert("|||"); - vector tmp; - while(*in) { - getline(*in, line); - if (line.empty() && !*in) break; - e->push_back(vector()); - f->push_back(vector()); - vector& le = e->back(); - vector& lf = f->back(); - tmp.clear(); - TD::ConvertSentence(line, &tmp); - bool isf = true; - for (unsigned i = 0; i < tmp.size(); ++i) { - const int cur = tmp[i]; - if (isf) { - if (kDIV == cur) { isf = false; } else { - lf.push_back(cur); - vocab_f->insert(cur); - } - } else { - assert(cur != kDIV); - le.push_back(cur); - vocab_e->insert(cur); - } - } - assert(isf == false); - } - if (in != &cin) delete in; -} - shared_ptr prng; template struct ModelAndData { - explicit ModelAndData(const Base& b, const vector >& ce, const vector >& cf, const set& ve, const set& vf) : + explicit ModelAndData(MonotonicParallelSegementationModel& m, const Base& b, const vector >& ce, const vector >& cf, const set& ve, const set& vf) : + model(m), rng(&*prng), p0(b), baseprob(prob_t::One()), @@ -110,14 +67,12 @@ struct ModelAndData { corpusf(cf), vocabe(ve), vocabf(vf), - rules(1,1), mh_samples(), mh_rejects(), kX(-TD::Convert("X")), derivations(corpuse.size()) {} void ResampleHyperparameters() { - rules.resample_hyperparameters(&*prng); } void InstantiateRule(const pair& from, @@ -139,12 +94,10 @@ struct ModelAndData { TRule x; for (int i = 1; i < d.size(); ++i) { InstantiateRule(d[i], d[i-1], sentf, sente, &x); - //cerr << "REMOVE: " << x.AsString() << endl; - if (rules.decrement(x)) { - baseprob /= p0(x); - //cerr << " (REMOVED ONLY INSTANCE)\n"; - } + model.DecrementRule(x); + model.DecrementContinue(); } + model.DecrementStop(); } void PrintDerivation(const vector >& d, const vector& sentf, const vector& sente) { @@ -161,39 +114,38 @@ struct ModelAndData { TRule x; for (int i = 1; i < d.size(); ++i) { InstantiateRule(d[i], d[i-1], sentf, sente, &x); - if (rules.increment(x)) { - baseprob *= p0(x); - } + model.IncrementRule(x); + model.IncrementContinue(); } + model.IncrementStop(); } prob_t Likelihood() const { - prob_t p; - p.logeq(rules.log_crp_prob()); - return p * baseprob; + return model.Likelihood(); } prob_t DerivationProposalProbability(const vector >& d, const vector& sentf, const vector& sente) const { - prob_t p = prob_t::One(); + prob_t p = model.StopProbability(); if (d.size() < 2) return p; TRule x; + const prob_t p_cont = model.ContinueProbability(); for (int i = 1; i < d.size(); ++i) { InstantiateRule(d[i], d[i-1], sentf, sente, &x); - prob_t rp; rp.logeq(rules.logprob(x, log(p0(x)))); - p *= rp; + p *= p_cont; + p *= model.RuleProbability(x); } return p; } void Sample(); + MonotonicParallelSegementationModel& model; MT19937* rng; const Base& p0; prob_t baseprob; // cached value of generating the table table labels from p0 // this can't be used if we go to a hierarchical prior! const vector >& corpuse, corpusf; const set& vocabe, vocabf; - CCRP_NoTable rules; unsigned mh_samples, mh_rejects; const int kX; vector > > derivations; @@ -201,8 +153,8 @@ struct ModelAndData { template void ModelAndData::Sample() { - unsigned MAXK = 4; - unsigned MAXL = 4; + unsigned MAXK = kMAX_SRC_PHRASE; + unsigned MAXL = kMAX_TRG_PHRASE; TRule x; x.lhs_ = -TD::Convert("X"); for (int samples = 0; samples < 1000; ++samples) { @@ -228,6 +180,8 @@ void ModelAndData::Sample() { boost::multi_array a(boost::extents[sentf.size() + 1][sente.size() + 1]); boost::multi_array trans(boost::extents[sentf.size() + 1][sente.size() + 1][MAXK][MAXL]); a[0][0] = prob_t::One(); + const prob_t q_stop = model.StopProbability(); + const prob_t q_cont = model.ContinueProbability(); for (int i = 0; i < sentf.size(); ++i) { for (int j = 0; j < sente.size(); ++j) { const prob_t src_a = a[i][j]; @@ -239,7 +193,9 @@ void ModelAndData::Sample() { for (int l = 1; l <= MAXL; ++l) { if (j + l > sente.size()) break; x.e_.push_back(sente[j + l - 1]); - trans[i][j][k - 1][l - 1].logeq(rules.logprob(x, log(p0(x)))); + const bool stop_now = ((j + l) == sente.size()) && ((i + k) == sentf.size()); + const prob_t& cp = stop_now ? q_stop : q_cont; + trans[i][j][k - 1][l - 1] = model.RuleProbability(x) * cp; a[i + k][j + l] += src_a * trans[i][j][k - 1][l - 1]; } } @@ -319,7 +275,7 @@ int main(int argc, char** argv) { vector > corpuse, corpusf; set vocabe, vocabf; - ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); + corpus::ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); cerr << "f-Corpus size: " << corpusf.size() << " sentences\n"; cerr << "f-Vocabulary size: " << vocabf.size() << " types\n"; cerr << "f-Corpus size: " << corpuse.size() << " sentences\n"; @@ -328,8 +284,9 @@ int main(int argc, char** argv) { Model1 m1(conf["model1"].as()); PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as(), vocabe.size(), vocabf.size()); + MonotonicParallelSegementationModel m(lp0); - ModelAndData posterior(lp0, corpuse, corpusf, vocabe, vocabf); + ModelAndData posterior(m, lp0, corpuse, corpusf, vocabe, vocabf); posterior.Sample(); return 0; diff --git a/gi/pf/monotonic_pseg.h b/gi/pf/monotonic_pseg.h new file mode 100644 index 00000000..7e6af3fc --- /dev/null +++ b/gi/pf/monotonic_pseg.h @@ -0,0 +1,88 @@ +#ifndef _MONOTONIC_PSEG_H_ +#define _MONOTONIC_PSEG_H_ + +#include + +#include "prob.h" +#include "ccrp_nt.h" +#include "trule.h" +#include "base_measures.h" + +struct MonotonicParallelSegementationModel { + explicit MonotonicParallelSegementationModel(PhraseJointBase& rcp0) : + rp0(rcp0), base(prob_t::One()), rules(1,1), stop(1.0) {} + + void DecrementRule(const TRule& rule) { + if (rules.decrement(rule)) + base /= rp0(rule); + } + + void IncrementRule(const TRule& rule) { + if (rules.increment(rule)) + base *= rp0(rule); + } + + void IncrementRulesAndStops(const std::vector& rules) { + for (int i = 0; i < rules.size(); ++i) + IncrementRule(*rules[i]); + if (rules.size()) IncrementContinue(rules.size() - 1); + IncrementStop(); + } + + void DecrementRulesAndStops(const std::vector& rules) { + for (int i = 0; i < rules.size(); ++i) + DecrementRule(*rules[i]); + if (rules.size()) { + DecrementContinue(rules.size() - 1); + DecrementStop(); + } + } + + prob_t RuleProbability(const TRule& rule) const { + prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule)))); + return p; + } + + prob_t Likelihood() const { + prob_t p = base; + prob_t q; q.logeq(rules.log_crp_prob()); + p *= q; + q.logeq(stop.log_crp_prob()); + p *= q; + return p; + } + + void IncrementStop() { + stop.increment(true); + } + + void IncrementContinue(int n = 1) { + for (int i = 0; i < n; ++i) + stop.increment(false); + } + + void DecrementStop() { + stop.decrement(true); + } + + void DecrementContinue(int n = 1) { + for (int i = 0; i < n; ++i) + stop.decrement(false); + } + + prob_t StopProbability() const { + return prob_t(stop.prob(true, 0.5)); + } + + prob_t ContinueProbability() const { + return prob_t(stop.prob(false, 0.5)); + } + + const PhraseJointBase& rp0; + prob_t base; + CCRP_NoTable rules; + CCRP_NoTable stop; +}; + +#endif + diff --git a/gi/pf/pfnaive.cc b/gi/pf/pfnaive.cc index c30e7c4f..33dc08c3 100644 --- a/gi/pf/pfnaive.cc +++ b/gi/pf/pfnaive.cc @@ -7,6 +7,7 @@ #include #include "base_measures.h" +#include "monotonic_pseg.h" #include "reachability.h" #include "viterbi.h" #include "hg.h" @@ -17,6 +18,7 @@ #include "sampler.h" #include "ccrp_nt.h" #include "ccrp_onetable.h" +#include "corpus.h" using namespace std; using namespace tr1; @@ -58,101 +60,6 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { } } -void ReadParallelCorpus(const string& filename, - vector >* f, - vector >* e, - set* vocab_f, - set* vocab_e) { - f->clear(); - e->clear(); - vocab_f->clear(); - vocab_e->clear(); - istream* in; - if (filename == "-") - in = &cin; - else - in = new ifstream(filename.c_str()); - assert(*in); - string line; - const WordID kDIV = TD::Convert("|||"); - vector tmp; - while(*in) { - getline(*in, line); - if (line.empty() && !*in) break; - e->push_back(vector()); - f->push_back(vector()); - vector& le = e->back(); - vector& lf = f->back(); - tmp.clear(); - TD::ConvertSentence(line, &tmp); - bool isf = true; - for (unsigned i = 0; i < tmp.size(); ++i) { - const int cur = tmp[i]; - if (isf) { - if (kDIV == cur) { isf = false; } else { - lf.push_back(cur); - vocab_f->insert(cur); - } - } else { - assert(cur != kDIV); - le.push_back(cur); - vocab_e->insert(cur); - } - } - assert(isf == false); - } - if (in != &cin) delete in; -} - -struct MyJointModel { - MyJointModel(PhraseJointBase& rcp0) : - rp0(rcp0), base(prob_t::One()), rules(1,1) {} - - void DecrementRule(const TRule& rule) { - if (rules.decrement(rule)) - base /= rp0(rule); - } - - void IncrementRule(const TRule& rule) { - if (rules.increment(rule)) - base *= rp0(rule); - } - - void IncrementRules(const vector& rules) { - for (int i = 0; i < rules.size(); ++i) - IncrementRule(*rules[i]); - } - - void DecrementRules(const vector& rules) { - for (int i = 0; i < rules.size(); ++i) - DecrementRule(*rules[i]); - } - - prob_t RuleProbability(const TRule& rule) const { - prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule)))); - return p; - } - - prob_t Likelihood() const { - prob_t p = base; - prob_t q; q.logeq(rules.log_crp_prob()); - p *= q; - for (unsigned l = 1; l < src_jumps.size(); ++l) { - if (src_jumps[l].num_customers() > 0) { - prob_t q; - q.logeq(src_jumps[l].log_crp_prob()); - p *= q; - } - } - return p; - } - - const PhraseJointBase& rp0; - prob_t base; - CCRP_NoTable rules; - vector > src_jumps; -}; - struct BackwardEstimateSym { BackwardEstimateSym(const Model1& m1, const Model1& invm1, const vector& src, const vector& trg) : @@ -264,7 +171,7 @@ int main(int argc, char** argv) { vector > corpuse, corpusf; set vocabe, vocabf; cerr << "Reading corpus...\n"; - ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); + corpus::ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n"; cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; assert(corpusf.size() == corpuse.size()); @@ -273,13 +180,8 @@ int main(int argc, char** argv) { Model1 m1(conf["model1"].as()); Model1 invm1(conf["inverse_model1"].as()); -#if 0 - PhraseConditionalBase lp0(m1, conf["model1_interpolation_weight"].as(), vocabe.size()); - MyConditionalModel m(lp0); -#else PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as(), vocabe.size(), vocabf.size()); - MyJointModel m(lp0); -#endif + MonotonicParallelSegementationModel m(lp0); cerr << "Initializing reachability limits...\n"; vector ps(corpusf.size()); @@ -296,7 +198,10 @@ int main(int argc, char** argv) { for (int ci = 0; ci < corpusf.size(); ++ci) { vector& src = corpusf[ci]; vector& trg = corpuse[ci]; - m.DecrementRules(ps[ci].rules); + m.DecrementRulesAndStops(ps[ci].rules); + const prob_t q_stop = m.StopProbability(); + const prob_t q_cont = m.ContinueProbability(); + cerr << "P(stop)=" << q_stop << "\tP(continue)=" <AsString() << "\n"; } cerr << "tmp-LLH: " << log(m.Likelihood()) << endl; } diff --git a/utils/logval_test.cc b/utils/logval_test.cc index 4aa452f2..6133f5ce 100644 --- a/utils/logval_test.cc +++ b/utils/logval_test.cc @@ -30,13 +30,13 @@ TEST_F(LogValTest,Negate) { LogVal x(-2.4); LogVal y(2.4); y.negate(); - EXPECT_FLOAT_EQ(x,y); + EXPECT_FLOAT_EQ(x.as_float(),y.as_float()); } TEST_F(LogValTest,Inverse) { LogVal x(1/2.4); LogVal y(2.4); - EXPECT_FLOAT_EQ(x,y.inverse()); + EXPECT_FLOAT_EQ(x.as_float(),y.inverse().as_float()); } TEST_F(LogValTest,Minus) { @@ -45,9 +45,9 @@ TEST_F(LogValTest,Minus) { LogVal z1 = x - y; LogVal z2 = x; z2 -= y; - EXPECT_FLOAT_EQ(z1, z2); - EXPECT_FLOAT_EQ(z1, 10.0); - EXPECT_FLOAT_EQ(y - x, -10.0); + EXPECT_FLOAT_EQ(z1.as_float(), z2.as_float()); + EXPECT_FLOAT_EQ(z1.as_float(), 10.0); + EXPECT_FLOAT_EQ((y - x).as_float(), -10.0); } TEST_F(LogValTest,TestOps) { @@ -62,8 +62,8 @@ TEST_F(LogValTest,TestOps) { LogVal bb(-0.3); cerr << (aa + bb) << endl; cerr << (bb + aa) << endl; - EXPECT_FLOAT_EQ((aa + bb), (bb + aa)); - EXPECT_FLOAT_EQ((aa + bb), -0.1); + EXPECT_FLOAT_EQ((aa + bb).as_float(), (bb + aa).as_float()); + EXPECT_FLOAT_EQ((aa + bb).as_float(), -0.1); } TEST_F(LogValTest,TestSizes) { -- cgit v1.2.3