From 0acc92a0eecf04a2c429f6f7685bfcaa68c7ec3a Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Tue, 11 Oct 2011 12:06:32 +0100
Subject: check in some experimental particle filtering code, some gitignore
 fixes

---
 gi/pf/pfnaive.cc | 385 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 385 insertions(+)
 create mode 100644 gi/pf/pfnaive.cc

(limited to 'gi/pf/pfnaive.cc')
diff --git a/gi/pf/pfnaive.cc b/gi/pf/pfnaive.cc
new file mode 100644
index 00000000..43c604c3
--- /dev/null
+++ b/gi/pf/pfnaive.cc
@@ -0,0 +1,385 @@
+#include <iostream>
+#include <tr1/memory>
+#include <queue>
+
+#include <boost/functional.hpp>
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "base_measures.h"
+#include "reachability.h"
+#include "viterbi.h"
+#include "hg.h"
+#include "trule.h"
+#include "tdict.h"
+#include "filelib.h"
+#include "dict.h"
+#include "sampler.h"
+#include "ccrp_nt.h"
+#include "ccrp_onetable.h"
+
+using namespace std;
+using namespace tr1;
+namespace po = boost::program_options;
+
+shared_ptr<MT19937> prng;
+
+size_t hash_value(const TRule& r) {
+  size_t h = boost::hash_value(r.e_);
+  boost::hash_combine(h, -r.lhs_);
+  boost::hash_combine(h, boost::hash_value(r.f_));
+  return h;
+}
+
+bool operator==(const TRule& a, const TRule& b) {
+  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
+}
+
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples")
+        ("particles,p",po::value<unsigned>()->default_value(30),"Number of particles")
+        ("filter_frequency,f",po::value<unsigned>()->default_value(5),"Number of time steps between filterings")
+        ("input,i",po::value<string>(),"Read parallel data from")
+        ("max_src_phrase",po::value<unsigned>()->default_value(5),"Maximum length of source language phrases")
+        ("max_trg_phrase",po::value<unsigned>()->default_value(5),"Maximum length of target language phrases")
+        ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)")
+        ("inverse_model1,M",po::value<string>(),"Inverse Model 1 parameters (used in backward estimate)")
+        ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution")
+        ("random_seed,S",po::value<uint32_t>(), "Random seed");
+  po::options_description clo("Command line options");
+  clo.add_options()
+        ("config", po::value<string>(), "Configuration file")
+        ("help,h", "Print this help message and exit");
+  po::options_description dconfig_options, dcmdline_options;
+  dconfig_options.add(opts);  // options accepted in a config file
+  dcmdline_options.add(opts).add(clo);  // the command line additionally accepts --config and --help
+  // The command line is stored first; program_options does not overwrite a value that is already stored, so it takes precedence over the config file.
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  if (conf->count("config")) {
+    ifstream config((*conf)["config"].as<string>().c_str());  // NOTE(review): the stream is not checked before parsing; behaviour for an unreadable file should be confirmed
+    po::store(po::parse_config_file(config, dconfig_options), *conf);
+  }
+  po::notify(*conf);
+  // Usage is printed and the process exits with status 1 both for --help and when --input is missing.
+  if (conf->count("help") || (conf->count("input") == 0)) {
+    cerr << dcmdline_options << endl;
+    exit(1);
+  }
+}
+
+void ReadParallelCorpus(const string& filename,  // parses "source ||| target" lines into *f / *e and collects both vocabularies
+                vector<vector<WordID> >* f,  // out: one source-side word-id sequence per input line
+                vector<vector<WordID> >* e,  // out: one target-side word-id sequence per input line
+                set<WordID>* vocab_f,  // out: distinct source word ids
+                set<WordID>* vocab_e) {  // out: distinct target word ids
+  f->clear();  // all four outputs are reset before reading
+  e->clear();
+  vocab_f->clear();
+  vocab_e->clear();
+  istream* in;
+  if (filename == "-")  // "-" selects standard input
+    in = &cin;
+  else
+    in = new ifstream(filename.c_str());  // owned here; released at the end of the function
+  if (!*in) { cerr << "Failed to open parallel corpus: " << filename << endl; abort(); }  // explicit check: an assert() would vanish under NDEBUG
+  string line;
+  const WordID kDIV = TD::Convert("|||");
+  vector<WordID> tmp;
+  while(*in) {
+    getline(*in, line);
+    if (line.empty() && !*in) break;  // end of input with nothing left to parse
+    e->push_back(vector<int>());
+    f->push_back(vector<int>());
+    vector<int>& le = e->back();
+    vector<int>& lf = f->back();
+    tmp.clear();
+    TD::ConvertSentence(line, &tmp);
+    bool isf = true;  // true while tokens still belong to the source side
+    for (unsigned i = 0; i < tmp.size(); ++i) {
+      const int cur = tmp[i];
+      if (isf) {
+        if (kDIV == cur) { isf = false; } else {  // the first "|||" switches to the target side
+          lf.push_back(cur);
+          vocab_f->insert(cur);
+        }
+      } else {
+        assert(cur != kDIV);  // a second separator on one line is rejected
+        le.push_back(cur);
+        vocab_e->insert(cur);
+      }
+    }
+    assert(isf == false);  // every line (blank ones included) must contain the separator
+  }
+  if (in != &cin) delete in;
+}
+
+struct MyJointModel {  // joint phrase-pair model: a CCRP_NoTable over rules with base distribution rp0
+  MyJointModel(PhraseJointBase& rcp0) :
+    rp0(rcp0), base(prob_t::One()), rules(1,1) {}  // NOTE(review): meaning of the (1,1) CCRP_NoTable arguments is not visible here — confirm in ccrp_nt.h
+
+  void DecrementRule(const TRule& rule) {
+    if (rules.decrement(rule))  // non-zero result: take the rule's base probability back out of `base`
+      base /= rp0(rule);
+  }
+
+  void IncrementRule(const TRule& rule) {
+    if (rules.increment(rule))  // non-zero result: fold the rule's base probability into `base`
+      base *= rp0(rule);
+  }
+
+  void IncrementRules(const vector<TRulePtr>& rules) {  // parameter shadows the member `rules`; only IncrementRule touches the member
+    for (int i = 0; i < rules.size(); ++i)
+      IncrementRule(*rules[i]);
+  }
+
+  void DecrementRules(const vector<TRulePtr>& rules) {  // parameter shadows the member `rules`; only DecrementRule touches the member
+    for (int i = 0; i < rules.size(); ++i)
+      DecrementRule(*rules[i]);
+  }
+
+  prob_t RuleProbability(const TRule& rule) const {  // rules.logprob() evaluated with log rp0(rule) as the base log-probability
+    prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule))));
+    return p;
+  }
+
+  prob_t Likelihood() const {  // base * rules.log_crp_prob() * log_crp_prob() of every non-empty src_jumps entry
+    prob_t p = base;
+    prob_t q; q.logeq(rules.log_crp_prob());
+    p *= q;
+    for (unsigned l = 1; l < src_jumps.size(); ++l) {  // index 0 is skipped; nothing in this struct ever adds to src_jumps
+      if (src_jumps[l].num_customers() > 0) {
+        prob_t q;
+        q.logeq(src_jumps[l].log_crp_prob());
+        p *= q;
+      }
+    }
+    return p;
+  }
+
+  const PhraseJointBase& rp0;  // held by reference, so the base distribution must outlive the model
+  prob_t base;  // running product of rp0(rule), updated by IncrementRule / DecrementRule
+  CCRP_NoTable<TRule> rules;
+  vector<CCRP_NoTable<int> > src_jumps;
+};
+
+struct BackwardEstimateSym {  // cached Model-1 based score for the still-uncovered remainder of one sentence pair
+  BackwardEstimateSym(const Model1& m1,
+                      const Model1& invm1, const vector<WordID>& src, const vector<WordID>& trg) :
+      model1_(m1), invmodel1_(invm1), src_(src), trg_(trg) {  // all four arguments are kept by reference and must outlive this object
+  }
+  const prob_t& operator()(unsigned src_cov, unsigned trg_cov) const {  // score for the words after the first src_cov source / trg_cov target words
+    assert(src_cov <= src_.size());
+    assert(trg_cov <= trg_.size());
+    prob_t& e = cache_[src_cov][trg_cov];
+    if (e.is_0()) {  // a zero entry doubles as the "not computed yet" marker
+      if (trg_cov == trg_.size()) { e = prob_t::One(); return e; }  // target fully covered: estimate is one
+      vector<WordID> r(src_.size() + 1); r.clear();
+      for (int i = src_cov; i < src_.size(); ++i)  // r collects the uncovered source words
+        r.push_back(src_[i]);
+      r.push_back(0);  // NULL word
+      const prob_t uniform_alignment(1.0 / r.size());
+      e.logeq(log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining)
+      for (unsigned j = trg_cov; j < trg_.size(); ++j) {  // forward direction: each remaining target word given r (incl. NULL)
+        prob_t p;
+        for (unsigned i = 0; i < r.size(); ++i)
+          p += model1_(r[i], trg_[j]);
+        if (p.is_0()) {
+          cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n";
+          abort();
+        }
+        p *= uniform_alignment;
+        e *= p;
+      }
+      r.pop_back();  // drop the NULL word again: the inverse direction generates real source words only
+      const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0));  // remaining target words plus NULL
+      prob_t inv;
+      inv.logeq(log_poisson(r.size(), trg_.size() - trg_cov));
+      for (unsigned i = 0; i < r.size(); ++i) {  // inverse direction: each remaining source word given NULL + remaining target words
+        prob_t p; p += invmodel1_(0, r[i]);  // NULL term added explicitly; starting the loop at trg_cov - 1 wrapped around for trg_cov == 0
+        for (unsigned j = trg_cov; j < trg_.size(); ++j)
+          p += invmodel1_(trg_[j], r[i]);
+        if (p.is_0()) {
+          cerr << "ERROR: p_inv(" << TD::Convert(r[i]) << " | " << TD::GetString(trg_) << ") = 0!\n";
+          abort();
+        }
+        p *= inv_uniform;
+        inv *= p;
+      }
+      prob_t x = pow(e * inv, 0.5);  // geometric mean of the two directions
+      e = x;
+      //cerr << "Forward: " << log(e) << "\tBackward: " << log(inv) << "\t prop: " << log(x) << endl;
+    }
+    return e;  // reference into cache_
+  }
+  const Model1& model1_;
+  const Model1& invmodel1_;
+  const vector<WordID>& src_;
+  const vector<WordID>& trg_;
+  mutable unordered_map<unsigned, map<unsigned, prob_t> > cache_;  // cache_[src_cov][trg_cov]; mutable so the const call operator can fill it
+};
+
+struct Particle {  // one partial derivation of a sentence pair, as extended step by step in main()
+  Particle() : weight(prob_t::One()), src_cov(), trg_cov() {}  // weight starts at one, both coverage counters at zero
+  prob_t weight;  // importance weight; updated in main() and reset by FilterCrapParticlesAndReweight()
+  prob_t gamma_last;  // rule probability of the most recent extension (assigned in main())
+  vector<TRulePtr> rules;  // rules chosen so far, in order of application
+  int src_cov;  // number of source words consumed so far
+  int trg_cov;  // number of target words consumed so far
+};
+
+ostream& operator<<(ostream& o, const vector<bool>& v) {  // writes v as a string of '1'/'0' characters, no separators
+  for (int i = 0; i < v.size(); ++i)
+    o << (v[i] ? '1' : '0');
+  return o;
+}
+ostream& operator<<(ostream& o, const Particle& p) {  // one-line debug summary of a particle
+  o << "[src_cov=" << p.src_cov << " trg_cov=" << p.trg_cov << " num_rules=" << p.rules.size() << "  w=" << log(p.weight) << ']';  // w is printed as log(weight)
+  return o;
+}
+
+void FilterCrapParticlesAndReweight(vector<Particle>* pps) {  // resampling step: replaces *pps by the same number of draws from it and equalises the weights
+  vector<Particle>& ps = *pps;
+  SampleSet<prob_t> ss;
+  for (int i = 0; i < ps.size(); ++i)
+    ss.add(ps[i].weight);  // entry i of the sample set carries the weight of particle i
+  vector<Particle> nps; nps.reserve(ps.size());
+  const prob_t uniform_weight(1.0 / ps.size());
+  for (int i = 0; i < ps.size(); ++i) {
+    nps.push_back(ps[prng->SelectSample(ss)]);  // each draw is independent, so one particle may be copied several times; presumably drawn in proportion to weight — confirm in sampler.h
+    nps[i].weight = uniform_weight;  // every survivor restarts with weight 1/N
+  }
+  nps.swap(ps);  // the caller's vector now holds the resampled set
+}
+
+int main(int argc, char** argv) {  // particle-filter sampler over phrase-pair derivations of a parallel corpus
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  const unsigned kMAX_TRG_PHRASE = conf["max_trg_phrase"].as<unsigned>();
+  const unsigned kMAX_SRC_PHRASE = conf["max_src_phrase"].as<unsigned>();
+  const unsigned particles = conf["particles"].as<unsigned>();
+  const unsigned samples = conf["samples"].as<unsigned>();
+  const unsigned rejuv_freq = conf["filter_frequency"].as<unsigned>();  // extension steps a particle takes between two resampling passes
+
+  if (!conf.count("model1") || !conf.count("inverse_model1")) {  // both files are loaded unconditionally below
+    cerr << argv[0] << ": please use --model1 and --inverse_model1 to specify model 1 parameters\n";
+    return 1;
+  }
+  if (conf.count("random_seed"))
+    prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
+  else
+    prng.reset(new MT19937);
+  MT19937& rng = *prng;
+
+  vector<vector<WordID> > corpuse, corpusf;
+  set<WordID> vocabe, vocabf;
+  cerr << "Reading corpus...\n";
+  ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
+  cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n";
+  cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n";
+  assert(corpusf.size() == corpuse.size());
+
+  const int kLHS = -TD::Convert("X");  // left-hand side given to every sampled rule
+  Model1 m1(conf["model1"].as<string>());
+  Model1 invm1(conf["inverse_model1"].as<string>());
+
+#if 0
+  PhraseConditionalBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size());
+  MyConditionalModel m(lp0);
+#else
+  PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size(), vocabf.size());
+  MyJointModel m(lp0);
+#endif
+
+  cerr << "Initializing reachability limits...\n";
+  vector<Particle> ps(corpusf.size());  // ps[ci]: the derivation currently counted in the model for sentence pair ci
+  vector<Reachability> reaches; reaches.reserve(corpusf.size());
+  for (int ci = 0; ci < corpusf.size(); ++ci)
+    reaches.push_back(Reachability(corpusf[ci].size(),
+                                   corpuse[ci].size(),
+                                   kMAX_SRC_PHRASE,
+                                   kMAX_TRG_PHRASE));
+  cerr << "Sampling...\n";
+  vector<Particle> tmp_p(10000);  // work space
+  SampleSet<prob_t> pfss;
+  for (int SS=0; SS < samples; ++SS) {  // one pass over the whole corpus per sample
+    for (int ci = 0; ci < corpusf.size(); ++ci) {
+      vector<int>& src = corpusf[ci];
+      vector<int>& trg = corpuse[ci];
+      m.DecrementRules(ps[ci].rules);  // take this pair's previous derivation out of the model before resampling it
+
+      BackwardEstimateSym be(m1, invm1, src, trg);
+      const Reachability& r = reaches[ci];
+      vector<Particle> lps(particles);  // fresh particle set for this sentence pair
+
+      bool all_complete = false;
+      while(!all_complete) {
+        SampleSet<prob_t> ss;
+
+        // all particles have now been extended a bit, we will reweight them now
+        if (lps[0].trg_cov > 0)  // skipped on the first round, when nothing has been extended yet (only particle 0 is inspected)
+          FilterCrapParticlesAndReweight(&lps);
+
+        // loop over all particles and extend them
+        bool done_nothing = true;
+        for (int pi = 0; pi < particles; ++pi) {
+          Particle& p = lps[pi];
+          int tic = 0;
+          while(p.trg_cov < trg.size() && tic < rejuv_freq) {  // at most rejuv_freq extensions per particle per round
+            ++tic;
+            done_nothing = false;
+            ss.clear();
+            TRule x; x.lhs_ = kLHS;
+            prob_t z;  // sum of all proposal scores, used to normalise below
+
+            for (int trg_len = 1; trg_len <= kMAX_TRG_PHRASE && p.trg_cov + trg_len <= trg.size(); ++trg_len) {  // bound added: never index past the end of trg
+              x.e_.push_back(trg[trg_len - 1 + p.trg_cov]);  // x.e_ grows by one target word per outer iteration
+              for (int src_len = 1; src_len <= kMAX_SRC_PHRASE; ++src_len) {
+                if (!r.edges[p.src_cov][p.trg_cov][src_len][trg_len]) continue;  // only extensions the reachability table allows
+
+                int i = p.src_cov;
+                assert(ss.size() < tmp_p.size());  // if fails increase tmp_p size
+                Particle& np = tmp_p[ss.size()];  // candidate k is stored at the index it will have in ss
+                np = p;
+                x.f_.clear();
+                for (int j = 0; j < src_len; ++j)
+                  x.f_.push_back(src[i + j]);
+                np.src_cov += x.f_.size();
+                np.trg_cov += x.e_.size();
+                prob_t rp = m.RuleProbability(x);
+                np.gamma_last = rp;
+                const prob_t u = pow(np.gamma_last * pow(be(np.src_cov, np.trg_cov), 1.2), 0.1);  // proposal score: rule probability times the backward estimate, with fixed exponents
+                //cerr << "**rule=" << x << endl;
+                //cerr << "  u=" << log(u) << "  rule=" << rp << endl;
+                ss.add(u);
+                np.rules.push_back(TRulePtr(new TRule(x)));
+                z += u;
+              }
+            }
+            //cerr << "number of edges to consider: " << ss.size() << endl;
+            const int sampled = rng.SelectSample(ss);
+            prob_t q_n = ss[sampled] / z;  // normalised proposal probability of the chosen extension
+            p = tmp_p[sampled];
+            //m.IncrementRule(*p.rules.back());
+            p.weight *= p.gamma_last / q_n;  // importance-weight update: rule probability over proposal probability
+            //cerr << "[w=" << log(p.weight) << "]\tsampled rule: " << p.rules.back()->AsString() << endl;
+            //cerr << p << endl;
+          }
+        } // loop over particles (pi = 0 .. particles)
+        if (done_nothing) all_complete = true;  // no particle could be extended: every target side is fully covered
+      }
+      pfss.clear();
+      for (int i = 0; i < lps.size(); ++i)
+        pfss.add(lps[i].weight);
+      const int sampled = rng.SelectSample(pfss);  // pick one finished particle using the final weights
+      ps[ci] = lps[sampled];
+      m.IncrementRules(lps[sampled].rules);  // its rules become this pair's contribution to the model
+      for (int i = 0; i < lps[sampled].rules.size(); ++i) { cerr << "S:\t" << lps[sampled].rules[i]->AsString() << "\n"; }
+      cerr << "tmp-LLH: " << log(m.Likelihood()) << endl;
+    }
+    cerr << "LLH: " << log(m.Likelihood()) << endl;
+  }
+  return 0;
+}
+
-- 
cgit v1.2.3


From 0af7d663194beddcde420349bbd91430e0b2e423 Mon Sep 17 00:00:00 2001
From: Guest_account Guest_account prguest11 <prguest11@taipan.cs>
Date: Tue, 11 Oct 2011 16:16:53 +0100
Subject: remove implicit conversion-to-double operator from LogVal<T> that
 caused overflow errors, clean up some pf code

---
 decoder/aligner.cc              |  2 +-
 decoder/cfg.cc                  |  2 +-
 decoder/cfg_format.h            |  2 +-
 decoder/decoder.cc              | 10 ++++----
 decoder/hg.cc                   |  4 ++--
 decoder/rule_lexer.l            |  2 ++
 decoder/trule.h                 | 15 +++++++++++-
 gi/pf/brat.cc                   | 11 ---------
 gi/pf/cbgi.cc                   | 10 --------
 gi/pf/dpnaive.cc                | 12 ----------
 gi/pf/itg.cc                    | 11 ---------
 gi/pf/pfbrat.cc                 | 11 ---------
 gi/pf/pfdist.cc                 | 11 ---------
 gi/pf/pfnaive.cc                | 11 ---------
 mteval/mbr_kbest.cc             |  4 ++--
 phrasinator/ccrp_nt.h           | 24 +++++++++++++++----
 training/mpi_batch_optimize.cc  |  2 +-
 training/mpi_compute_cllh.cc    | 51 +++++++++++++++++++----------------------
 training/mpi_online_optimize.cc |  4 ++--
 utils/logval.h                  | 10 ++++----
 20 files changed, 78 insertions(+), 131 deletions(-)

(limited to 'gi/pf/pfnaive.cc')

diff --git a/decoder/aligner.cc b/decoder/aligner.cc
index 292ee123..53e059fb 100644
--- a/decoder/aligner.cc
+++ b/decoder/aligner.cc
@@ -165,7 +165,7 @@ inline void WriteProbGrid(const Array2D<prob_t>& m, ostream* pos) {
       if (m(i,j) == prob_t::Zero()) {
         os << "\t---X---";
       } else {
-        snprintf(b, 1024, "%0.5f", static_cast<double>(m(i,j)));
+        snprintf(b, 1024, "%0.5f", m(i,j).as_float());
         os << '\t' << b;
       }
     }
diff --git a/decoder/cfg.cc b/decoder/cfg.cc
index 651978d2..cd7e66e9 100755
--- a/decoder/cfg.cc
+++ b/decoder/cfg.cc
@@ -639,7 +639,7 @@ void CFG::Print(std::ostream &o,CFGFormat const& f) const {
     o << '['<<f.goal_nt_name <<']';
     WordID rhs=-goal_nt;
     f.print_rhs(o,*this,&rhs,&rhs+1);
-    if (pushed_inside!=1)
+    if (pushed_inside!=prob_t::One())
       f.print_features(o,pushed_inside);
     o<<'\n';
   }
diff --git a/decoder/cfg_format.h b/decoder/cfg_format.h
index c6a594b8..2f40d483 100755
--- a/decoder/cfg_format.h
+++ b/decoder/cfg_format.h
@@ -101,7 +101,7 @@ struct CFGFormat {
   }
 
   void print_features(std::ostream &o,prob_t p,FeatureVector const& fv=FeatureVector()) const {
-    bool logp=(logprob_feat && p!=1);
+    bool logp=(logprob_feat && p!=prob_t::One());
     if (features || logp) {
       o << partsep;
       if (logp)
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index c4fe3c4d..3b53fd6b 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -325,7 +325,7 @@ struct DecoderImpl {
 
   static void ConvertSV(const SparseVector<prob_t>& src, SparseVector<double>* trg) {
     for (SparseVector<prob_t>::const_iterator it = src.begin(); it != src.end(); ++it)
-      trg->set_value(it->first, it->second);
+      trg->set_value(it->first, it->second.as_float());
   }
 };
 
@@ -788,10 +788,10 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
   const bool show_tree_structure=conf.count("show_tree_structure");
   if (!SILENT) forest_stats(forest,"  Init. forest",show_tree_structure,oracle.show_derivation);
   if (conf.count("show_expected_length")) {
-    const PRPair<double, double> res =
-      Inside<PRPair<double, double>,
-             PRWeightFunction<double, EdgeProb, double, ELengthWeightFunction> >(forest);
-    cerr << "  Expected length  (words): " << res.r / res.p << "\t" << res << endl;
+    const PRPair<prob_t, prob_t> res =
+      Inside<PRPair<prob_t, prob_t>,
+             PRWeightFunction<prob_t, EdgeProb, prob_t, ELengthWeightFunction> >(forest);
+    cerr << "  Expected length  (words): " << (res.r / res.p).as_float() << "\t" << res << endl;
   }
 
   if (conf.count("show_partition")) {
diff --git a/decoder/hg.cc b/decoder/hg.cc
index 3ad17f1a..180986d7 100644
--- a/decoder/hg.cc
+++ b/decoder/hg.cc
@@ -157,14 +157,14 @@ prob_t Hypergraph::ComputeEdgePosteriors(double scale, vector<prob_t>* posts) co
   const ScaledEdgeProb weight(scale);
   const ScaledTransitionEventWeightFunction w2(scale);
   SparseVector<prob_t> pv;
-  const double inside = InsideOutside<prob_t,
+  const prob_t inside = InsideOutside<prob_t,
                   ScaledEdgeProb,
                   SparseVector<prob_t>,
                   ScaledTransitionEventWeightFunction>(*this, &pv, weight, w2);
   posts->resize(edges_.size());
   for (int i = 0; i < edges_.size(); ++i)
     (*posts)[i] = prob_t(pv.value(i));
-  return prob_t(inside);
+  return inside;
 }
 
 prob_t Hypergraph::ComputeBestPathThroughEdges(vector<prob_t>* post) const {
diff --git a/decoder/rule_lexer.l b/decoder/rule_lexer.l
index 9331d8ed..083a5bb1 100644
--- a/decoder/rule_lexer.l
+++ b/decoder/rule_lexer.l
@@ -220,6 +220,8 @@ NT [^\t \[\],]+
                   std::cerr << "Line " << lex_line << ": LHS and RHS arity mismatch!\n";
                   abort();
                 }
+		// const bool ignore_grammar_features = false;
+		// if (ignore_grammar_features) scfglex_num_feats = 0;
 		TRulePtr rp(new TRule(scfglex_lhs, scfglex_src_rhs, scfglex_src_rhs_size, scfglex_trg_rhs, scfglex_trg_rhs_size, scfglex_feat_ids, scfglex_feat_vals, scfglex_num_feats, scfglex_src_arity, scfglex_als, scfglex_num_als));
     check_and_update_ctf_stack(rp);
     TRulePtr coarse_rp = ((ctf_level == 0) ? TRulePtr() : ctf_rule_stack.top());
diff --git a/decoder/trule.h b/decoder/trule.h
index 4df4ec90..8eb2a059 100644
--- a/decoder/trule.h
+++ b/decoder/trule.h
@@ -5,7 +5,9 @@
 #include <vector>
 #include <cassert>
 #include <iostream>
-#include <boost/shared_ptr.hpp>
+
+#include "boost/shared_ptr.hpp"
+#include "boost/functional/hash.hpp"
 
 #include "sparse_vector.h"
 #include "wordid.h"
@@ -162,4 +164,15 @@ class TRule {
   bool SanityCheck() const;
 };
 
+inline size_t hash_value(const TRule& r) {  // hash over e_, lhs_ and f_ — the same three fields operator== compares
+  size_t h = boost::hash_value(r.e_);
+  boost::hash_combine(h, -r.lhs_);  // NOTE(review): lhs_ is negated before mixing, presumably because non-terminal ids are stored as negative numbers — confirm
+  boost::hash_combine(h, boost::hash_value(r.f_));
+  return h;
+}
+
+inline bool operator==(const TRule& a, const TRule& b) {  // rules are equal when lhs_, e_ and f_ all match; no other member is compared
+  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
+}
+
 #endif
diff --git a/gi/pf/brat.cc b/gi/pf/brat.cc
index 4c6ba3ef..7b60ef23 100644
--- a/gi/pf/brat.cc
+++ b/gi/pf/brat.cc
@@ -25,17 +25,6 @@ static unsigned kMAX_SRC_PHRASE;
 static unsigned kMAX_TRG_PHRASE;
 struct FSTState;
 
-size_t hash_value(const TRule& r) {
-  size_t h = 2 - r.lhs_;
-  boost::hash_combine(h, boost::hash_value(r.e_));
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 double log_poisson(unsigned x, const double& lambda) {
   assert(lambda > 0.0);
   return log(lambda) * x - lgamma(x + 1) - lambda;
diff --git a/gi/pf/cbgi.cc b/gi/pf/cbgi.cc
index 20204e8a..97f1ba34 100644
--- a/gi/pf/cbgi.cc
+++ b/gi/pf/cbgi.cc
@@ -27,16 +27,6 @@ double log_decay(unsigned x, const double& b) {
   return log(b - 1) - x * log(b);
 }
 
-size_t hash_value(const TRule& r) {
-  // TODO fix hash function
-  size_t h = boost::hash_value(r.e_) * boost::hash_value(r.f_) * r.lhs_;
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 struct SimpleBase {
   SimpleBase(unsigned esize, unsigned fsize, unsigned ntsize = 144) :
     uniform_e(-log(esize)),
diff --git a/gi/pf/dpnaive.cc b/gi/pf/dpnaive.cc
index 582d1be7..608f73d5 100644
--- a/gi/pf/dpnaive.cc
+++ b/gi/pf/dpnaive.cc
@@ -20,18 +20,6 @@ namespace po = boost::program_options;
 
 static unsigned kMAX_SRC_PHRASE;
 static unsigned kMAX_TRG_PHRASE;
-struct FSTState;
-
-size_t hash_value(const TRule& r) {
-  size_t h = 2 - r.lhs_;
-  boost::hash_combine(h, boost::hash_value(r.e_));
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
 
 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
diff --git a/gi/pf/itg.cc b/gi/pf/itg.cc
index 2c2a86f9..ac3c16a3 100644
--- a/gi/pf/itg.cc
+++ b/gi/pf/itg.cc
@@ -27,17 +27,6 @@ ostream& operator<<(ostream& os, const vector<WordID>& p) {
   return os << ']';
 }
 
-size_t hash_value(const TRule& r) {
-  size_t h = boost::hash_value(r.e_);
-  boost::hash_combine(h, -r.lhs_);
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 double log_poisson(unsigned x, const double& lambda) {
   assert(lambda > 0.0);
   return log(lambda) * x - lgamma(x + 1) - lambda;
diff --git a/gi/pf/pfbrat.cc b/gi/pf/pfbrat.cc
index 4c6ba3ef..7b60ef23 100644
--- a/gi/pf/pfbrat.cc
+++ b/gi/pf/pfbrat.cc
@@ -25,17 +25,6 @@ static unsigned kMAX_SRC_PHRASE;
 static unsigned kMAX_TRG_PHRASE;
 struct FSTState;
 
-size_t hash_value(const TRule& r) {
-  size_t h = 2 - r.lhs_;
-  boost::hash_combine(h, boost::hash_value(r.e_));
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 double log_poisson(unsigned x, const double& lambda) {
   assert(lambda > 0.0);
   return log(lambda) * x - lgamma(x + 1) - lambda;
diff --git a/gi/pf/pfdist.cc b/gi/pf/pfdist.cc
index 18dfd03b..81abd61b 100644
--- a/gi/pf/pfdist.cc
+++ b/gi/pf/pfdist.cc
@@ -24,17 +24,6 @@ namespace po = boost::program_options;
 
 shared_ptr<MT19937> prng;
 
-size_t hash_value(const TRule& r) {
-  size_t h = boost::hash_value(r.e_);
-  boost::hash_combine(h, -r.lhs_);
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
diff --git a/gi/pf/pfnaive.cc b/gi/pf/pfnaive.cc
index 43c604c3..c30e7c4f 100644
--- a/gi/pf/pfnaive.cc
+++ b/gi/pf/pfnaive.cc
@@ -24,17 +24,6 @@ namespace po = boost::program_options;
 
 shared_ptr<MT19937> prng;
 
-size_t hash_value(const TRule& r) {
-  size_t h = boost::hash_value(r.e_);
-  boost::hash_combine(h, -r.lhs_);
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc
index 2867b36b..64a6a8bf 100644
--- a/mteval/mbr_kbest.cc
+++ b/mteval/mbr_kbest.cc
@@ -32,7 +32,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
 }
 
 struct LossComparer {
-  bool operator()(const pair<vector<WordID>, double>& a, const pair<vector<WordID>, double>& b) const {
+  bool operator()(const pair<vector<WordID>, prob_t>& a, const pair<vector<WordID>, prob_t>& b) const {
     return a.second < b.second;
   }
 };
@@ -108,7 +108,7 @@ int main(int argc, char** argv) {
           ScoreP s = scorer->ScoreCandidate(list[j].first);
           double loss = 1.0 - s->ComputeScore();
           if (type == TER || type == AER) loss = 1.0 - loss;
-          double weighted_loss = loss * (joints[j] / marginal);
+          double weighted_loss = loss * (joints[j] / marginal).as_float();
           wl_acc += weighted_loss;
           if ((!output_list) && wl_acc > mbr_loss) break;
         }
diff --git a/phrasinator/ccrp_nt.h b/phrasinator/ccrp_nt.h
index 163b643a..811bce73 100644
--- a/phrasinator/ccrp_nt.h
+++ b/phrasinator/ccrp_nt.h
@@ -50,15 +50,26 @@ class CCRP_NoTable {
     return it->second;
   }
 
-  void increment(const Dish& dish) {
-    ++custs_[dish];
+  int increment(const Dish& dish) {  // adds one customer for dish; returns 1 if this was the dish's first customer, else 0
+    int table_diff = 0;
+    if (++custs_[dish] == 1)  // count went from 0 to 1: the dish is new
+      table_diff = 1;
     ++num_customers_;
+    return table_diff;
   }
 
-  void decrement(const Dish& dish) {
-    if ((--custs_[dish]) == 0)
+  int decrement(const Dish& dish) {  // removes one customer for dish; returns -1 if that was the dish's last customer, else 0
+    int table_diff = 0;
+    int nc = --custs_[dish];  // operator[] creates a zero entry for an unknown dish, which then drops to -1 and hits the abort below
+    if (nc == 0) {
       custs_.erase(dish);
+      table_diff = -1;
+    } else if (nc < 0) {  // more decrements than increments for this dish: treated as fatal
+      std::cerr << "Dish counts dropped below zero for: " << dish << std::endl;
+      abort();
+    }
     --num_customers_;
+    return table_diff;
   }
 
   double prob(const Dish& dish, const double& p0) const {
@@ -66,6 +77,11 @@ class CCRP_NoTable {
     return (at_table + p0 * concentration_) / (num_customers_ + concentration_);
   }
 
+  double logprob(const Dish& dish, const double& logp0) const {  // log of prob(dish, p0), with the base probability supplied as logp0 = log(p0)
+    const unsigned at_table = num_customers(dish);
+    return (at_table ? log(at_table + exp(logp0 + log(concentration_))) : logp0 + log(concentration_)) - log(num_customers_ + concentration_);  // unseen dish: log(0 + exp(a)) == a, so stay in log space and a tiny p0 cannot underflow to log(0)
+  }
+
   double log_crp_prob() const {
     return log_crp_prob(concentration_);
   }
diff --git a/training/mpi_batch_optimize.cc b/training/mpi_batch_optimize.cc
index 0ba8c530..046e921c 100644
--- a/training/mpi_batch_optimize.cc
+++ b/training/mpi_batch_optimize.cc
@@ -92,7 +92,7 @@ struct TrainingObserver : public DecoderObserver {
   void SetLocalGradientAndObjective(vector<double>* g, double* o) const {
     *o = acc_obj;
     for (SparseVector<prob_t>::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it)
-      (*g)[it->first] = it->second;
+      (*g)[it->first] = it->second.as_float();
   }
 
   virtual void NotifyDecodingStart(const SentenceMetadata& smeta) {
diff --git a/training/mpi_compute_cllh.cc b/training/mpi_compute_cllh.cc
index b496d196..d5caa745 100644
--- a/training/mpi_compute_cllh.cc
+++ b/training/mpi_compute_cllh.cc
@@ -1,6 +1,4 @@
-#include <sstream>
 #include <iostream>
-#include <fstream>
 #include <vector>
 #include <cassert>
 #include <cmath>
@@ -12,6 +10,7 @@
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 
+#include "sentence_metadata.h"
 #include "verbose.h"
 #include "hg.h"
 #include "prob.h"
@@ -52,7 +51,8 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   return true;
 }
 
-void ReadTrainingCorpus(const string& fname, int rank, int size, vector<string>* c, vector<int>* ids) {
+void ReadInstances(const string& fname, int rank, int size, vector<string>* c) {
+  assert(fname != "-");
   ReadFile rf(fname);
   istream& in = *rf.stream();
   string line;
@@ -60,20 +60,16 @@ void ReadTrainingCorpus(const string& fname, int rank, int size, vector<string>*
   while(in) {
     getline(in, line);
     if (!in) break;
-    if (lc % size == rank) {
-      c->push_back(line);
-      ids->push_back(lc);
-    }
+    if (lc % size == rank) c->push_back(line);
     ++lc;
   }
 }
 
 static const double kMINUS_EPSILON = -1e-6;
 
-struct TrainingObserver : public DecoderObserver {
-  void Reset() {
-    acc_obj = 0;
-  } 
+struct ConditionalLikelihoodObserver : public DecoderObserver {
+
+  ConditionalLikelihoodObserver() : trg_words(), acc_obj(), cur_obj() {}
 
   virtual void NotifyDecodingStart(const SentenceMetadata&) {
     cur_obj = 0;
@@ -120,8 +116,10 @@ struct TrainingObserver : public DecoderObserver {
     }
     assert(!isnan(log_ref_z));
     acc_obj += (cur_obj - log_ref_z);
+    trg_words += smeta.GetReference().size();
   }
 
+  unsigned trg_words;
   double acc_obj;
   double cur_obj;
   int state;
@@ -161,35 +159,32 @@ int main(int argc, char** argv) {
   if (conf.count("weights"))
     Weights::InitFromFile(conf["weights"].as<string>(), &weights);
 
-  // freeze feature set
-  //const bool freeze_feature_set = conf.count("freeze_feature_set");
-  //if (freeze_feature_set) FD::Freeze();
-
-  vector<string> corpus; vector<int> ids;
-  ReadTrainingCorpus(conf["training_data"].as<string>(), rank, size, &corpus, &ids);
+  vector<string> corpus;
+  ReadInstances(conf["training_data"].as<string>(), rank, size, &corpus);
   assert(corpus.size() > 0);
-  assert(corpus.size() == ids.size());
-
-  TrainingObserver observer;
-  double objective = 0;
 
-  observer.Reset();
   if (rank == 0)
-    cerr << "Each processor is decoding " << corpus.size() << " training examples...\n";
+    cerr << "Each processor is decoding ~" << corpus.size() << " training examples...\n";
 
-  for (int i = 0; i < corpus.size(); ++i) {
-    decoder.SetId(ids[i]);
+  ConditionalLikelihoodObserver observer;
+  for (int i = 0; i < corpus.size(); ++i)
     decoder.Decode(corpus[i], &observer);
-  }
 
+  double objective = 0;
+  unsigned total_words = 0;
 #ifdef HAVE_MPI
   reduce(world, observer.acc_obj, objective, std::plus<double>(), 0);
+  reduce(world, observer.trg_words, total_words, std::plus<unsigned>(), 0);
 #else
   objective = observer.acc_obj;
 #endif
 
-  if (rank == 0)
-    cout << "OBJECTIVE: " << objective << endl;
+  if (rank == 0) {
+    cout << "CONDITIONAL LOG_e LIKELIHOOD: " << objective << endl;
+    cout << "CONDITIONAL LOG_2 LIKELIHOOD: " << (objective/log(2)) << endl;
+    cout << "         CONDITIONAL ENTROPY: " << (objective/log(2) / total_words) << endl;
+    cout << "                  PERPLEXITY: " << pow(2, (objective/log(2) / total_words)) << endl;
+  }
 
   return 0;
 }
diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc
index 2ef4a2e7..f87b7274 100644
--- a/training/mpi_online_optimize.cc
+++ b/training/mpi_online_optimize.cc
@@ -94,7 +94,7 @@ struct TrainingObserver : public DecoderObserver {
   void SetLocalGradientAndObjective(vector<double>* g, double* o) const {
     *o = acc_obj;
     for (SparseVector<prob_t>::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it)
-      (*g)[it->first] = it->second;
+      (*g)[it->first] = it->second.as_float();
   }
 
   virtual void NotifyDecodingStart(const SentenceMetadata& smeta) {
@@ -158,7 +158,7 @@ struct TrainingObserver : public DecoderObserver {
   void GetGradient(SparseVector<double>* g) const {
     g->clear();
     for (SparseVector<prob_t>::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it)
-      g->set_value(it->first, it->second);
+      g->set_value(it->first, it->second.as_float());
   }
 
   int total_complete;
diff --git a/utils/logval.h b/utils/logval.h
index 6fdc2c42..8a59d0b1 100644
--- a/utils/logval.h
+++ b/utils/logval.h
@@ -25,12 +25,13 @@ class LogVal {
   typedef LogVal<T> Self;
 
   LogVal() : s_(), v_(LOGVAL_LOG0) {}
-  explicit LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {}
+  LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {}
+  const Self& operator=(double x) { s_ = std::signbit(x); v_ = s_ ? std::log(-x) : std::log(x); return *this; }
   LogVal(init_minus_1) : s_(true),v_(0) {  }
   LogVal(init_1) : s_(),v_(0) {  }
   LogVal(init_0) : s_(),v_(LOGVAL_LOG0) {  }
-  LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {}
-  LogVal(unsigned x) : s_(0), v_(std::log(x)) { }
+  explicit LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {}
+  explicit LogVal(unsigned x) : s_(0), v_(std::log(x)) { }
   LogVal(double lnx,bool sign) : s_(sign),v_(lnx) {}
   LogVal(double lnx,init_lnx) : s_(),v_(lnx) {}
   static Self exp(T lnx) { return Self(lnx,false); }
@@ -141,9 +142,6 @@ class LogVal {
     return pow(1/root);
   }
 
-  operator T() const {
-    if (s_) return -std::exp(v_); else return std::exp(v_);
-  }
   T as_float() const {
     if (s_) return -std::exp(v_); else return std::exp(v_);
   }
-- 
cgit v1.2.3


From ee84ab027c0be54800cac0c9bff62dd097354f6d Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Wed, 12 Oct 2011 14:57:15 +0100
Subject: model length properly, clean up

---
 gi/pf/Makefile.am      |   2 +-
 gi/pf/corpus.cc        |  57 ++++++++++++++++++++++++
 gi/pf/corpus.h         |  19 ++++++++
 gi/pf/dpnaive.cc       |  95 +++++++++++-----------------------------
 gi/pf/monotonic_pseg.h |  88 +++++++++++++++++++++++++++++++++++++
 gi/pf/pfnaive.cc       | 116 +++++--------------------------------------------
 utils/logval_test.cc   |  14 +++---
 7 files changed, 209 insertions(+), 182 deletions(-)
 create mode 100644 gi/pf/corpus.cc
 create mode 100644 gi/pf/corpus.h
 create mode 100644 gi/pf/monotonic_pseg.h

(limited to 'gi/pf/pfnaive.cc')

diff --git a/gi/pf/Makefile.am b/gi/pf/Makefile.am
index c9764ad5..42758939 100644
--- a/gi/pf/Makefile.am
+++ b/gi/pf/Makefile.am
@@ -1,7 +1,7 @@
 bin_PROGRAMS = cbgi brat dpnaive pfbrat pfdist itg pfnaive
 
 noinst_LIBRARIES = libpf.a
-libpf_a_SOURCES = base_measures.cc reachability.cc cfg_wfst_composer.cc
+libpf_a_SOURCES = base_measures.cc reachability.cc cfg_wfst_composer.cc corpus.cc
 
 itg_SOURCES = itg.cc
 
diff --git a/gi/pf/corpus.cc b/gi/pf/corpus.cc
new file mode 100644
index 00000000..a408e7cf
--- /dev/null
+++ b/gi/pf/corpus.cc
@@ -0,0 +1,57 @@
+#include "corpus.h"
+
+#include <set>
+#include <vector>
+#include <string>
+
+#include "tdict.h"
+#include "filelib.h"
+
+using namespace std;
+
+namespace corpus {
+
+void ReadParallelCorpus(const string& filename,
+                vector<vector<WordID> >* f,
+                vector<vector<WordID> >* e,
+                set<WordID>* vocab_f,
+                set<WordID>* vocab_e) {
+  f->clear();
+  e->clear();
+  vocab_f->clear();
+  vocab_e->clear();
+  ReadFile rf(filename);
+  istream* in = rf.stream();
+  assert(*in);
+  string line;
+  const WordID kDIV = TD::Convert("|||");
+  vector<WordID> tmp;
+  while(*in) {
+    getline(*in, line);
+    if (line.empty() && !*in) break;
+    e->push_back(vector<int>());
+    f->push_back(vector<int>());
+    vector<int>& le = e->back();
+    vector<int>& lf = f->back();
+    tmp.clear();
+    TD::ConvertSentence(line, &tmp);
+    bool isf = true;
+    for (unsigned i = 0; i < tmp.size(); ++i) {
+      const int cur = tmp[i];
+      if (isf) {
+        if (kDIV == cur) { isf = false; } else {
+          lf.push_back(cur);
+          vocab_f->insert(cur);
+        }
+      } else {
+        assert(cur != kDIV);
+        le.push_back(cur);
+        vocab_e->insert(cur);
+      }
+    }
+    assert(isf == false);
+  }
+}
+
+}
+
diff --git a/gi/pf/corpus.h b/gi/pf/corpus.h
new file mode 100644
index 00000000..e7febdb7
--- /dev/null
+++ b/gi/pf/corpus.h
@@ -0,0 +1,19 @@
+#ifndef _CORPUS_H_
+#define _CORPUS_H_
+
+#include <string>
+#include <vector>
+#include <set>
+#include "wordid.h"
+
+namespace corpus {
+
+void ReadParallelCorpus(const std::string& filename,
+                std::vector<std::vector<WordID> >* f,
+                std::vector<std::vector<WordID> >* e,
+                std::set<WordID>* vocab_f,
+                std::set<WordID>* vocab_e);
+
+}
+
+#endif
diff --git a/gi/pf/dpnaive.cc b/gi/pf/dpnaive.cc
index 608f73d5..c926487b 100644
--- a/gi/pf/dpnaive.cc
+++ b/gi/pf/dpnaive.cc
@@ -7,12 +7,14 @@
 #include <boost/program_options/variables_map.hpp>
 
 #include "base_measures.h"
+#include "monotonic_pseg.h"
 #include "trule.h"
 #include "tdict.h"
 #include "filelib.h"
 #include "dict.h"
 #include "sampler.h"
 #include "ccrp_nt.h"
+#include "corpus.h"
 
 using namespace std;
 using namespace std::tr1;
@@ -52,57 +54,12 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   }
 }
 
-void ReadParallelCorpus(const string& filename,
-                vector<vector<WordID> >* f,
-                vector<vector<int> >* e,
-                set<int>* vocab_e,
-                set<int>* vocab_f) {
-  f->clear();
-  e->clear();
-  vocab_f->clear();
-  vocab_e->clear();
-  istream* in;
-  if (filename == "-")
-    in = &cin;
-  else
-    in = new ifstream(filename.c_str());
-  assert(*in);
-  string line;
-  const WordID kDIV = TD::Convert("|||");
-  vector<WordID> tmp;
-  while(*in) {
-    getline(*in, line);
-    if (line.empty() && !*in) break;
-    e->push_back(vector<int>());
-    f->push_back(vector<int>());
-    vector<int>& le = e->back();
-    vector<int>& lf = f->back();
-    tmp.clear();
-    TD::ConvertSentence(line, &tmp);
-    bool isf = true;
-    for (unsigned i = 0; i < tmp.size(); ++i) {
-      const int cur = tmp[i];
-      if (isf) {
-        if (kDIV == cur) { isf = false; } else {
-          lf.push_back(cur);
-          vocab_f->insert(cur);
-        }
-      } else {
-        assert(cur != kDIV);
-        le.push_back(cur);
-        vocab_e->insert(cur);
-      }
-    }
-    assert(isf == false);
-  }
-  if (in != &cin) delete in;
-}
-
 shared_ptr<MT19937> prng;
 
 template <typename Base>
 struct ModelAndData {
-  explicit ModelAndData(const Base& b, const vector<vector<int> >& ce, const vector<vector<int> >& cf, const set<int>& ve, const set<int>& vf) :
+  explicit ModelAndData(MonotonicParallelSegementationModel& m, const Base& b, const vector<vector<int> >& ce, const vector<vector<int> >& cf, const set<int>& ve, const set<int>& vf) :
+     model(m),
      rng(&*prng),
      p0(b),
      baseprob(prob_t::One()),
@@ -110,14 +67,12 @@ struct ModelAndData {
      corpusf(cf),
      vocabe(ve),
      vocabf(vf),
-     rules(1,1),
      mh_samples(),
      mh_rejects(),
      kX(-TD::Convert("X")),
      derivations(corpuse.size()) {}
 
   void ResampleHyperparameters() {
-    rules.resample_hyperparameters(&*prng);
   }
 
   void InstantiateRule(const pair<short,short>& from,
@@ -139,12 +94,10 @@ struct ModelAndData {
     TRule x;
     for (int i = 1; i < d.size(); ++i) {
       InstantiateRule(d[i], d[i-1], sentf, sente, &x);
-      //cerr << "REMOVE: " << x.AsString() << endl;
-      if (rules.decrement(x)) {
-        baseprob /= p0(x);
-        //cerr << "  (REMOVED ONLY INSTANCE)\n";
-      }
+      model.DecrementRule(x);
+      model.DecrementContinue();
     }
+    model.DecrementStop();
   }
 
   void PrintDerivation(const vector<pair<short,short> >& d, const vector<int>& sentf, const vector<int>& sente) {
@@ -161,39 +114,38 @@ struct ModelAndData {
     TRule x;
     for (int i = 1; i < d.size(); ++i) {
       InstantiateRule(d[i], d[i-1], sentf, sente, &x);
-      if (rules.increment(x)) {
-        baseprob *= p0(x);
-      }
+      model.IncrementRule(x);
+      model.IncrementContinue();
     }
+    model.IncrementStop();
   }
 
   prob_t Likelihood() const {
-    prob_t p;
-    p.logeq(rules.log_crp_prob());
-    return p * baseprob;
+    return model.Likelihood();
   }
 
   prob_t DerivationProposalProbability(const vector<pair<short,short> >& d, const vector<int>& sentf, const vector<int>& sente) const {
-    prob_t p = prob_t::One();
+    prob_t p = model.StopProbability();
     if (d.size() < 2) return p;
     TRule x;
+    const prob_t p_cont = model.ContinueProbability();
     for (int i = 1; i < d.size(); ++i) {
       InstantiateRule(d[i], d[i-1], sentf, sente, &x);
-      prob_t rp; rp.logeq(rules.logprob(x, log(p0(x))));
-      p *= rp;
+      p *= p_cont;
+      p *= model.RuleProbability(x);
     }
     return p;
   }
 
   void Sample();
 
+  MonotonicParallelSegementationModel& model;
   MT19937* rng;
   const Base& p0;
   prob_t baseprob; // cached value of generating the table table labels from p0
                    // this can't be used if we go to a hierarchical prior!
   const vector<vector<int> >& corpuse, corpusf;
   const set<int>& vocabe, vocabf;
-  CCRP_NoTable<TRule> rules;
   unsigned mh_samples, mh_rejects;
   const int kX;
   vector<vector<pair<short, short> > > derivations;
@@ -201,8 +153,8 @@ struct ModelAndData {
 
 template <typename Base>
 void ModelAndData<Base>::Sample() {
-  unsigned MAXK = 4;
-  unsigned MAXL = 4;
+  unsigned MAXK = kMAX_SRC_PHRASE;
+  unsigned MAXL = kMAX_TRG_PHRASE;
   TRule x;
   x.lhs_ = -TD::Convert("X");
   for (int samples = 0; samples < 1000; ++samples) {
@@ -228,6 +180,8 @@ void ModelAndData<Base>::Sample() {
       boost::multi_array<prob_t, 2> a(boost::extents[sentf.size() + 1][sente.size() + 1]);
       boost::multi_array<prob_t, 4> trans(boost::extents[sentf.size() + 1][sente.size() + 1][MAXK][MAXL]);
       a[0][0] = prob_t::One();
+      const prob_t q_stop = model.StopProbability();
+      const prob_t q_cont = model.ContinueProbability();
       for (int i = 0; i < sentf.size(); ++i) {
         for (int j = 0; j < sente.size(); ++j) {
           const prob_t src_a = a[i][j];
@@ -239,7 +193,9 @@ void ModelAndData<Base>::Sample() {
             for (int l = 1; l <= MAXL; ++l) {
               if (j + l > sente.size()) break;
               x.e_.push_back(sente[j + l - 1]);
-              trans[i][j][k - 1][l - 1].logeq(rules.logprob(x, log(p0(x))));
+              const bool stop_now = ((j + l) == sente.size()) && ((i + k) == sentf.size());
+              const prob_t& cp = stop_now ? q_stop : q_cont;
+              trans[i][j][k - 1][l - 1] = model.RuleProbability(x) * cp;
               a[i + k][j + l] += src_a * trans[i][j][k - 1][l - 1];
             }
           }
@@ -319,7 +275,7 @@ int main(int argc, char** argv) {
 
   vector<vector<int> > corpuse, corpusf;
   set<int> vocabe, vocabf;
-  ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
+  corpus::ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
   cerr << "f-Corpus size: " << corpusf.size() << " sentences\n";
   cerr << "f-Vocabulary size: " << vocabf.size() << " types\n";
   cerr << "f-Corpus size: " << corpuse.size() << " sentences\n";
@@ -328,8 +284,9 @@ int main(int argc, char** argv) {
 
   Model1 m1(conf["model1"].as<string>());
   PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size(), vocabf.size());
+  MonotonicParallelSegementationModel m(lp0);
 
-  ModelAndData<PhraseJointBase> posterior(lp0, corpuse, corpusf, vocabe, vocabf);
+  ModelAndData<PhraseJointBase> posterior(m, lp0, corpuse, corpusf, vocabe, vocabf);
   posterior.Sample();
 
   return 0;
diff --git a/gi/pf/monotonic_pseg.h b/gi/pf/monotonic_pseg.h
new file mode 100644
index 00000000..7e6af3fc
--- /dev/null
+++ b/gi/pf/monotonic_pseg.h
@@ -0,0 +1,88 @@
+#ifndef _MONOTONIC_PSEG_H_
+#define _MONOTONIC_PSEG_H_
+
+#include <vector>
+
+#include "prob.h"
+#include "ccrp_nt.h"
+#include "trule.h"
+#include "base_measures.h"
+
+struct MonotonicParallelSegementationModel {
+  explicit MonotonicParallelSegementationModel(PhraseJointBase& rcp0) :
+    rp0(rcp0), base(prob_t::One()), rules(1,1), stop(1.0) {}
+
+  void DecrementRule(const TRule& rule) {
+    if (rules.decrement(rule))
+      base /= rp0(rule);
+  }
+
+  void IncrementRule(const TRule& rule) {
+    if (rules.increment(rule))
+      base *= rp0(rule);
+  }
+
+  void IncrementRulesAndStops(const std::vector<TRulePtr>& rules) {
+    for (int i = 0; i < rules.size(); ++i)
+      IncrementRule(*rules[i]);
+    if (rules.size()) IncrementContinue(rules.size() - 1);
+    IncrementStop();
+  }
+
+  void DecrementRulesAndStops(const std::vector<TRulePtr>& rules) {
+    for (int i = 0; i < rules.size(); ++i)
+      DecrementRule(*rules[i]);
+    if (rules.size()) {
+      DecrementContinue(rules.size() - 1);
+      DecrementStop();
+    }
+  }
+
+  prob_t RuleProbability(const TRule& rule) const {
+    prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule))));
+    return p;
+  }
+
+  prob_t Likelihood() const {
+    prob_t p = base;
+    prob_t q; q.logeq(rules.log_crp_prob());
+    p *= q;
+    q.logeq(stop.log_crp_prob());
+    p *= q;
+    return p;
+  }
+
+  void IncrementStop() {
+    stop.increment(true);
+  }
+
+  void IncrementContinue(int n = 1) {
+    for (int i = 0; i < n; ++i)
+      stop.increment(false);
+  }
+
+  void DecrementStop() {
+    stop.decrement(true);
+  }
+
+  void DecrementContinue(int n = 1) {
+    for (int i = 0; i < n; ++i)
+      stop.decrement(false);
+  }
+
+  prob_t StopProbability() const {
+    return prob_t(stop.prob(true, 0.5));
+  }
+
+  prob_t ContinueProbability() const {
+    return prob_t(stop.prob(false, 0.5));
+  }
+
+  const PhraseJointBase& rp0;
+  prob_t base;
+  CCRP_NoTable<TRule> rules;
+  CCRP_NoTable<bool> stop;
+};
+
+#endif
+
diff --git a/gi/pf/pfnaive.cc b/gi/pf/pfnaive.cc
index c30e7c4f..33dc08c3 100644
--- a/gi/pf/pfnaive.cc
+++ b/gi/pf/pfnaive.cc
@@ -7,6 +7,7 @@
 #include <boost/program_options/variables_map.hpp>
 
 #include "base_measures.h"
+#include "monotonic_pseg.h"
 #include "reachability.h"
 #include "viterbi.h"
 #include "hg.h"
@@ -17,6 +18,7 @@
 #include "sampler.h"
 #include "ccrp_nt.h"
 #include "ccrp_onetable.h"
+#include "corpus.h"
 
 using namespace std;
 using namespace tr1;
@@ -58,101 +60,6 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   }
 }
 
-void ReadParallelCorpus(const string& filename,
-                vector<vector<WordID> >* f,
-                vector<vector<WordID> >* e,
-                set<WordID>* vocab_f,
-                set<WordID>* vocab_e) {
-  f->clear();
-  e->clear();
-  vocab_f->clear();
-  vocab_e->clear();
-  istream* in;
-  if (filename == "-")
-    in = &cin;
-  else
-    in = new ifstream(filename.c_str());
-  assert(*in);
-  string line;
-  const WordID kDIV = TD::Convert("|||");
-  vector<WordID> tmp;
-  while(*in) {
-    getline(*in, line);
-    if (line.empty() && !*in) break;
-    e->push_back(vector<int>());
-    f->push_back(vector<int>());
-    vector<int>& le = e->back();
-    vector<int>& lf = f->back();
-    tmp.clear();
-    TD::ConvertSentence(line, &tmp);
-    bool isf = true;
-    for (unsigned i = 0; i < tmp.size(); ++i) {
-      const int cur = tmp[i];
-      if (isf) {
-        if (kDIV == cur) { isf = false; } else {
-          lf.push_back(cur);
-          vocab_f->insert(cur);
-        }
-      } else {
-        assert(cur != kDIV);
-        le.push_back(cur);
-        vocab_e->insert(cur);
-      }
-    }
-    assert(isf == false);
-  }
-  if (in != &cin) delete in;
-}
-
-struct MyJointModel {
-  MyJointModel(PhraseJointBase& rcp0) :
-    rp0(rcp0), base(prob_t::One()), rules(1,1) {}
-
-  void DecrementRule(const TRule& rule) {
-    if (rules.decrement(rule))
-      base /= rp0(rule);
-  }
-
-  void IncrementRule(const TRule& rule) {
-    if (rules.increment(rule))
-      base *= rp0(rule);
-  }
-
-  void IncrementRules(const vector<TRulePtr>& rules) {
-    for (int i = 0; i < rules.size(); ++i)
-      IncrementRule(*rules[i]);
-  }
-
-  void DecrementRules(const vector<TRulePtr>& rules) {
-    for (int i = 0; i < rules.size(); ++i)
-      DecrementRule(*rules[i]);
-  }
-
-  prob_t RuleProbability(const TRule& rule) const {
-    prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule))));
-    return p;
-  }
-
-  prob_t Likelihood() const {
-    prob_t p = base;
-    prob_t q; q.logeq(rules.log_crp_prob());
-    p *= q;
-    for (unsigned l = 1; l < src_jumps.size(); ++l) {
-      if (src_jumps[l].num_customers() > 0) {
-        prob_t q;
-        q.logeq(src_jumps[l].log_crp_prob());
-        p *= q;
-      }
-    }
-    return p;
-  }
-
-  const PhraseJointBase& rp0;
-  prob_t base;
-  CCRP_NoTable<TRule> rules;
-  vector<CCRP_NoTable<int> > src_jumps;
-};
-
 struct BackwardEstimateSym {
   BackwardEstimateSym(const Model1& m1,
                       const Model1& invm1, const vector<WordID>& src, const vector<WordID>& trg) :
@@ -264,7 +171,7 @@ int main(int argc, char** argv) {
   vector<vector<WordID> > corpuse, corpusf;
   set<WordID> vocabe, vocabf;
   cerr << "Reading corpus...\n";
-  ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
+  corpus::ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
   cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n";
   cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n";
   assert(corpusf.size() == corpuse.size());
@@ -273,13 +180,8 @@ int main(int argc, char** argv) {
   Model1 m1(conf["model1"].as<string>());
   Model1 invm1(conf["inverse_model1"].as<string>());
 
-#if 0
-  PhraseConditionalBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size());
-  MyConditionalModel m(lp0);
-#else
   PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size(), vocabf.size());
-  MyJointModel m(lp0);
-#endif
+  MonotonicParallelSegementationModel m(lp0);
 
   cerr << "Initializing reachability limits...\n";
   vector<Particle> ps(corpusf.size());
@@ -296,7 +198,10 @@ int main(int argc, char** argv) {
     for (int ci = 0; ci < corpusf.size(); ++ci) {
       vector<int>& src = corpusf[ci];
       vector<int>& trg = corpuse[ci];
-      m.DecrementRules(ps[ci].rules);
+      m.DecrementRulesAndStops(ps[ci].rules);
+      const prob_t q_stop = m.StopProbability();
+      const prob_t q_cont = m.ContinueProbability();
+      cerr << "P(stop)=" << q_stop << "\tP(continue)=" <<q_cont << endl;
 
       BackwardEstimateSym be(m1, invm1, src, trg);
       const Reachability& r = reaches[ci];
@@ -336,7 +241,8 @@ int main(int argc, char** argv) {
                   x.f_.push_back(src[i + j]);
                 np.src_cov += x.f_.size();
                 np.trg_cov += x.e_.size();
-                prob_t rp = m.RuleProbability(x);
+                const bool stop_now = (np.src_cov == src_len && np.trg_cov == trg_len);
+                prob_t rp = m.RuleProbability(x) * (stop_now ? q_stop : q_cont);
                 np.gamma_last = rp;
                 const prob_t u = pow(np.gamma_last * pow(be(np.src_cov, np.trg_cov), 1.2), 0.1);
                 //cerr << "**rule=" << x << endl;
@@ -363,7 +269,7 @@ int main(int argc, char** argv) {
         pfss.add(lps[i].weight);
       const int sampled = rng.SelectSample(pfss);
       ps[ci] = lps[sampled];
-      m.IncrementRules(lps[sampled].rules);
+      m.IncrementRulesAndStops(lps[sampled].rules);
       for (int i = 0; i < lps[sampled].rules.size(); ++i) { cerr << "S:\t" << lps[sampled].rules[i]->AsString() << "\n"; }
       cerr << "tmp-LLH: " << log(m.Likelihood()) << endl;
     }
diff --git a/utils/logval_test.cc b/utils/logval_test.cc
index 4aa452f2..6133f5ce 100644
--- a/utils/logval_test.cc
+++ b/utils/logval_test.cc
@@ -30,13 +30,13 @@ TEST_F(LogValTest,Negate) {
   LogVal<double> x(-2.4);
   LogVal<double> y(2.4);
   y.negate();
-  EXPECT_FLOAT_EQ(x,y);
+  EXPECT_FLOAT_EQ(x.as_float(),y.as_float());
 }
 
 TEST_F(LogValTest,Inverse) {
   LogVal<double> x(1/2.4);
   LogVal<double> y(2.4);
-  EXPECT_FLOAT_EQ(x,y.inverse());
+  EXPECT_FLOAT_EQ(x.as_float(),y.inverse().as_float());
 }
 
 TEST_F(LogValTest,Minus) {
@@ -45,9 +45,9 @@ TEST_F(LogValTest,Minus) {
   LogVal<double> z1 = x - y;
   LogVal<double> z2 = x;
   z2 -= y;
-  EXPECT_FLOAT_EQ(z1, z2);
-  EXPECT_FLOAT_EQ(z1, 10.0);
-  EXPECT_FLOAT_EQ(y - x, -10.0);
+  EXPECT_FLOAT_EQ(z1.as_float(), z2.as_float());
+  EXPECT_FLOAT_EQ(z1.as_float(), 10.0);
+  EXPECT_FLOAT_EQ((y - x).as_float(), -10.0);
 }
 
 TEST_F(LogValTest,TestOps) {
@@ -62,8 +62,8 @@ TEST_F(LogValTest,TestOps) {
   LogVal<double> bb(-0.3);
   cerr << (aa + bb) << endl;
   cerr << (bb + aa) << endl;
-  EXPECT_FLOAT_EQ((aa + bb), (bb + aa));
-  EXPECT_FLOAT_EQ((aa + bb), -0.1);
+  EXPECT_FLOAT_EQ((aa + bb).as_float(), (bb + aa).as_float());
+  EXPECT_FLOAT_EQ((aa + bb).as_float(), -0.1);
 }
 
 TEST_F(LogValTest,TestSizes) {
-- 
cgit v1.2.3