From 0acc92a0eecf04a2c429f6f7685bfcaa68c7ec3a Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Tue, 11 Oct 2011 12:06:32 +0100
Subject: check in some experimental particle filtering code, some gitignore
 fixes

---
 gi/pf/pfdist.cc | 621 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 621 insertions(+)
 create mode 100644 gi/pf/pfdist.cc

(limited to 'gi/pf/pfdist.cc')
diff --git a/gi/pf/pfdist.cc b/gi/pf/pfdist.cc
new file mode 100644
index 00000000..18dfd03b
--- /dev/null
+++ b/gi/pf/pfdist.cc
@@ -0,0 +1,621 @@
+#include <iostream>
+#include <tr1/memory>
+#include <queue>
+
+#include <boost/functional.hpp>
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "base_measures.h"
+#include "reachability.h"
+#include "viterbi.h"
+#include "hg.h"
+#include "trule.h"
+#include "tdict.h"
+#include "filelib.h"
+#include "dict.h"
+#include "sampler.h"
+#include "ccrp_nt.h"
+#include "ccrp_onetable.h"
+
+using namespace std;
+using namespace tr1;
+namespace po = boost::program_options;
+
+shared_ptr<MT19937> prng;
+
+size_t hash_value(const TRule& r) {
+  size_t h = boost::hash_value(r.e_);
+  boost::hash_combine(h, -r.lhs_);
+  boost::hash_combine(h, boost::hash_value(r.f_));
+  return h;
+}
+
+bool operator==(const TRule& a, const TRule& b) {
+  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
+}
+
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples")
+        ("particles,p",po::value<unsigned>()->default_value(30),"Number of particles")
+        ("filter_frequency,f",po::value<unsigned>()->default_value(5),"Number of time steps between filterings")
+        ("input,i",po::value<string>(),"Read parallel data from")
+        ("max_src_phrase",po::value<unsigned>()->default_value(5),"Maximum length of source language phrases")
+        ("max_trg_phrase",po::value<unsigned>()->default_value(5),"Maximum length of target language phrases")
+        ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)")
+        ("inverse_model1,M",po::value<string>(),"Inverse Model 1 parameters (used in backward estimate)")
+        ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution")
+        ("random_seed,S",po::value<uint32_t>(), "Random seed");
+  po::options_description clo("Command line options");
+  clo.add_options()
+        ("config", po::value<string>(), "Configuration file")
+        ("help,h", "Print this help message and exit");
+  po::options_description dconfig_options, dcmdline_options;
+  dconfig_options.add(opts);  // the config file accepts only the options in opts
+  dcmdline_options.add(opts).add(clo);  // the command line additionally accepts --config / --help
+  // Fills *conf from the command line first, then from the optional --config file.
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  if (conf->count("config")) {
+    ifstream config((*conf)["config"].as<string>().c_str());
+    if (!config) { cerr << "Unable to read configuration file: " << (*conf)["config"].as<string>() << endl; exit(1); }
+    po::store(po::parse_config_file(config, dconfig_options), *conf);
+  }
+  po::notify(*conf);
+  if (conf->count("help") || (conf->count("input") == 0)) {  // --input is mandatory: print usage and exit
+    cerr << dcmdline_options << endl;
+    exit(1);
+  }
+}
+
+void ReadParallelCorpus(const string& filename,
+                vector<vector<WordID> >* f,
+                vector<vector<WordID> >* e,
+                set<WordID>* vocab_f,
+                set<WordID>* vocab_e) {
+  // Reads one "source ||| target" sentence pair per line from filename ("-" = stdin) into
+  // *f / *e and records the word ids seen on each side in *vocab_f / *vocab_e.
+  f->clear();
+  e->clear();
+  vocab_f->clear();
+  vocab_e->clear();
+  ifstream file;  // stack-owned, so it is closed on every exit path
+  istream* in = &cin;
+  if (filename != "-") {
+    file.open(filename.c_str());
+    if (!file) { cerr << "Unable to read parallel corpus: " << filename << endl; exit(1); }  // checked in NDEBUG builds too
+    in = &file;
+  }
+  string line;
+  const WordID kDIV = TD::Convert("|||");
+  vector<WordID> tmp;
+  while (getline(*in, line)) {
+    e->push_back(vector<int>());
+    f->push_back(vector<int>());
+    vector<int>& le = e->back();
+    vector<int>& lf = f->back();
+    tmp.clear();
+    TD::ConvertSentence(line, &tmp);
+    bool isf = true;  // true while tokens still belong to the source side
+    for (unsigned i = 0; i < tmp.size(); ++i) {
+      const int cur = tmp[i];
+      if (isf) {
+        if (kDIV == cur) { isf = false; } else {
+          lf.push_back(cur);
+          vocab_f->insert(cur);
+        }
+      } else {
+        assert(cur != kDIV);  // only one separator is allowed per line
+        le.push_back(cur);
+        vocab_e->insert(cur);
+      }
+    }
+    assert(isf == false);  // every line must contain the ||| separator
+  }
+}
+
+#if 0
+struct MyConditionalModel {  // disabled (#if 0) variant: one rule CRP per source phrase, plus a CRP over source phrases
+  MyConditionalModel(PhraseConditionalBase& rcp0) : rp0(&rcp0), base(prob_t::One()), src_phrases(1,1), src_jumps(200, CCRP_NoTable<int>(1,1)) {}
+
+  prob_t srcp0(const vector<WordID>& src) const {  // base probability of a source phrase
+    prob_t p(1.0 / 3000.0);
+    p.poweq(src.size());  // (1/3000)^length
+    prob_t lenp; lenp.logeq(log_poisson(src.size(), 1.0));  // length term
+    p *= lenp;
+    return p;
+  }
+
+  void DecrementRule(const TRule& rule) {
+    const RuleCRPMap::iterator it = rules.find(rule.f_);
+    assert(it != rules.end());
+    if (it->second.decrement(rule)) {
+      base /= (*rp0)(rule);
+      if (it->second.num_customers() == 0)
+        rules.erase(it);  // drop the per-source-phrase CRP once it is empty
+    }
+    if (src_phrases.decrement(rule.f_))
+      base /= srcp0(rule.f_);
+  }
+
+  void IncrementRule(const TRule& rule) {
+    RuleCRPMap::iterator it = rules.find(rule.f_);
+    if (it == rules.end())
+      it = rules.insert(make_pair(rule.f_, CCRP_NoTable<TRule>(1,1))).first;  // first rule with this source side
+    if (it->second.increment(rule)) {
+      base *= (*rp0)(rule);
+    }
+    if (src_phrases.increment(rule.f_))
+      base *= srcp0(rule.f_);
+  }
+
+  void IncrementRules(const vector<TRulePtr>& rules) {
+    for (int i = 0; i < rules.size(); ++i)
+      IncrementRule(*rules[i]);
+  }
+
+  void DecrementRules(const vector<TRulePtr>& rules) {
+    for (int i = 0; i < rules.size(); ++i)
+      DecrementRule(*rules[i]);
+  }
+
+  void IncrementJump(int dist, unsigned src_len) {
+    assert(src_len > 0);
+    if (src_jumps[src_len].increment(dist))  // NOTE(review): src_jumps has 200 entries and src_len is not bounds-checked
+      base *= jp0(dist, src_len);
+  }
+
+  void DecrementJump(int dist, unsigned src_len) {
+    assert(src_len > 0);
+    if (src_jumps[src_len].decrement(dist))
+      base /= jp0(dist, src_len);
+  }
+
+  void IncrementJumps(const vector<int>& js, unsigned src_len) {
+    for (unsigned i = 0; i < js.size(); ++i)
+      IncrementJump(js[i], src_len);
+  }
+
+  void DecrementJumps(const vector<int>& js, unsigned src_len) {
+    for (unsigned i = 0; i < js.size(); ++i)
+      DecrementJump(js[i], src_len);
+  }
+
+  // p(jump = dist | src_len , z)
+  prob_t JumpProbability(int dist, unsigned src_len) {
+    const prob_t p0 = jp0(dist, src_len);
+    const double lp = src_jumps[src_len].logprob(dist, log(p0));
+    prob_t q; q.logeq(lp);
+    return q;
+  }
+
+  // p(rule.f_ | z) * p(rule.e_ | rule.f_ , z)
+  prob_t RuleProbability(const TRule& rule) const {
+    const prob_t p0 = (*rp0)(rule);
+    prob_t srcp; srcp.logeq(src_phrases.logprob(rule.f_, log(srcp0(rule.f_))));
+    const RuleCRPMap::const_iterator it = rules.find(rule.f_);
+    if (it == rules.end()) return srcp * p0;  // unseen source phrase: the base probability is used directly
+    const double lp = it->second.logprob(rule, log(p0));
+    prob_t q; q.logeq(lp);
+    return q * srcp;
+  }
+
+  prob_t Likelihood() const {  // base * every rule CRP * every non-empty jump CRP
+    prob_t p = base;
+    for (RuleCRPMap::const_iterator it = rules.begin();
+         it != rules.end(); ++it) {
+      prob_t cl; cl.logeq(it->second.log_crp_prob());
+      p *= cl;
+    }
+    for (unsigned l = 1; l < src_jumps.size(); ++l) {
+      if (src_jumps[l].num_customers() > 0) {
+        prob_t q;
+        q.logeq(src_jumps[l].log_crp_prob());
+        p *= q;
+      }
+    }
+    return p;
+  }
+
+  JumpBase jp0;  // base distribution over jump distances
+  const PhraseConditionalBase* rp0;  // base distribution over rules (not owned)
+  prob_t base;  // running product of the base-measure factors
+  typedef unordered_map<vector<WordID>, CCRP_NoTable<TRule>, boost::hash<vector<WordID> > > RuleCRPMap;
+  RuleCRPMap rules;  // keyed by the rule's source side f_
+  CCRP_NoTable<vector<WordID> > src_phrases;
+  vector<CCRP_NoTable<int> > src_jumps;  // indexed by source sentence length
+};
+
+#endif
+
+// Joint phrase-pair model: one CRP over translation rules plus, per source sentence length,
+// one CRP over source jump distances. `base` accumulates the base-measure factors.
+struct MyJointModel {
+  MyJointModel(PhraseJointBase& rcp0) :
+    rp0(rcp0), base(prob_t::One()), rules(1,1), src_jumps(200, CCRP_NoTable<int>(1,1)) {}
+
+  // Remove / add one rule; `base` changes only when the CRP call returns true.
+  void DecrementRule(const TRule& rule) {
+    if (rules.decrement(rule))
+      base /= rp0(rule);
+  }
+  void IncrementRule(const TRule& rule) {
+    if (rules.increment(rule))
+      base *= rp0(rule);
+  }
+  void IncrementRules(const vector<TRulePtr>& rules) {
+    for (unsigned i = 0; i < rules.size(); ++i)
+      IncrementRule(*rules[i]);
+  }
+  void DecrementRules(const vector<TRulePtr>& rules) {
+    for (unsigned i = 0; i < rules.size(); ++i)
+      DecrementRule(*rules[i]);
+  }
+
+  // Jump CRP for source sentences of src_len words. The table starts with 200 slots and
+  // grows on demand, so longer sentences do not index past the end of src_jumps.
+  CCRP_NoTable<int>& JumpCRP(unsigned src_len) {
+    if (src_len >= src_jumps.size()) src_jumps.resize(src_len + 1, CCRP_NoTable<int>(1,1));
+    return src_jumps[src_len];
+  }
+  void IncrementJump(int dist, unsigned src_len) {
+    assert(src_len > 0);
+    if (JumpCRP(src_len).increment(dist))
+      base *= jp0(dist, src_len);
+  }
+  void DecrementJump(int dist, unsigned src_len) {
+    assert(src_len > 0);
+    if (JumpCRP(src_len).decrement(dist))
+      base /= jp0(dist, src_len);
+  }
+  void IncrementJumps(const vector<int>& js, unsigned src_len) {
+    for (unsigned i = 0; i < js.size(); ++i)
+      IncrementJump(js[i], src_len);
+  }
+  void DecrementJumps(const vector<int>& js, unsigned src_len) {
+    for (unsigned i = 0; i < js.size(); ++i)
+      DecrementJump(js[i], src_len);
+  }
+
+  // p(jump = dist | src_len , z)
+  prob_t JumpProbability(int dist, unsigned src_len) {
+    const prob_t p0 = jp0(dist, src_len);
+    prob_t q; q.logeq(JumpCRP(src_len).logprob(dist, log(p0)));
+    return q;
+  }
+  // p(rule | z) under the single joint rule CRP
+  prob_t RuleProbability(const TRule& rule) const {
+    prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule))));
+    return p;
+  }
+  // Probability of the current state: base * rule CRP * every non-empty jump CRP.
+  prob_t Likelihood() const {
+    prob_t p = base;
+    prob_t q; q.logeq(rules.log_crp_prob());
+    p *= q;
+    for (unsigned l = 1; l < src_jumps.size(); ++l) {
+      if (src_jumps[l].num_customers() > 0) {
+        prob_t q;
+        q.logeq(src_jumps[l].log_crp_prob());
+        p *= q;
+      }
+    }
+    return p;
+  }
+  JumpBase jp0;
+  const PhraseJointBase& rp0;
+  prob_t base;
+  CCRP_NoTable<TRule> rules;
+  vector<CCRP_NoTable<int> > src_jumps;
+};
+
+// Model 1 estimate of the probability of the target words not yet generated given the
+// source words not yet covered (plus NULL); values are cached per coverage state.
+struct BackwardEstimate {
+  BackwardEstimate(const Model1& m1, const vector<WordID>& src, const vector<WordID>& trg) :
+      model1_(m1), src_(src), trg_(trg) {}
+  const prob_t& operator()(const vector<bool>& src_cov, unsigned trg_cov) const {
+    assert(src_.size() == src_cov.size());
+    assert(trg_cov <= trg_.size());
+    prob_t& est = cache_[src_cov][trg_cov];
+    if (!est.is_0()) return est;  // a zero entry is treated as "not computed yet"
+    if (trg_cov == trg_.size()) { est = prob_t::One(); return est; }
+    vector<WordID> remaining; remaining.reserve(src_.size() + 1);
+    remaining.push_back(0);  // NULL word
+    for (size_t i = 0; i < src_cov.size(); ++i)
+      if (!src_cov[i]) remaining.push_back(src_[i]);
+    const prob_t uniform_alignment(1.0 / remaining.size());
+    est.logeq(log_poisson(trg_.size() - trg_cov, remaining.size() - 1)); // p(trg len remaining | src len remaining)
+    for (unsigned j = trg_cov; j < trg_.size(); ++j) {
+      prob_t word_prob;
+      for (size_t i = 0; i < remaining.size(); ++i)
+        word_prob += model1_(remaining[i], trg_[j]);
+      if (word_prob.is_0()) {
+        cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(remaining) << ") = 0!\n";
+        abort();
+      }
+      word_prob *= uniform_alignment;
+      est *= word_prob;
+    }
+    return est;
+  }
+  const Model1& model1_;
+  const vector<WordID>& src_;
+  const vector<WordID>& trg_;
+  mutable unordered_map<vector<bool>, map<unsigned, prob_t>, boost::hash<vector<bool> > > cache_;
+};
+
+// Symmetrised backward estimate: the geometric mean of the Model 1 score of the remaining
+// target words given the uncovered source words and of the inverse-direction score.
+struct BackwardEstimateSym {
+  BackwardEstimateSym(const Model1& m1,
+                      const Model1& invm1, const vector<WordID>& src, const vector<WordID>& trg) :
+      model1_(m1), invmodel1_(invm1), src_(src), trg_(trg) {}
+  const prob_t& operator()(const vector<bool>& src_cov, unsigned trg_cov) const {
+    assert(src_.size() == src_cov.size());
+    assert(trg_cov <= trg_.size());
+    prob_t& e = cache_[src_cov][trg_cov];
+    if (e.is_0()) {  // a zero entry is treated as "not computed yet"
+      if (trg_cov == trg_.size()) { e = prob_t::One(); return e; }
+      vector<WordID> r(src_.size() + 1); r.clear();
+      for (unsigned i = 0; i < src_cov.size(); ++i)
+        if (!src_cov[i]) r.push_back(src_[i]);
+      r.push_back(0);  // NULL word
+      const prob_t uniform_alignment(1.0 / r.size());
+      e.logeq(log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining)
+      for (unsigned j = trg_cov; j < trg_.size(); ++j) {
+        prob_t p;
+        for (unsigned i = 0; i < r.size(); ++i)
+          p += model1_(r[i], trg_[j]);
+        if (p.is_0()) {
+          cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n";
+          abort();
+        }
+        p *= uniform_alignment;
+        e *= p;
+      }
+      r.pop_back();  // the inverse direction conditions on target words, so drop the source NULL
+      const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0));
+      prob_t inv;
+      inv.logeq(log_poisson(r.size(), trg_.size() - trg_cov));
+      for (unsigned i = 0; i < r.size(); ++i) {
+        // NULL target word first; an unsigned loop from trg_cov - 1 would wrap when trg_cov == 0.
+        prob_t p; p += invmodel1_(0, r[i]);
+        for (unsigned j = trg_cov; j < trg_.size(); ++j)
+          p += invmodel1_(trg_[j], r[i]);
+        if (p.is_0()) {
+          cerr << "ERROR: p_inv(" << TD::Convert(r[i]) << " | " << TD::GetString(trg_) << ") = 0!\n";
+          abort();
+        }
+        p *= inv_uniform;
+        inv *= p;
+      }
+      e = pow(e * inv, 0.5);  // geometric mean of the two directions
+    }
+    return e;
+  }
+  const Model1& model1_;
+  const Model1& invmodel1_;
+  const vector<WordID>& src_;
+  const vector<WordID>& trg_;
+  mutable unordered_map<vector<bool>, map<unsigned, prob_t>, boost::hash<vector<bool> > > cache_;
+};
+
+struct Particle {  // one (partial) derivation hypothesis for a sentence pair
+  Particle() : weight(prob_t::One()), src_cov(0), trg_cov(0), prev_pos(-1) {}
+  prob_t weight;           // particle weight, starts at one
+  prob_t gamma_last;       // rule probability * jump probability of the latest extension
+  vector<int> src_jumps;   // jump distances taken so far
+  vector<TRulePtr> rules;  // rules chosen so far
+  vector<bool> src_cv;     // one coverage flag per source word
+  int src_cov;             // number of source words covered
+  int trg_cov;             // number of target words covered
+  int prev_pos;            // source position covered last (-1 before the first rule)
+};
+
+ostream& operator<<(ostream& o, const vector<bool>& v) {
+  // Writes the flags of v as a run of '1' / '0' characters, first element first.
+  for (vector<bool>::const_iterator it = v.begin(); it != v.end(); ++it) o << (*it ? '1' : '0');
+  return o;
+}
+ostream& operator<<(ostream& o, const Particle& p) {
+  // One-line debug dump: coverage flags, coverage counts, last position, rule count, log weight.
+  return o << "[cv=" << p.src_cv << "  src_cov=" << p.src_cov << " trg_cov=" << p.trg_cov << " last_pos=" << p.prev_pos << " num_rules=" << p.rules.size() << "  w=" << log(p.weight) << ']';
+}
+
+// Replaces *pps by as many particles drawn via prng->SelectSample over the current weights; each drawn copy gets weight 1/N.
+void FilterCrapParticlesAndReweight(vector<Particle>* pps) {
+  vector<Particle>& particles = *pps;
+  SampleSet<prob_t> weights;
+  for (size_t k = 0; k < particles.size(); ++k) weights.add(particles[k].weight);
+  const prob_t uniform_weight(1.0 / particles.size());
+  vector<Particle> resampled; resampled.reserve(particles.size());
+  for (size_t k = 0; k < particles.size(); ++k) {
+    resampled.push_back(particles[prng->SelectSample(weights)]);
+    resampled.back().weight = uniform_weight;
+  }
+  resampled.swap(particles);
+}
+
+// Particle-filter sampler: every pass removes each sentence pair's current derivation from
+// the model, proposes `particles` new derivations, and re-adds one drawn by final weight.
+int main(int argc, char** argv) {
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  const unsigned kMAX_TRG_PHRASE = conf["max_trg_phrase"].as<unsigned>();
+  const unsigned kMAX_SRC_PHRASE = conf["max_src_phrase"].as<unsigned>();
+  const unsigned particles = conf["particles"].as<unsigned>();
+  const unsigned samples = conf["samples"].as<unsigned>();
+  const unsigned rejuv_freq = conf["filter_frequency"].as<unsigned>();
+  if (particles == 0) { cerr << "--particles must be at least 1\n"; return 1; }  // lps[0] is read below
+  if (!conf.count("model1") || !conf.count("inverse_model1")) {  // both are loaded unconditionally below
+    cerr << argv[0] << ": please use --model1 and --inverse_model1 to specify model 1 parameters\n";
+    return 1;
+  }
+  if (conf.count("random_seed"))
+    prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
+  else
+    prng.reset(new MT19937);
+  MT19937& rng = *prng;
+
+  vector<vector<WordID> > corpuse, corpusf;
+  set<WordID> vocabe, vocabf;
+  cerr << "Reading corpus...\n";
+  ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
+  cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n";
+  cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n";
+  assert(corpusf.size() == corpuse.size());
+
+  const int kLHS = -TD::Convert("X");
+  Model1 m1(conf["model1"].as<string>());
+  Model1 invm1(conf["inverse_model1"].as<string>());
+
+#if 0
+  PhraseConditionalBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size());
+  MyConditionalModel m(lp0);
+#else
+  PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as<double>(), vocabe.size(), vocabf.size());
+  MyJointModel m(lp0);
+#endif
+
+  cerr << "Initializing reachability limits...\n";
+  vector<Particle> ps(corpusf.size());
+  vector<Reachability> reaches; reaches.reserve(corpusf.size());
+  for (int ci = 0; ci < corpusf.size(); ++ci)
+    reaches.push_back(Reachability(corpusf[ci].size(),
+                                   corpuse[ci].size(),
+                                   kMAX_SRC_PHRASE,
+                                   kMAX_TRG_PHRASE));
+  cerr << "Sampling...\n";
+  vector<Particle> tmp_p(10000);  // work space
+  SampleSet<prob_t> pfss;
+  for (int SS=0; SS < samples; ++SS) {
+    for (int ci = 0; ci < corpusf.size(); ++ci) {
+      vector<int>& src = corpusf[ci];
+      vector<int>& trg = corpuse[ci];
+      m.DecrementRules(ps[ci].rules);
+      m.DecrementJumps(ps[ci].src_jumps, src.size());
+      //BackwardEstimate be(m1, src, trg);
+      BackwardEstimateSym be(m1, invm1, src, trg);
+      const Reachability& r = reaches[ci];
+      vector<Particle> lps(particles);
+
+      for (int pi = 0; pi < particles; ++pi) {
+        Particle& p = lps[pi];
+        p.src_cv.resize(src.size(), false);
+      }
+      bool all_complete = false;
+      while(!all_complete) {
+        SampleSet<prob_t> ss;
+
+        // all particles have now been extended a bit, we will reweight them now
+        if (lps[0].trg_cov > 0)
+          FilterCrapParticlesAndReweight(&lps);
+
+        // loop over all particles and extend them
+        bool done_nothing = true;
+        for (int pi = 0; pi < particles; ++pi) {
+          Particle& p = lps[pi];
+          int tic = 0;
+          while(p.trg_cov < trg.size() && tic < rejuv_freq) {
+            ++tic;
+            done_nothing = false;
+            ss.clear();
+            TRule x; x.lhs_ = kLHS;
+            prob_t z;
+            int first_uncovered = src.size();
+            int last_uncovered = -1;
+            for (int i = 0; i < src.size(); ++i) {
+              const bool is_uncovered = !p.src_cv[i];
+              if (i < first_uncovered && is_uncovered) first_uncovered = i;
+              if (is_uncovered && i > last_uncovered) last_uncovered = i;
+            }
+            assert(last_uncovered > -1);
+            assert(first_uncovered < src.size());
+
+            for (int trg_len = 1; trg_len <= kMAX_TRG_PHRASE; ++trg_len) {
+              if (p.trg_cov + trg_len > trg.size()) break;  // the phrase would run past the end of trg
+              x.e_.push_back(trg[trg_len - 1 + p.trg_cov]);
+              for (int src_len = 1; src_len <= kMAX_SRC_PHRASE; ++src_len) {
+                if (!r.edges[p.src_cov][p.trg_cov][src_len][trg_len]) continue;
+                const int last_possible_start = last_uncovered - src_len + 1;
+                assert(last_possible_start >= 0);
+                //cerr << src_len << "," << trg_len << " is allowed. E=" << TD::GetString(x.e_) << endl;
+                //cerr << "  first_uncovered=" << first_uncovered << "  last_possible_start=" << last_possible_start << endl;
+                for (int i = first_uncovered; i <= last_possible_start; ++i) {
+                  if (p.src_cv[i]) continue;
+                  assert(ss.size() < tmp_p.size());  // if fails increase tmp_p size
+                  Particle& np = tmp_p[ss.size()];
+                  np = p;
+                  x.f_.clear();
+                  int gap_add = 0;
+                  bool bad = false;
+                  prob_t jp = prob_t::One();
+                  int prev_pos = p.prev_pos;
+                  for (int j = 0; j < src_len; ++j) {
+                    if ((j + i + gap_add) == src.size()) { bad = true; break; }
+                    while ((i+j+gap_add) < src.size() && p.src_cv[i + j + gap_add]) { ++gap_add; }
+                    if ((j + i + gap_add) == src.size()) { bad = true; break; }
+                    np.src_cv[i + j + gap_add] = true;
+                    x.f_.push_back(src[i + j + gap_add]);
+                    jp *= m.JumpProbability(i + j + gap_add - prev_pos, src.size());
+                    int jump = i + j + gap_add - prev_pos;
+                    assert(jump != 0);
+                    np.src_jumps.push_back(jump);
+                    prev_pos = i + j + gap_add;
+                  }
+                  if (bad) continue;
+                  np.prev_pos = prev_pos;
+                  np.src_cov += x.f_.size();
+                  np.trg_cov += x.e_.size();
+                  if (x.f_.size() != src_len) continue;
+                  prob_t rp = m.RuleProbability(x);
+                  np.gamma_last = rp * jp;
+                  const prob_t u = pow(np.gamma_last * be(np.src_cv, np.trg_cov), 0.2);
+                  //cerr << "**rule=" << x << endl;
+                  //cerr << "  u=" << log(u) << "  rule=" << rp << " jump=" << jp << endl;
+                  ss.add(u);
+                  np.rules.push_back(TRulePtr(new TRule(x)));
+                  z += u;
+                  // NOTE(review): p.trg_cov < trg.size() holds throughout this loop, so the branch below looks unreachable; np may have been intended - confirm
+                  const bool completed = (p.trg_cov == trg.size());
+                  if (completed) {
+                    int last_jump = src.size() - p.prev_pos;
+                    assert(last_jump > 0);
+                    p.src_jumps.push_back(last_jump);
+                    p.weight *= m.JumpProbability(last_jump, src.size());
+                  }
+                }
+              }
+            }
+            cerr << "number of edges to consider: " << ss.size() << endl;
+            const int sampled = rng.SelectSample(ss);
+            prob_t q_n = ss[sampled] / z;
+            p = tmp_p[sampled];
+            //m.IncrementRule(*p.rules.back());
+            p.weight *= p.gamma_last / q_n;
+            cerr << "[w=" << log(p.weight) << "]\tsampled rule: " << p.rules.back()->AsString() << endl;
+            cerr << p << endl;
+          }
+        } // loop over particles (pi = 0 .. particles)
+        if (done_nothing) all_complete = true;
+      }
+      pfss.clear();
+      for (int i = 0; i < lps.size(); ++i)
+        pfss.add(lps[i].weight);
+      const int sampled = rng.SelectSample(pfss);
+      ps[ci] = lps[sampled];
+      m.IncrementRules(lps[sampled].rules);
+      m.IncrementJumps(lps[sampled].src_jumps, src.size());
+      for (int i = 0; i < lps[sampled].rules.size(); ++i) { cerr << "S:\t" << lps[sampled].rules[i]->AsString() << "\n"; }
+      cerr << "tmp-LLH: " << log(m.Likelihood()) << endl;
+    }
+    cerr << "LLH: " << log(m.Likelihood()) << endl;
+    for (unsigned sni = 0; sni < 5 && sni < ps.size(); ++sni) {  // show at most the first five sentences
+      for (int i = 0; i < ps[sni].rules.size(); ++i) { cerr << "\t" << ps[sni].rules[i]->AsString() << endl; }
+    }
+  }
+  return 0;
+}
+
-- 
cgit v1.2.3


From 0af7d663194beddcde420349bbd91430e0b2e423 Mon Sep 17 00:00:00 2001
From: Guest_account Guest_account prguest11 <prguest11@taipan.cs>
Date: Tue, 11 Oct 2011 16:16:53 +0100
Subject: remove implicit conversion-to-double operator from LogVal<T> that
 caused overflow errors, clean up some pf code

---
 decoder/aligner.cc              |  2 +-
 decoder/cfg.cc                  |  2 +-
 decoder/cfg_format.h            |  2 +-
 decoder/decoder.cc              | 10 ++++----
 decoder/hg.cc                   |  4 ++--
 decoder/rule_lexer.l            |  2 ++
 decoder/trule.h                 | 15 +++++++++++-
 gi/pf/brat.cc                   | 11 ---------
 gi/pf/cbgi.cc                   | 10 --------
 gi/pf/dpnaive.cc                | 12 ----------
 gi/pf/itg.cc                    | 11 ---------
 gi/pf/pfbrat.cc                 | 11 ---------
 gi/pf/pfdist.cc                 | 11 ---------
 gi/pf/pfnaive.cc                | 11 ---------
 mteval/mbr_kbest.cc             |  4 ++--
 phrasinator/ccrp_nt.h           | 24 +++++++++++++++----
 training/mpi_batch_optimize.cc  |  2 +-
 training/mpi_compute_cllh.cc    | 51 +++++++++++++++++++----------------------
 training/mpi_online_optimize.cc |  4 ++--
 utils/logval.h                  | 10 ++++----
 20 files changed, 78 insertions(+), 131 deletions(-)

(limited to 'gi/pf/pfdist.cc')

diff --git a/decoder/aligner.cc b/decoder/aligner.cc
index 292ee123..53e059fb 100644
--- a/decoder/aligner.cc
+++ b/decoder/aligner.cc
@@ -165,7 +165,7 @@ inline void WriteProbGrid(const Array2D<prob_t>& m, ostream* pos) {
       if (m(i,j) == prob_t::Zero()) {
         os << "\t---X---";
       } else {
-        snprintf(b, 1024, "%0.5f", static_cast<double>(m(i,j)));
+        snprintf(b, 1024, "%0.5f", m(i,j).as_float());
         os << '\t' << b;
       }
     }
diff --git a/decoder/cfg.cc b/decoder/cfg.cc
index 651978d2..cd7e66e9 100755
--- a/decoder/cfg.cc
+++ b/decoder/cfg.cc
@@ -639,7 +639,7 @@ void CFG::Print(std::ostream &o,CFGFormat const& f) const {
     o << '['<<f.goal_nt_name <<']';
     WordID rhs=-goal_nt;
     f.print_rhs(o,*this,&rhs,&rhs+1);
-    if (pushed_inside!=1)
+    if (pushed_inside!=prob_t::One())
       f.print_features(o,pushed_inside);
     o<<'\n';
   }
diff --git a/decoder/cfg_format.h b/decoder/cfg_format.h
index c6a594b8..2f40d483 100755
--- a/decoder/cfg_format.h
+++ b/decoder/cfg_format.h
@@ -101,7 +101,7 @@ struct CFGFormat {
   }
 
   void print_features(std::ostream &o,prob_t p,FeatureVector const& fv=FeatureVector()) const {
-    bool logp=(logprob_feat && p!=1);
+    bool logp=(logprob_feat && p!=prob_t::One());
     if (features || logp) {
       o << partsep;
       if (logp)
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index c4fe3c4d..3b53fd6b 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -325,7 +325,7 @@ struct DecoderImpl {
 
   static void ConvertSV(const SparseVector<prob_t>& src, SparseVector<double>* trg) {
     for (SparseVector<prob_t>::const_iterator it = src.begin(); it != src.end(); ++it)
-      trg->set_value(it->first, it->second);
+      trg->set_value(it->first, it->second.as_float());
   }
 };
 
@@ -788,10 +788,10 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
   const bool show_tree_structure=conf.count("show_tree_structure");
   if (!SILENT) forest_stats(forest,"  Init. forest",show_tree_structure,oracle.show_derivation);
   if (conf.count("show_expected_length")) {
-    const PRPair<double, double> res =
-      Inside<PRPair<double, double>,
-             PRWeightFunction<double, EdgeProb, double, ELengthWeightFunction> >(forest);
-    cerr << "  Expected length  (words): " << res.r / res.p << "\t" << res << endl;
+    const PRPair<prob_t, prob_t> res =
+      Inside<PRPair<prob_t, prob_t>,
+             PRWeightFunction<prob_t, EdgeProb, prob_t, ELengthWeightFunction> >(forest);
+    cerr << "  Expected length  (words): " << (res.r / res.p).as_float() << "\t" << res << endl;
   }
 
   if (conf.count("show_partition")) {
diff --git a/decoder/hg.cc b/decoder/hg.cc
index 3ad17f1a..180986d7 100644
--- a/decoder/hg.cc
+++ b/decoder/hg.cc
@@ -157,14 +157,14 @@ prob_t Hypergraph::ComputeEdgePosteriors(double scale, vector<prob_t>* posts) co
   const ScaledEdgeProb weight(scale);
   const ScaledTransitionEventWeightFunction w2(scale);
   SparseVector<prob_t> pv;
-  const double inside = InsideOutside<prob_t,
+  const prob_t inside = InsideOutside<prob_t,
                   ScaledEdgeProb,
                   SparseVector<prob_t>,
                   ScaledTransitionEventWeightFunction>(*this, &pv, weight, w2);
   posts->resize(edges_.size());
   for (int i = 0; i < edges_.size(); ++i)
     (*posts)[i] = prob_t(pv.value(i));
-  return prob_t(inside);
+  return inside;
 }
 
 prob_t Hypergraph::ComputeBestPathThroughEdges(vector<prob_t>* post) const {
diff --git a/decoder/rule_lexer.l b/decoder/rule_lexer.l
index 9331d8ed..083a5bb1 100644
--- a/decoder/rule_lexer.l
+++ b/decoder/rule_lexer.l
@@ -220,6 +220,8 @@ NT [^\t \[\],]+
                   std::cerr << "Line " << lex_line << ": LHS and RHS arity mismatch!\n";
                   abort();
                 }
+		// const bool ignore_grammar_features = false;
+		// if (ignore_grammar_features) scfglex_num_feats = 0;
 		TRulePtr rp(new TRule(scfglex_lhs, scfglex_src_rhs, scfglex_src_rhs_size, scfglex_trg_rhs, scfglex_trg_rhs_size, scfglex_feat_ids, scfglex_feat_vals, scfglex_num_feats, scfglex_src_arity, scfglex_als, scfglex_num_als));
     check_and_update_ctf_stack(rp);
     TRulePtr coarse_rp = ((ctf_level == 0) ? TRulePtr() : ctf_rule_stack.top());
diff --git a/decoder/trule.h b/decoder/trule.h
index 4df4ec90..8eb2a059 100644
--- a/decoder/trule.h
+++ b/decoder/trule.h
@@ -5,7 +5,9 @@
 #include <vector>
 #include <cassert>
 #include <iostream>
-#include <boost/shared_ptr.hpp>
+
+#include "boost/shared_ptr.hpp"
+#include "boost/functional/hash.hpp"
 
 #include "sparse_vector.h"
 #include "wordid.h"
@@ -162,4 +164,15 @@ class TRule {
   bool SanityCheck() const;
 };
 
+inline size_t hash_value(const TRule& r) {
+  size_t seed = boost::hash_value(r.e_);               // start from the e_ side
+  boost::hash_combine(seed, -r.lhs_);                  // mix in the negated lhs_
+  boost::hash_combine(seed, boost::hash_value(r.f_));  // then the f_ side
+  return seed;
+}
+
+inline bool operator==(const TRule& a, const TRule& b) {  // equal iff lhs_, e_ and f_ all match
+  return a.lhs_ == b.lhs_ ? (a.e_ == b.e_ && a.f_ == b.f_) : false;
+}
+
 #endif
diff --git a/gi/pf/brat.cc b/gi/pf/brat.cc
index 4c6ba3ef..7b60ef23 100644
--- a/gi/pf/brat.cc
+++ b/gi/pf/brat.cc
@@ -25,17 +25,6 @@ static unsigned kMAX_SRC_PHRASE;
 static unsigned kMAX_TRG_PHRASE;
 struct FSTState;
 
-size_t hash_value(const TRule& r) {
-  size_t h = 2 - r.lhs_;
-  boost::hash_combine(h, boost::hash_value(r.e_));
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 double log_poisson(unsigned x, const double& lambda) {
   assert(lambda > 0.0);
   return log(lambda) * x - lgamma(x + 1) - lambda;
diff --git a/gi/pf/cbgi.cc b/gi/pf/cbgi.cc
index 20204e8a..97f1ba34 100644
--- a/gi/pf/cbgi.cc
+++ b/gi/pf/cbgi.cc
@@ -27,16 +27,6 @@ double log_decay(unsigned x, const double& b) {
   return log(b - 1) - x * log(b);
 }
 
-size_t hash_value(const TRule& r) {
-  // TODO fix hash function
-  size_t h = boost::hash_value(r.e_) * boost::hash_value(r.f_) * r.lhs_;
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 struct SimpleBase {
   SimpleBase(unsigned esize, unsigned fsize, unsigned ntsize = 144) :
     uniform_e(-log(esize)),
diff --git a/gi/pf/dpnaive.cc b/gi/pf/dpnaive.cc
index 582d1be7..608f73d5 100644
--- a/gi/pf/dpnaive.cc
+++ b/gi/pf/dpnaive.cc
@@ -20,18 +20,6 @@ namespace po = boost::program_options;
 
 static unsigned kMAX_SRC_PHRASE;
 static unsigned kMAX_TRG_PHRASE;
-struct FSTState;
-
-size_t hash_value(const TRule& r) {
-  size_t h = 2 - r.lhs_;
-  boost::hash_combine(h, boost::hash_value(r.e_));
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
 
 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
diff --git a/gi/pf/itg.cc b/gi/pf/itg.cc
index 2c2a86f9..ac3c16a3 100644
--- a/gi/pf/itg.cc
+++ b/gi/pf/itg.cc
@@ -27,17 +27,6 @@ ostream& operator<<(ostream& os, const vector<WordID>& p) {
   return os << ']';
 }
 
-size_t hash_value(const TRule& r) {
-  size_t h = boost::hash_value(r.e_);
-  boost::hash_combine(h, -r.lhs_);
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 double log_poisson(unsigned x, const double& lambda) {
   assert(lambda > 0.0);
   return log(lambda) * x - lgamma(x + 1) - lambda;
diff --git a/gi/pf/pfbrat.cc b/gi/pf/pfbrat.cc
index 4c6ba3ef..7b60ef23 100644
--- a/gi/pf/pfbrat.cc
+++ b/gi/pf/pfbrat.cc
@@ -25,17 +25,6 @@ static unsigned kMAX_SRC_PHRASE;
 static unsigned kMAX_TRG_PHRASE;
 struct FSTState;
 
-size_t hash_value(const TRule& r) {
-  size_t h = 2 - r.lhs_;
-  boost::hash_combine(h, boost::hash_value(r.e_));
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 double log_poisson(unsigned x, const double& lambda) {
   assert(lambda > 0.0);
   return log(lambda) * x - lgamma(x + 1) - lambda;
diff --git a/gi/pf/pfdist.cc b/gi/pf/pfdist.cc
index 18dfd03b..81abd61b 100644
--- a/gi/pf/pfdist.cc
+++ b/gi/pf/pfdist.cc
@@ -24,17 +24,6 @@ namespace po = boost::program_options;
 
 shared_ptr<MT19937> prng;
 
-size_t hash_value(const TRule& r) {
-  size_t h = boost::hash_value(r.e_);
-  boost::hash_combine(h, -r.lhs_);
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
diff --git a/gi/pf/pfnaive.cc b/gi/pf/pfnaive.cc
index 43c604c3..c30e7c4f 100644
--- a/gi/pf/pfnaive.cc
+++ b/gi/pf/pfnaive.cc
@@ -24,17 +24,6 @@ namespace po = boost::program_options;
 
 shared_ptr<MT19937> prng;
 
-size_t hash_value(const TRule& r) {
-  size_t h = boost::hash_value(r.e_);
-  boost::hash_combine(h, -r.lhs_);
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc
index 2867b36b..64a6a8bf 100644
--- a/mteval/mbr_kbest.cc
+++ b/mteval/mbr_kbest.cc
@@ -32,7 +32,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
 }
 
 struct LossComparer {
-  bool operator()(const pair<vector<WordID>, double>& a, const pair<vector<WordID>, double>& b) const {
+  bool operator()(const pair<vector<WordID>, prob_t>& a, const pair<vector<WordID>, prob_t>& b) const {
     return a.second < b.second;
   }
 };
@@ -108,7 +108,7 @@ int main(int argc, char** argv) {
           ScoreP s = scorer->ScoreCandidate(list[j].first);
           double loss = 1.0 - s->ComputeScore();
           if (type == TER || type == AER) loss = 1.0 - loss;
-          double weighted_loss = loss * (joints[j] / marginal);
+          double weighted_loss = loss * (joints[j] / marginal).as_float();
           wl_acc += weighted_loss;
           if ((!output_list) && wl_acc > mbr_loss) break;
         }
diff --git a/phrasinator/ccrp_nt.h b/phrasinator/ccrp_nt.h
index 163b643a..811bce73 100644
--- a/phrasinator/ccrp_nt.h
+++ b/phrasinator/ccrp_nt.h
@@ -50,15 +50,26 @@ class CCRP_NoTable {
     return it->second;
   }
 
-  void increment(const Dish& dish) {
-    ++custs_[dish];
+  int increment(const Dish& dish) {
+    int table_diff = 0;
+    if (++custs_[dish] == 1)
+      table_diff = 1;
     ++num_customers_;
+    return table_diff;
   }
 
-  void decrement(const Dish& dish) {
-    if ((--custs_[dish]) == 0)
+  int decrement(const Dish& dish) {
+    int table_diff = 0;
+    int nc = --custs_[dish];
+    if (nc == 0) {
       custs_.erase(dish);
+      table_diff = -1;
+    } else if (nc < 0) {
+      std::cerr << "Dish counts dropped below zero for: " << dish << std::endl;
+      abort();
+    }
     --num_customers_;
+    return table_diff;
   }
 
   double prob(const Dish& dish, const double& p0) const {
@@ -66,6 +77,11 @@ class CCRP_NoTable {
     return (at_table + p0 * concentration_) / (num_customers_ + concentration_);
   }
 
+  double logprob(const Dish& dish, const double& logp0) const {
+    const unsigned at_table = num_customers(dish);
+    return log(at_table + exp(logp0 + log(concentration_))) - log(num_customers_ + concentration_);
+  }
+
   double log_crp_prob() const {
     return log_crp_prob(concentration_);
   }
diff --git a/training/mpi_batch_optimize.cc b/training/mpi_batch_optimize.cc
index 0ba8c530..046e921c 100644
--- a/training/mpi_batch_optimize.cc
+++ b/training/mpi_batch_optimize.cc
@@ -92,7 +92,7 @@ struct TrainingObserver : public DecoderObserver {
   void SetLocalGradientAndObjective(vector<double>* g, double* o) const {
     *o = acc_obj;
     for (SparseVector<prob_t>::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it)
-      (*g)[it->first] = it->second;
+      (*g)[it->first] = it->second.as_float();
   }
 
   virtual void NotifyDecodingStart(const SentenceMetadata& smeta) {
diff --git a/training/mpi_compute_cllh.cc b/training/mpi_compute_cllh.cc
index b496d196..d5caa745 100644
--- a/training/mpi_compute_cllh.cc
+++ b/training/mpi_compute_cllh.cc
@@ -1,6 +1,4 @@
-#include <sstream>
 #include <iostream>
-#include <fstream>
 #include <vector>
 #include <cassert>
 #include <cmath>
@@ -12,6 +10,7 @@
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 
+#include "sentence_metadata.h"
 #include "verbose.h"
 #include "hg.h"
 #include "prob.h"
@@ -52,7 +51,8 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   return true;
 }
 
-void ReadTrainingCorpus(const string& fname, int rank, int size, vector<string>* c, vector<int>* ids) {
+void ReadInstances(const string& fname, int rank, int size, vector<string>* c) {
+  assert(fname != "-");
   ReadFile rf(fname);
   istream& in = *rf.stream();
   string line;
@@ -60,20 +60,16 @@ void ReadTrainingCorpus(const string& fname, int rank, int size, vector<string>*
   while(in) {
     getline(in, line);
     if (!in) break;
-    if (lc % size == rank) {
-      c->push_back(line);
-      ids->push_back(lc);
-    }
+    if (lc % size == rank) c->push_back(line);
     ++lc;
   }
 }
 
 static const double kMINUS_EPSILON = -1e-6;
 
-struct TrainingObserver : public DecoderObserver {
-  void Reset() {
-    acc_obj = 0;
-  } 
+struct ConditionalLikelihoodObserver : public DecoderObserver {
+
+  ConditionalLikelihoodObserver() : trg_words(), acc_obj(), cur_obj() {}
 
   virtual void NotifyDecodingStart(const SentenceMetadata&) {
     cur_obj = 0;
@@ -120,8 +116,10 @@ struct TrainingObserver : public DecoderObserver {
     }
     assert(!isnan(log_ref_z));
     acc_obj += (cur_obj - log_ref_z);
+    trg_words += smeta.GetReference().size();
   }
 
+  unsigned trg_words;
   double acc_obj;
   double cur_obj;
   int state;
@@ -161,35 +159,34 @@ int main(int argc, char** argv) {
   if (conf.count("weights"))
     Weights::InitFromFile(conf["weights"].as<string>(), &weights);
 
-  // freeze feature set
-  //const bool freeze_feature_set = conf.count("freeze_feature_set");
-  //if (freeze_feature_set) FD::Freeze();
-
-  vector<string> corpus; vector<int> ids;
-  ReadTrainingCorpus(conf["training_data"].as<string>(), rank, size, &corpus, &ids);
+  vector<string> corpus;
+  ReadInstances(conf["training_data"].as<string>(), rank, size, &corpus);
   assert(corpus.size() > 0);
-  assert(corpus.size() == ids.size());
-
-  TrainingObserver observer;
-  double objective = 0;
 
-  observer.Reset();
   if (rank == 0)
-    cerr << "Each processor is decoding " << corpus.size() << " training examples...\n";
+    cerr << "Each processor is decoding ~" << corpus.size() << " training examples...\n";
 
-  for (int i = 0; i < corpus.size(); ++i) {
-    decoder.SetId(ids[i]);
+  ConditionalLikelihoodObserver observer;
+  for (int i = 0; i < corpus.size(); ++i)
     decoder.Decode(corpus[i], &observer);
-  }
 
+  double objective = 0;
+  unsigned total_words = 0;
 #ifdef HAVE_MPI
   reduce(world, observer.acc_obj, objective, std::plus<double>(), 0);
+  reduce(world, observer.trg_words, total_words, std::plus<unsigned>(), 0);
 #else
   objective = observer.acc_obj;
+  // No reduce without MPI: take the local word count, otherwise the per-word stats below divide by zero.
+  total_words = observer.trg_words;
 #endif
 
-  if (rank == 0)
-    cout << "OBJECTIVE: " << objective << endl;
+  if (rank == 0) {
+    cout << "CONDITIONAL LOG_e LIKELIHOOD: " << objective << endl;
+    cout << "CONDITIONAL LOG_2 LIKELIHOOD: " << (objective/log(2)) << endl;
+    cout << "         CONDITIONAL ENTROPY: " << (objective/log(2) / total_words) << endl;
+    cout << "                  PERPLEXITY: " << pow(2, (objective/log(2) / total_words)) << endl;
+  }
 
   return 0;
 }
diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc
index 2ef4a2e7..f87b7274 100644
--- a/training/mpi_online_optimize.cc
+++ b/training/mpi_online_optimize.cc
@@ -94,7 +94,7 @@ struct TrainingObserver : public DecoderObserver {
   void SetLocalGradientAndObjective(vector<double>* g, double* o) const {
     *o = acc_obj;
     for (SparseVector<prob_t>::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it)
-      (*g)[it->first] = it->second;
+      (*g)[it->first] = it->second.as_float();
   }
 
   virtual void NotifyDecodingStart(const SentenceMetadata& smeta) {
@@ -158,7 +158,7 @@ struct TrainingObserver : public DecoderObserver {
   void GetGradient(SparseVector<double>* g) const {
     g->clear();
     for (SparseVector<prob_t>::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it)
-      g->set_value(it->first, it->second);
+      g->set_value(it->first, it->second.as_float());
   }
 
   int total_complete;
diff --git a/utils/logval.h b/utils/logval.h
index 6fdc2c42..8a59d0b1 100644
--- a/utils/logval.h
+++ b/utils/logval.h
@@ -25,12 +25,13 @@ class LogVal {
   typedef LogVal<T> Self;
 
   LogVal() : s_(), v_(LOGVAL_LOG0) {}
-  explicit LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {}
+  LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {}
+  Self& operator=(double x) { s_ = std::signbit(x); v_ = s_ ? std::log(-x) : std::log(x); return *this; }
   LogVal(init_minus_1) : s_(true),v_(0) {  }
   LogVal(init_1) : s_(),v_(0) {  }
   LogVal(init_0) : s_(),v_(LOGVAL_LOG0) {  }
-  LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {}
-  LogVal(unsigned x) : s_(0), v_(std::log(x)) { }
+  explicit LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {}
+  explicit LogVal(unsigned x) : s_(0), v_(std::log(x)) { }
   LogVal(double lnx,bool sign) : s_(sign),v_(lnx) {}
   LogVal(double lnx,init_lnx) : s_(),v_(lnx) {}
   static Self exp(T lnx) { return Self(lnx,false); }
@@ -141,9 +142,6 @@ class LogVal {
     return pow(1/root);
   }
 
-  operator T() const {
-    if (s_) return -std::exp(v_); else return std::exp(v_);
-  }
   T as_float() const {
     if (s_) return -std::exp(v_); else return std::exp(v_);
   }
-- 
cgit v1.2.3