From 0acc92a0eecf04a2c429f6f7685bfcaa68c7ec3a Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Tue, 11 Oct 2011 12:06:32 +0100
Subject: check in some experimental particle filtering code, some gitignore
 fixes

---
 gi/pf/pfbrat.cc | 554 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 554 insertions(+)
 create mode 100644 gi/pf/pfbrat.cc

(limited to 'gi/pf/pfbrat.cc')
diff --git a/gi/pf/pfbrat.cc b/gi/pf/pfbrat.cc
new file mode 100644
index 00000000..4c6ba3ef
--- /dev/null
+++ b/gi/pf/pfbrat.cc
@@ -0,0 +1,554 @@
+#include <iostream>
+#include <tr1/memory>
+#include <queue>
+
+#include <boost/functional.hpp>
+#include <boost/multi_array.hpp>
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "viterbi.h"
+#include "hg.h"
+#include "trule.h"
+#include "tdict.h"
+#include "filelib.h"
+#include "dict.h"
+#include "sampler.h"
+#include "ccrp_nt.h"
+#include "cfg_wfst_composer.h"
+
+using namespace std;
+using namespace tr1;
+namespace po = boost::program_options;
+
+static unsigned kMAX_SRC_PHRASE;
+static unsigned kMAX_TRG_PHRASE;
+struct FSTState;
+
+size_t hash_value(const TRule& r) {
+  size_t h = 2 - r.lhs_;
+  boost::hash_combine(h, boost::hash_value(r.e_));
+  boost::hash_combine(h, boost::hash_value(r.f_));
+  return h;
+}
+
+bool operator==(const TRule& a, const TRule& b) {
+  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
+}
+
+double log_poisson(unsigned x, const double& lambda) {
+  assert(lambda > 0.0);
+  return log(lambda) * x - lgamma(x + 1) - lambda;
+}
+
+struct ConditionalBase {  // base distribution p0(e-phrase | f-phrase): Model 1 table interpolated with a uniform target distribution
+  explicit ConditionalBase(const double m1mixture, const unsigned vocab_e_size, const string& model1fname) :
+      kM1MIXTURE(m1mixture),
+      kUNIFORM_MIXTURE(1.0 - m1mixture),
+      kUNIFORM_TARGET(1.0 / vocab_e_size),
+      kNULL(TD::Convert("<eps>")) {
+    assert(m1mixture >= 0.0 && m1mixture <= 1.0);
+    assert(vocab_e_size > 0);
+    LoadModel1(model1fname);
+  }
+
+  void LoadModel1(const string& fname) {  // reads space-separated "src trg logprob" lines into ttable[src][trg]; aborts on a malformed line
+    cerr << "Loading Model 1 parameters from " << fname << " ..." << endl;
+    ReadFile rf(fname);
+    istream& in = *rf.stream();
+    string line;
+    unsigned lc = 0;
+    while(getline(in, line)) {
+      ++lc;
+      size_t cur = 0;    // unsigned so that comparisons with line.size() are well defined
+      size_t start = 0;
+      while(cur < line.size() && line[cur] != ' ') { ++cur; }
+      if (cur == line.size()) { cerr << "Malformed Model 1 entry (no target word) at line " << lc << endl; abort(); }
+      line[cur] = 0;     // NUL-terminate the source token in place
+      const WordID src = TD::Convert(&line[0]);
+      ++cur;
+      start = cur;
+      while(cur < line.size() && line[cur] != ' ') { ++cur; }
+      if (cur == line.size()) { cerr << "Malformed Model 1 entry (no log probability) at line " << lc << endl; abort(); }
+      line[cur] = 0;     // NUL-terminate the target token in place
+      WordID trg = TD::Convert(&line[start]);
+      const double logprob = strtod(&line[cur + 1], NULL);
+      if (static_cast<size_t>(src) >= ttable.size()) ttable.resize(src + 1);
+      ttable[src][trg].logeq(logprob);
+    }
+    cerr << "  read " << lc << " parameters.\n";
+  }
+
+  // returns p0(rule.e_ | rule.f_) as a prob_t: Poisson length term times, per target word, a Model 1 / uniform mixture
+  prob_t operator()(const TRule& rule) const {
+    const int flen = rule.f_.size();
+    const int elen = rule.e_.size();
+    prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
+    prob_t p;
+    p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01)
+    for (int i = 0; i < elen; ++i) {               // for each position i in e-RHS
+      const WordID trg = rule.e_[i];
+      prob_t tp = prob_t::Zero();
+      for (int j = -1; j < flen; ++j) {            // j == -1 stands for the NULL source word
+        const WordID src = j < 0 ? kNULL : rule.f_[j];
+        if (src >= 0 && static_cast<size_t>(src) < ttable.size()) {  // source word may be absent from the Model 1 table
+          const map<WordID, prob_t>::const_iterator it = ttable[src].find(trg);
+          if (it != ttable[src].end()) tp += kM1MIXTURE * it->second;
+        }
+        tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
+      }
+      tp *= uniform_src_alignment;                 //     draw a_i         ~uniform
+      p *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform
+    }
+    return p;
+  }
+
+  const prob_t kM1MIXTURE;  // Model 1 mixture component
+  const prob_t kUNIFORM_MIXTURE; // uniform mixture component
+  const prob_t kUNIFORM_TARGET;  // 1 / vocab_e_size
+  const WordID kNULL;            // id of "<eps>", used as the NULL source word
+  vector<map<WordID, prob_t> > ttable;  // ttable[src][trg], filled by LoadModel1
+};
+
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {  // parses options into *conf; prints usage and exits with status 1 on --help or missing --input
+  po::options_description opts("Configuration options");  // accepted on the command line and in a config file
+  opts.add_options()
+        ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples")
+        ("input,i",po::value<string>(),"Read parallel data from")
+        ("max_src_phrase",po::value<unsigned>()->default_value(3),"Maximum length of source language phrases")
+        ("max_trg_phrase",po::value<unsigned>()->default_value(3),"Maximum length of target language phrases")
+        ("model1,m",po::value<string>(),"Model 1 parameters (used in base distribution)")
+        ("model1_interpolation_weight",po::value<double>()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution")
+        ("random_seed,S",po::value<uint32_t>(), "Random seed");
+  po::options_description clo("Command line options");  // accepted on the command line only
+  clo.add_options()
+        ("config", po::value<string>(), "Configuration file")
+        ("help,h", "Print this help message and exit");
+  po::options_description dconfig_options, dcmdline_options;
+  dconfig_options.add(opts);
+  dcmdline_options.add(opts).add(clo);
+  
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  if (conf->count("config")) {  // the config file is parsed after the command line
+    ifstream config((*conf)["config"].as<string>().c_str());  // NOTE(review): the stream is not checked for open failure
+    po::store(po::parse_config_file(config, dconfig_options), *conf);
+  }
+  po::notify(*conf);
+
+  if (conf->count("help") || (conf->count("input") == 0)) {  // --input is mandatory
+    cerr << dcmdline_options << endl;
+    exit(1);
+  }
+}
+
+void ReadParallelCorpus(const string& filename,  // reads lines of the form "f words ||| e words"; filename "-" means stdin
+                vector<vector<WordID> >* f,  // out: word ids before the "|||" divider, one vector per line
+                vector<vector<int> >* e,  // out: word ids after the "|||" divider, one vector per line
+                set<int>* vocab_f,  // out: distinct word ids seen on the f side
+                set<int>* vocab_e) {  // out: distinct word ids seen on the e side
+  f->clear();
+  e->clear();
+  vocab_f->clear();
+  vocab_e->clear();
+  istream* in;
+  if (filename == "-")
+    in = &cin;
+  else
+    in = new ifstream(filename.c_str());  // owned here; deleted at the end of the function
+  assert(*in);
+  string line;
+  const WordID kDIV = TD::Convert("|||");
+  vector<WordID> tmp;
+  while(*in) {
+    getline(*in, line);
+    if (line.empty() && !*in) break;  // stop once a read fails and yields nothing
+    e->push_back(vector<int>());
+    f->push_back(vector<int>());
+    vector<int>& le = e->back();
+    vector<int>& lf = f->back();
+    tmp.clear();
+    TD::ConvertSentence(line, &tmp);
+    bool isf = true;  // true while still reading the f side of the line
+    for (unsigned i = 0; i < tmp.size(); ++i) {
+      const int cur = tmp[i];
+      if (isf) {
+        if (kDIV == cur) { isf = false; } else {
+          lf.push_back(cur);
+          vocab_f->insert(cur);
+        }
+      } else {
+        assert(cur != kDIV);  // only one divider is allowed per line
+        le.push_back(cur);
+        vocab_e->insert(cur);
+      }
+    }
+    assert(isf == false);  // every line must contain the divider
+  }
+  if (in != &cin) delete in;
+}
+
+struct UniphraseLM {  // holds two CRPs (phrases_, gen_) plus constants; not referenced elsewhere in the visible part of this file
+  UniphraseLM(const vector<vector<int> >& corpus,
+              const set<int>& vocab,
+              const po::variables_map& conf) :
+    phrases_(1,1),
+    gen_(1,1),
+    corpus_(corpus),
+    uniform_word_(1.0 / vocab.size()),
+    gen_p0_(0.5),
+    p_end_(0.5),
+    use_poisson_(conf.count("poisson_length") > 0) {}  // NOTE(review): "poisson_length" is not registered in InitCommandLine -- confirm
+
+  void ResampleHyperparameters(MT19937* rng) {  // resamples both CRPs and prints the phrases_ concentration to cerr
+    phrases_.resample_hyperparameters(rng);
+    gen_.resample_hyperparameters(rng);
+    cerr << " " << phrases_.concentration();
+  }
+
+  CCRP_NoTable<vector<int> > phrases_;
+  CCRP_NoTable<bool> gen_;
+  vector<vector<bool> > z_;   // z_[i] is there a phrase boundary after the ith word
+  const vector<vector<int> >& corpus_;  // stored by reference: the corpus must outlive this object
+  const double uniform_word_;  // 1 / vocab.size()
+  const double gen_p0_;
+  const double p_end_; // in base length distribution, p of the end of a phrase
+  const bool use_poisson_;
+};
+
+struct Reachability {  // precomputes which (src_covered, trg_covered) steps lie on a path that covers both sentences completely
+  boost::multi_array<bool, 4> edges;  // edges[src_covered][trg_covered][x][trg_delta] is this edge worth exploring?
+  boost::multi_array<short, 2> max_src_delta; // msd[src_covered][trg_covered] -- the largest src delta that's valid
+
+  Reachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) :
+      edges(boost::extents[srclen][trglen][src_max_phrase_len+1][trg_max_phrase_len+1]),
+      max_src_delta(boost::extents[srclen][trglen]) {
+    ComputeReachability(srclen, trglen, src_max_phrase_len, trg_max_phrase_len);
+  }
+
+ private:
+  struct SState {  // back pointer: the coverage counts of a predecessor state
+    SState() : prev_src_covered(), prev_trg_covered() {}
+    SState(int i, int j) : prev_src_covered(i), prev_trg_covered(j) {}
+    int prev_src_covered;
+    int prev_trg_covered;
+  };
+
+  struct NState {  // forward pointer; only used by the commented-out debugging code below
+    NState() : next_src_covered(), next_trg_covered() {}
+    NState(int i, int j) : next_src_covered(i), next_trg_covered(j) {}
+    int next_src_covered;
+    int next_trg_covered;
+  };
+
+  void ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) {  // fills edges and max_src_delta
+    typedef boost::multi_array<vector<SState>, 2> array_type;
+    array_type a(boost::extents[srclen + 1][trglen + 1]);  // a[i][j]: back pointers to states from which (i,j) is reached in one step
+    a[0][0].push_back(SState());  // dummy entry so that the start cell counts as reached
+    for (int i = 0; i < srclen; ++i) {  // forward pass
+      for (int j = 0; j < trglen; ++j) {
+        if (a[i][j].size() == 0) continue;  // (i,j) cannot be reached from (0,0)
+        const SState prev(i,j);
+        for (int k = 1; k <= src_max_phrase_len; ++k) {  // k = source words consumed by the step
+          if ((i + k) > srclen) continue;
+          for (int l = 1; l <= trg_max_phrase_len; ++l) {  // l = target words consumed by the step
+            if ((j + l) > trglen) continue;
+            a[i + k][j + l].push_back(prev);
+          }
+        }
+      }
+    }
+    a[0][0].clear();  // drop the dummy entry
+    cerr << "Final cell contains " << a[srclen][trglen].size() << " back pointers\n";
+    assert(a[srclen][trglen].size() > 0);
+
+    typedef boost::multi_array<bool, 2> rarray_type;
+    rarray_type r(boost::extents[srclen + 1][trglen + 1]);  // r[i][j]: (i,j) lies on some path to (srclen,trglen)
+//    typedef boost::multi_array<vector<NState>, 2> narray_type;
+//    narray_type b(boost::extents[srclen + 1][trglen + 1]);
+    r[srclen][trglen] = true;
+    for (int i = srclen; i >= 0; --i) {  // backward pass
+      for (int j = trglen; j >= 0; --j) {
+        vector<SState>& prevs = a[i][j];
+        if (!r[i][j]) { prevs.clear(); }  // states that cannot reach the final cell contribute no edges
+//        const NState nstate(i,j);
+        for (int k = 0; k < prevs.size(); ++k) {
+          r[prevs[k].prev_src_covered][prevs[k].prev_trg_covered] = true;
+          int src_delta = i - prevs[k].prev_src_covered;
+          edges[prevs[k].prev_src_covered][prevs[k].prev_trg_covered][src_delta][j - prevs[k].prev_trg_covered] = true;
+          short &msd = max_src_delta[prevs[k].prev_src_covered][prevs[k].prev_trg_covered];
+          if (src_delta > msd) msd = src_delta;
+//          b[prevs[k].prev_src_covered][prevs[k].prev_trg_covered].push_back(nstate);
+        }
+      }
+    }
+    assert(!edges[0][0][1][0]);  // sanity checks: no step may consume zero words on either side
+    assert(!edges[0][0][0][1]);
+    assert(!edges[0][0][0][0]);
+    cerr << "  MAX SRC DELTA[0][0] = " << max_src_delta[0][0] << endl;
+    assert(max_src_delta[0][0] > 0);
+    //cerr << "First cell contains " << b[0][0].size() << " forward pointers\n";
+    //for (int i = 0; i < b[0][0].size(); ++i) {
+    //  cerr << "  -> (" << b[0][0][i].next_src_covered << "," << b[0][0][i].next_trg_covered << ")\n";
+    //}
+  }
+};
+
+ostream& operator<<(ostream& os, const FSTState& q);
+struct FSTState {  // transducer state: target/source coverage counts, source coverage bits and the pending source phrase prefix
+  explicit FSTState(int src_size) :  // initial state: nothing covered, empty prefix
+      trg_covered_(),
+      src_covered_(),
+      src_coverage_(src_size) {}
+
+  FSTState(short trg_covered, short src_covered, const vector<bool>& src_coverage, const vector<short>& src_prefix) :
+      trg_covered_(trg_covered),
+      src_covered_(src_covered),
+      src_coverage_(src_coverage),
+      src_prefix_(src_prefix) {
+    if (src_coverage_.size() == src_covered) {  // once the whole source is covered there may be no pending prefix
+      assert(src_prefix.size() == 0);
+    }
+  }
+
+  // if we extend by the word at src_position, what are
+  // the next states that are reachable and lie on a valid
+  // path to the final state?
+  vector<FSTState> Extensions(int src_position, int src_len, int trg_len, const Reachability& r) const {  // aborts if no extension exists
+    assert(src_position < src_coverage_.size());
+    if (src_coverage_[src_position]) {  // the position is already covered
+      cerr << "Trying to extend " << *this << " with position " << src_position << endl;
+      abort();
+    }
+    vector<bool> ncvg = src_coverage_;
+    ncvg[src_position] = true;
+
+    vector<FSTState> res;
+    const int trg_remaining = trg_len - trg_covered_;
+    if (trg_remaining <= 0) {
+      cerr << "Target appears to have been covered: " << *this << " (trg_len=" << trg_len << ",trg_covered=" << trg_covered_ << ")" << endl;
+      abort();
+    }
+    const int src_remaining = src_len - src_covered_;
+    if (src_remaining <= 0) {
+      cerr << "Source appears to have been covered: " << *this << endl;
+      abort();
+    }
+
+    for (int tc = 1; tc <= kMAX_TRG_PHRASE; ++tc) {  // close the phrase pair here, consuming tc target words
+      if (r.edges[src_covered_][trg_covered_][src_prefix_.size() + 1][tc]) {
+        int nc = src_prefix_.size() + 1 + src_covered_;
+        res.push_back(FSTState(trg_covered_ + tc, nc, ncvg, vector<short>()));  // the prefix is reset
+      }
+    }
+
+    if ((src_prefix_.size() + 1) < r.max_src_delta[src_covered_][trg_covered_]) {  // a longer source phrase is still allowed
+      vector<short> nsp = src_prefix_;
+      nsp.push_back(src_position);
+      res.push_back(FSTState(trg_covered_, src_covered_, ncvg, nsp));  // keep accumulating the prefix; counts unchanged
+    }
+
+    if (res.size() == 0) {
+      cerr << *this << " can't be extended!\n";
+      abort();
+    }
+    return res;
+  }
+
+  short trg_covered_, src_covered_;  // numbers of target / source words covered by completed phrase pairs
+  vector<bool> src_coverage_;  // one flag per source position, set when the position has been consumed
+  vector<short> src_prefix_;  // source positions consumed for the phrase pair that is still open
+};
+bool operator<(const FSTState& q, const FSTState& r) {  // orders by trg_covered_, src_covered_, src_coverage_, then src_prefix_
+  return q.trg_covered_ != r.trg_covered_   ? q.trg_covered_ < r.trg_covered_
+       : q.src_covered_ != r.src_covered_   ? q.src_covered_ < r.src_covered_
+       : q.src_coverage_ != r.src_coverage_ ? q.src_coverage_ < r.src_coverage_
+       :                                      q.src_prefix_ < r.src_prefix_;
+}
+
+ostream& operator<<(ostream& os, const FSTState& q) {  // writes "[trg_covered : coverage bits : <prefix positions>]"
+  os << "[" << q.trg_covered_ << " : ";
+  for (vector<bool>::const_iterator bit = q.src_coverage_.begin(); bit != q.src_coverage_.end(); ++bit)
+    os << *bit;
+  os << " : <";
+  for (vector<short>::const_iterator pos = q.src_prefix_.begin(); pos != q.src_prefix_.end(); ++pos) {
+    if (pos != q.src_prefix_.begin()) os << ' ';  // space-separate the prefix positions
+    os << *pos;
+  }
+  return os << ">]";
+}
+
+struct MyModel {  // one CRP over rules per source side (rule.f_), backed by the ConditionalBase distribution
+  MyModel(ConditionalBase& rcp0) : rp0(rcp0) {}  // rcp0 is stored by reference and must outlive the model
+  typedef unordered_map<vector<WordID>, CCRP_NoTable<TRule>, boost::hash<vector<WordID> > > SrcToRuleCRPMap;
+
+  void DecrementRule(const TRule& rule) {  // removes one customer; asserts that a CRP exists for rule.f_
+    SrcToRuleCRPMap::iterator it = rules.find(rule.f_);
+    assert(it != rules.end());
+    it->second.decrement(rule);
+    if (it->second.num_customers() == 0) rules.erase(it);  // drop CRPs that became empty
+  }
+
+  void IncrementRule(const TRule& rule) {  // adds one customer, creating the CRP for rule.f_ on first use
+    SrcToRuleCRPMap::iterator it = rules.find(rule.f_);
+    if (it == rules.end()) {
+      CCRP_NoTable<TRule> crp(1,1);
+      it = rules.insert(make_pair(rule.f_, crp)).first;
+    }
+    it->second.increment(rule);
+  }
+
+  // conditioned on rule.f_
+  prob_t RuleConditionalProbability(const TRule& rule) const {  // base probability if rule.f_ has no CRP, else the CRP probability
+    const prob_t base = rp0(rule);
+    SrcToRuleCRPMap::const_iterator it = rules.find(rule.f_);
+    if (it == rules.end()) {
+      return base;
+    } else {
+      const double lp = it->second.logprob(rule, log(base));
+      prob_t q; q.logeq(lp);
+      return q;
+    }
+  }
+
+  const ConditionalBase& rp0;
+  SrcToRuleCRPMap rules;  // keyed by the source side of the rule
+};
+
+struct MyFST : public WFST {  // WFST over FSTState values for one sentence pair; nodes are created on demand by GetNode
+  MyFST(const vector<WordID>& ssrc, const vector<WordID>& strg, MyModel* m) :
+      src(ssrc), trg(strg),
+      r(src.size(),trg.size(),kMAX_SRC_PHRASE, kMAX_TRG_PHRASE),
+      model(m) {
+    FSTState in(src.size());
+    cerr << " INIT: " << in << endl;
+    init = GetNode(in);
+    for (int i = 0; i < in.src_coverage_.size(); ++i) in.src_coverage_[i] = true;  // turn the same object into the final state
+    in.src_covered_ = src.size();
+    in.trg_covered_ = trg.size();
+    cerr << "FINAL: " << in << endl;
+    final = GetNode(in);
+  }
+  virtual const WFSTNode* Final() const;
+  virtual const WFSTNode* Initial() const;
+
+  const WFSTNode* GetNode(const FSTState& q);
+  map<FSTState, boost::shared_ptr<WFSTNode> > m;  // owns every node created so far, keyed by state
+  const vector<WordID>& src;  // stored by reference: the sentence must outlive this object
+  const vector<WordID>& trg;  // stored by reference: the sentence must outlive this object
+  Reachability r;
+  const WFSTNode* init;
+  const WFSTNode* final;
+  MyModel* model;  // not owned
+};
+
+struct MyNode : public WFSTNode {  // WFST node wrapping one FSTState
+  MyNode(const FSTState& q, MyFST* fst) : state(q), container(fst) {}
+  virtual vector<pair<const WFSTNode*, TRulePtr> > ExtendInput(unsigned srcindex) const;
+  const FSTState state;
+  mutable MyFST* container;  // the FST that created this node; used to look up or create successor nodes
+};
+
+vector<pair<const WFSTNode*, TRulePtr> > MyNode::ExtendInput(unsigned srcindex) const {  // successor nodes reached by consuming source position srcindex
+  cerr << "EXTEND " << state << " with " << srcindex << endl;
+  vector<FSTState> ext = state.Extensions(srcindex, container->src.size(), container->trg.size(), container->r);
+  vector<pair<const WFSTNode*,TRulePtr> > res(ext.size());
+  for (unsigned i = 0; i < ext.size(); ++i) {
+    res[i].first = container->GetNode(ext[i]);
+    if (ext[i].src_prefix_.size() == 0) {  // an empty prefix means a phrase pair was completed: emit its rule
+      const unsigned trg_from = state.trg_covered_;
+      const unsigned trg_to = ext[i].trg_covered_;
+      const unsigned prev_prfx_size = state.src_prefix_.size();
+      res[i].second.reset(new TRule);
+      res[i].second->lhs_ = -TD::Convert("X");
+      vector<WordID>& src = res[i].second->f_;
+      vector<WordID>& trg = res[i].second->e_;
+      src.resize(prev_prfx_size + 1);
+      for (unsigned j = 0; j < prev_prfx_size; ++j)  // source side: the pending prefix words ...
+        src[j] = container->src[state.src_prefix_[j]];
+      src[prev_prfx_size] = container->src[srcindex];  // ... followed by the word just consumed
+      for (unsigned j = trg_from; j < trg_to; ++j)  // target side: the newly covered target span
+        trg.push_back(container->trg[j]);
+      res[i].second->scores_.set_value(FD::Convert("Proposal"), log(container->model->RuleConditionalProbability(*res[i].second)));
+    }
+  }
+  return res;  // entries for still-open phrase pairs keep a null rule pointer
+}
+
+const WFSTNode* MyFST::GetNode(const FSTState& q) {  // returns the node for q, creating and caching it on first request
+  // operator[] inserts an empty pointer the first time a state is seen
+  boost::shared_ptr<WFSTNode>& slot = m[q];
+  if (slot.get() == NULL)
+    slot = boost::shared_ptr<WFSTNode>(new MyNode(q, this));
+  return slot.get();
+}
+
+const WFSTNode* MyFST::Final() const {  // node of the fully covered state, created in the constructor
+  return final;
+}
+
+const WFSTNode* MyFST::Initial() const {  // node of the empty-coverage state, created in the constructor
+  return init;
+}
+
+int main(int argc, char** argv) {  // experimental driver: composes ./kimura.g with the FST of the first sentence pair and prints the Viterbi tree
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  kMAX_TRG_PHRASE = conf["max_trg_phrase"].as<unsigned>();
+  kMAX_SRC_PHRASE = conf["max_src_phrase"].as<unsigned>();
+
+  if (!conf.count("model1")) {
+    cerr << argv[0] << ": Please use --model1 to specify model 1 parameters\n";
+    return 1;
+  }
+  shared_ptr<MT19937> prng;
+  if (conf.count("random_seed"))
+    prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
+  else
+    prng.reset(new MT19937);
+  MT19937& rng = *prng;
+
+  vector<vector<int> > corpuse, corpusf;
+  set<int> vocabe, vocabf;
+  ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
+  cerr << "f-Corpus size: " << corpusf.size() << " sentences\n";
+  cerr << "f-Vocabulary size: " << vocabf.size() << " types\n";
+  cerr << "e-Corpus size: " << corpuse.size() << " sentences\n";
+  cerr << "e-Vocabulary size: " << vocabe.size() << " types\n";
+  assert(corpusf.size() == corpuse.size());
+
+  ConditionalBase lp0(conf["model1_interpolation_weight"].as<double>(),
+                      vocabe.size(),
+                      conf["model1"].as<string>());
+  MyModel m(lp0);
+
+  TRule x("[X] ||| kAnwntR myN ||| at the convent ||| 0");  // hard-coded seed rules
+  m.IncrementRule(x);
+  TRule y("[X] ||| nY dyN ||| gave ||| 0");
+  m.IncrementRule(y);
+
+  if (corpusf.empty() || corpuse.empty()) { cerr << argv[0] << ": no sentence pairs read from input\n"; return 1; }
+  MyFST fst(corpusf[0], corpuse[0], &m);  // only the first sentence pair is processed
+  ifstream in("./kimura.g");
+  if (!in) { cerr << argv[0] << ": can't read ./kimura.g\n"; return 1; }
+  CFG_WFSTComposer comp(fst);
+  Hypergraph hg;
+  bool succeed = comp.Compose(&in, &hg);
+  hg.PrintGraphviz();
+  if (succeed) { cerr << "SUCCESS.\n"; } else { cerr << "FAILURE REPORTED.\n"; }
+
+#if 0
+  ifstream in2("./amnabooks.g");
+  assert(in2);
+  MyFST fst2(corpusf[1], corpuse[1], &m);
+  CFG_WFSTComposer comp2(fst2);
+  Hypergraph hg2;
+  bool succeed2 = comp2.Compose(&in2, &hg2);
+  if (succeed2) { cerr << "SUCCESS.\n"; } else { cerr << "FAILURE REPORTED.\n"; }
+#endif
+
+  SparseVector<double> w; w.set_value(FD::Convert("Proposal"), 1.0);  // weight 1 on the "Proposal" feature set in ExtendInput
+  hg.Reweight(w);
+  cerr << ViterbiFTree(hg) << endl;
+  return 0;
+}
+
-- 
cgit v1.2.3


From 0af7d663194beddcde420349bbd91430e0b2e423 Mon Sep 17 00:00:00 2001
From: Guest_account Guest_account prguest11 <prguest11@taipan.cs>
Date: Tue, 11 Oct 2011 16:16:53 +0100
Subject: remove implicit conversion-to-double operator from LogVal<T> that
 caused overflow errors, clean up some pf code

---
 decoder/aligner.cc              |  2 +-
 decoder/cfg.cc                  |  2 +-
 decoder/cfg_format.h            |  2 +-
 decoder/decoder.cc              | 10 ++++----
 decoder/hg.cc                   |  4 ++--
 decoder/rule_lexer.l            |  2 ++
 decoder/trule.h                 | 15 +++++++++++-
 gi/pf/brat.cc                   | 11 ---------
 gi/pf/cbgi.cc                   | 10 --------
 gi/pf/dpnaive.cc                | 12 ----------
 gi/pf/itg.cc                    | 11 ---------
 gi/pf/pfbrat.cc                 | 11 ---------
 gi/pf/pfdist.cc                 | 11 ---------
 gi/pf/pfnaive.cc                | 11 ---------
 mteval/mbr_kbest.cc             |  4 ++--
 phrasinator/ccrp_nt.h           | 24 +++++++++++++++----
 training/mpi_batch_optimize.cc  |  2 +-
 training/mpi_compute_cllh.cc    | 51 +++++++++++++++++++----------------------
 training/mpi_online_optimize.cc |  4 ++--
 utils/logval.h                  | 10 ++++----
 20 files changed, 78 insertions(+), 131 deletions(-)

(limited to 'gi/pf/pfbrat.cc')

diff --git a/decoder/aligner.cc b/decoder/aligner.cc
index 292ee123..53e059fb 100644
--- a/decoder/aligner.cc
+++ b/decoder/aligner.cc
@@ -165,7 +165,7 @@ inline void WriteProbGrid(const Array2D<prob_t>& m, ostream* pos) {
       if (m(i,j) == prob_t::Zero()) {
         os << "\t---X---";
       } else {
-        snprintf(b, 1024, "%0.5f", static_cast<double>(m(i,j)));
+        snprintf(b, 1024, "%0.5f", m(i,j).as_float());
         os << '\t' << b;
       }
     }
diff --git a/decoder/cfg.cc b/decoder/cfg.cc
index 651978d2..cd7e66e9 100755
--- a/decoder/cfg.cc
+++ b/decoder/cfg.cc
@@ -639,7 +639,7 @@ void CFG::Print(std::ostream &o,CFGFormat const& f) const {
     o << '['<<f.goal_nt_name <<']';
     WordID rhs=-goal_nt;
     f.print_rhs(o,*this,&rhs,&rhs+1);
-    if (pushed_inside!=1)
+    if (pushed_inside!=prob_t::One())
       f.print_features(o,pushed_inside);
     o<<'\n';
   }
diff --git a/decoder/cfg_format.h b/decoder/cfg_format.h
index c6a594b8..2f40d483 100755
--- a/decoder/cfg_format.h
+++ b/decoder/cfg_format.h
@@ -101,7 +101,7 @@ struct CFGFormat {
   }
 
   void print_features(std::ostream &o,prob_t p,FeatureVector const& fv=FeatureVector()) const {
-    bool logp=(logprob_feat && p!=1);
+    bool logp=(logprob_feat && p!=prob_t::One());
     if (features || logp) {
       o << partsep;
       if (logp)
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index c4fe3c4d..3b53fd6b 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -325,7 +325,7 @@ struct DecoderImpl {
 
   static void ConvertSV(const SparseVector<prob_t>& src, SparseVector<double>* trg) {
     for (SparseVector<prob_t>::const_iterator it = src.begin(); it != src.end(); ++it)
-      trg->set_value(it->first, it->second);
+      trg->set_value(it->first, it->second.as_float());
   }
 };
 
@@ -788,10 +788,10 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
   const bool show_tree_structure=conf.count("show_tree_structure");
   if (!SILENT) forest_stats(forest,"  Init. forest",show_tree_structure,oracle.show_derivation);
   if (conf.count("show_expected_length")) {
-    const PRPair<double, double> res =
-      Inside<PRPair<double, double>,
-             PRWeightFunction<double, EdgeProb, double, ELengthWeightFunction> >(forest);
-    cerr << "  Expected length  (words): " << res.r / res.p << "\t" << res << endl;
+    const PRPair<prob_t, prob_t> res =
+      Inside<PRPair<prob_t, prob_t>,
+             PRWeightFunction<prob_t, EdgeProb, prob_t, ELengthWeightFunction> >(forest);
+    cerr << "  Expected length  (words): " << (res.r / res.p).as_float() << "\t" << res << endl;
   }
 
   if (conf.count("show_partition")) {
diff --git a/decoder/hg.cc b/decoder/hg.cc
index 3ad17f1a..180986d7 100644
--- a/decoder/hg.cc
+++ b/decoder/hg.cc
@@ -157,14 +157,14 @@ prob_t Hypergraph::ComputeEdgePosteriors(double scale, vector<prob_t>* posts) co
   const ScaledEdgeProb weight(scale);
   const ScaledTransitionEventWeightFunction w2(scale);
   SparseVector<prob_t> pv;
-  const double inside = InsideOutside<prob_t,
+  const prob_t inside = InsideOutside<prob_t,
                   ScaledEdgeProb,
                   SparseVector<prob_t>,
                   ScaledTransitionEventWeightFunction>(*this, &pv, weight, w2);
   posts->resize(edges_.size());
   for (int i = 0; i < edges_.size(); ++i)
     (*posts)[i] = prob_t(pv.value(i));
-  return prob_t(inside);
+  return inside;
 }
 
 prob_t Hypergraph::ComputeBestPathThroughEdges(vector<prob_t>* post) const {
diff --git a/decoder/rule_lexer.l b/decoder/rule_lexer.l
index 9331d8ed..083a5bb1 100644
--- a/decoder/rule_lexer.l
+++ b/decoder/rule_lexer.l
@@ -220,6 +220,8 @@ NT [^\t \[\],]+
                   std::cerr << "Line " << lex_line << ": LHS and RHS arity mismatch!\n";
                   abort();
                 }
+		// const bool ignore_grammar_features = false;
+		// if (ignore_grammar_features) scfglex_num_feats = 0;
 		TRulePtr rp(new TRule(scfglex_lhs, scfglex_src_rhs, scfglex_src_rhs_size, scfglex_trg_rhs, scfglex_trg_rhs_size, scfglex_feat_ids, scfglex_feat_vals, scfglex_num_feats, scfglex_src_arity, scfglex_als, scfglex_num_als));
     check_and_update_ctf_stack(rp);
     TRulePtr coarse_rp = ((ctf_level == 0) ? TRulePtr() : ctf_rule_stack.top());
diff --git a/decoder/trule.h b/decoder/trule.h
index 4df4ec90..8eb2a059 100644
--- a/decoder/trule.h
+++ b/decoder/trule.h
@@ -5,7 +5,9 @@
 #include <vector>
 #include <cassert>
 #include <iostream>
-#include <boost/shared_ptr.hpp>
+
+#include "boost/shared_ptr.hpp"
+#include "boost/functional/hash.hpp"
 
 #include "sparse_vector.h"
 #include "wordid.h"
@@ -162,4 +164,15 @@ class TRule {
   bool SanityCheck() const;
 };
 
+inline size_t hash_value(const TRule& r) {  // hashes the fields that the TRule operator== in this header compares: e_, lhs_, f_
+  size_t h = boost::hash_value(r.e_);
+  boost::hash_combine(h, -r.lhs_);  // lhs_ is negated before being mixed in
+  boost::hash_combine(h, boost::hash_value(r.f_));
+  return h;
+}
+
+inline bool operator==(const TRule& a, const TRule& b) {  // rules are equal when lhs_, e_ and f_ all match; other members are not compared
+  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
+}
+
 #endif
diff --git a/gi/pf/brat.cc b/gi/pf/brat.cc
index 4c6ba3ef..7b60ef23 100644
--- a/gi/pf/brat.cc
+++ b/gi/pf/brat.cc
@@ -25,17 +25,6 @@ static unsigned kMAX_SRC_PHRASE;
 static unsigned kMAX_TRG_PHRASE;
 struct FSTState;
 
-size_t hash_value(const TRule& r) {
-  size_t h = 2 - r.lhs_;
-  boost::hash_combine(h, boost::hash_value(r.e_));
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 double log_poisson(unsigned x, const double& lambda) {
   assert(lambda > 0.0);
   return log(lambda) * x - lgamma(x + 1) - lambda;
diff --git a/gi/pf/cbgi.cc b/gi/pf/cbgi.cc
index 20204e8a..97f1ba34 100644
--- a/gi/pf/cbgi.cc
+++ b/gi/pf/cbgi.cc
@@ -27,16 +27,6 @@ double log_decay(unsigned x, const double& b) {
   return log(b - 1) - x * log(b);
 }
 
-size_t hash_value(const TRule& r) {
-  // TODO fix hash function
-  size_t h = boost::hash_value(r.e_) * boost::hash_value(r.f_) * r.lhs_;
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 struct SimpleBase {
   SimpleBase(unsigned esize, unsigned fsize, unsigned ntsize = 144) :
     uniform_e(-log(esize)),
diff --git a/gi/pf/dpnaive.cc b/gi/pf/dpnaive.cc
index 582d1be7..608f73d5 100644
--- a/gi/pf/dpnaive.cc
+++ b/gi/pf/dpnaive.cc
@@ -20,18 +20,6 @@ namespace po = boost::program_options;
 
 static unsigned kMAX_SRC_PHRASE;
 static unsigned kMAX_TRG_PHRASE;
-struct FSTState;
-
-size_t hash_value(const TRule& r) {
-  size_t h = 2 - r.lhs_;
-  boost::hash_combine(h, boost::hash_value(r.e_));
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
 
 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
diff --git a/gi/pf/itg.cc b/gi/pf/itg.cc
index 2c2a86f9..ac3c16a3 100644
--- a/gi/pf/itg.cc
+++ b/gi/pf/itg.cc
@@ -27,17 +27,6 @@ ostream& operator<<(ostream& os, const vector<WordID>& p) {
   return os << ']';
 }
 
-size_t hash_value(const TRule& r) {
-  size_t h = boost::hash_value(r.e_);
-  boost::hash_combine(h, -r.lhs_);
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 double log_poisson(unsigned x, const double& lambda) {
   assert(lambda > 0.0);
   return log(lambda) * x - lgamma(x + 1) - lambda;
diff --git a/gi/pf/pfbrat.cc b/gi/pf/pfbrat.cc
index 4c6ba3ef..7b60ef23 100644
--- a/gi/pf/pfbrat.cc
+++ b/gi/pf/pfbrat.cc
@@ -25,17 +25,6 @@ static unsigned kMAX_SRC_PHRASE;
 static unsigned kMAX_TRG_PHRASE;
 struct FSTState;
 
-size_t hash_value(const TRule& r) {
-  size_t h = 2 - r.lhs_;
-  boost::hash_combine(h, boost::hash_value(r.e_));
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 double log_poisson(unsigned x, const double& lambda) {
   assert(lambda > 0.0);
   return log(lambda) * x - lgamma(x + 1) - lambda;
diff --git a/gi/pf/pfdist.cc b/gi/pf/pfdist.cc
index 18dfd03b..81abd61b 100644
--- a/gi/pf/pfdist.cc
+++ b/gi/pf/pfdist.cc
@@ -24,17 +24,6 @@ namespace po = boost::program_options;
 
 shared_ptr<MT19937> prng;
 
-size_t hash_value(const TRule& r) {
-  size_t h = boost::hash_value(r.e_);
-  boost::hash_combine(h, -r.lhs_);
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
diff --git a/gi/pf/pfnaive.cc b/gi/pf/pfnaive.cc
index 43c604c3..c30e7c4f 100644
--- a/gi/pf/pfnaive.cc
+++ b/gi/pf/pfnaive.cc
@@ -24,17 +24,6 @@ namespace po = boost::program_options;
 
 shared_ptr<MT19937> prng;
 
-size_t hash_value(const TRule& r) {
-  size_t h = boost::hash_value(r.e_);
-  boost::hash_combine(h, -r.lhs_);
-  boost::hash_combine(h, boost::hash_value(r.f_));
-  return h;
-}
-
-bool operator==(const TRule& a, const TRule& b) {
-  return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_);
-}
-
 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc
index 2867b36b..64a6a8bf 100644
--- a/mteval/mbr_kbest.cc
+++ b/mteval/mbr_kbest.cc
@@ -32,7 +32,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
 }
 
 struct LossComparer {
-  bool operator()(const pair<vector<WordID>, double>& a, const pair<vector<WordID>, double>& b) const {
+  bool operator()(const pair<vector<WordID>, prob_t>& a, const pair<vector<WordID>, prob_t>& b) const {
     return a.second < b.second;
   }
 };
@@ -108,7 +108,7 @@ int main(int argc, char** argv) {
           ScoreP s = scorer->ScoreCandidate(list[j].first);
           double loss = 1.0 - s->ComputeScore();
           if (type == TER || type == AER) loss = 1.0 - loss;
-          double weighted_loss = loss * (joints[j] / marginal);
+          double weighted_loss = loss * (joints[j] / marginal).as_float();
           wl_acc += weighted_loss;
           if ((!output_list) && wl_acc > mbr_loss) break;
         }
diff --git a/phrasinator/ccrp_nt.h b/phrasinator/ccrp_nt.h
index 163b643a..811bce73 100644
--- a/phrasinator/ccrp_nt.h
+++ b/phrasinator/ccrp_nt.h
@@ -50,15 +50,26 @@ class CCRP_NoTable {
     return it->second;
   }
 
-  void increment(const Dish& dish) {
-    ++custs_[dish];
+  int increment(const Dish& dish) {  // returns 1 when the dish's count becomes 1, else 0
+    int table_diff = 0;
+    if (++custs_[dish] == 1)  // count for this dish is now exactly 1
+      table_diff = 1;
     ++num_customers_;
+    return table_diff;
   }
 
-  void decrement(const Dish& dish) {
-    if ((--custs_[dish]) == 0)
+  int decrement(const Dish& dish) {  // returns -1 when the dish's count reaches 0 (entry erased), else 0
+    int table_diff = 0;
+    int nc = --custs_[dish];
+    if (nc == 0) {
       custs_.erase(dish);
+      table_diff = -1;
+    } else if (nc < 0) {  // count went negative: report the dish and abort
+      std::cerr << "Dish counts dropped below zero for: " << dish << std::endl;
+      abort();
+    }
     --num_customers_;
+    return table_diff;
   }
 
   double prob(const Dish& dish, const double& p0) const {
@@ -66,6 +77,18 @@ class CCRP_NoTable {
     return (at_table + p0 * concentration_) / (num_customers_ + concentration_);
   }
 
+  // Log-domain counterpart of prob(): returns log((n_dish + p0 * concentration_) /
+  // (num_customers_ + concentration_)), with p0 supplied as logp0 = log(p0).
+  double logprob(const Dish& dish, const double& logp0) const {
+    const unsigned at_table = num_customers(dish);
+    const double log_denom = log(num_customers_ + concentration_);
+    // No customers on this dish: the numerator is just p0 * concentration_, so stay in
+    // log space. exp() would underflow to 0 for very small p0 and log(0) gives -inf.
+    if (at_table == 0)
+      return logp0 + log(concentration_) - log_denom;
+    return log(at_table + exp(logp0 + log(concentration_))) - log_denom;
+  }
+
   double log_crp_prob() const {
     return log_crp_prob(concentration_);
   }
diff --git a/training/mpi_batch_optimize.cc b/training/mpi_batch_optimize.cc
index 0ba8c530..046e921c 100644
--- a/training/mpi_batch_optimize.cc
+++ b/training/mpi_batch_optimize.cc
@@ -92,7 +92,7 @@ struct TrainingObserver : public DecoderObserver {
   void SetLocalGradientAndObjective(vector<double>* g, double* o) const {
     *o = acc_obj;
     for (SparseVector<prob_t>::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it)
-      (*g)[it->first] = it->second;
+      (*g)[it->first] = it->second.as_float();
   }
 
   virtual void NotifyDecodingStart(const SentenceMetadata& smeta) {
diff --git a/training/mpi_compute_cllh.cc b/training/mpi_compute_cllh.cc
index b496d196..d5caa745 100644
--- a/training/mpi_compute_cllh.cc
+++ b/training/mpi_compute_cllh.cc
@@ -1,6 +1,4 @@
-#include <sstream>
 #include <iostream>
-#include <fstream>
 #include <vector>
 #include <cassert>
 #include <cmath>
@@ -12,6 +10,7 @@
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 
+#include "sentence_metadata.h"
 #include "verbose.h"
 #include "hg.h"
 #include "prob.h"
@@ -52,7 +51,8 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   return true;
 }
 
-void ReadTrainingCorpus(const string& fname, int rank, int size, vector<string>* c, vector<int>* ids) {
+void ReadInstances(const string& fname, int rank, int size, vector<string>* c) {
+  assert(fname != "-");
   ReadFile rf(fname);
   istream& in = *rf.stream();
   string line;
@@ -60,20 +60,16 @@ void ReadTrainingCorpus(const string& fname, int rank, int size, vector<string>*
   while(in) {
     getline(in, line);
     if (!in) break;
-    if (lc % size == rank) {
-      c->push_back(line);
-      ids->push_back(lc);
-    }
+    if (lc % size == rank) c->push_back(line);
     ++lc;
   }
 }
 
 static const double kMINUS_EPSILON = -1e-6;
 
-struct TrainingObserver : public DecoderObserver {
-  void Reset() {
-    acc_obj = 0;
-  } 
+struct ConditionalLikelihoodObserver : public DecoderObserver {
+
+  ConditionalLikelihoodObserver() : trg_words(), acc_obj(), cur_obj() {}
 
   virtual void NotifyDecodingStart(const SentenceMetadata&) {
     cur_obj = 0;
@@ -120,8 +116,10 @@ struct TrainingObserver : public DecoderObserver {
     }
     assert(!isnan(log_ref_z));
     acc_obj += (cur_obj - log_ref_z);
+    trg_words += smeta.GetReference().size();
   }
 
+  unsigned trg_words;
   double acc_obj;
   double cur_obj;
   int state;
@@ -161,35 +159,35 @@ int main(int argc, char** argv) {
   if (conf.count("weights"))
     Weights::InitFromFile(conf["weights"].as<string>(), &weights);
 
-  // freeze feature set
-  //const bool freeze_feature_set = conf.count("freeze_feature_set");
-  //if (freeze_feature_set) FD::Freeze();
-
-  vector<string> corpus; vector<int> ids;
-  ReadTrainingCorpus(conf["training_data"].as<string>(), rank, size, &corpus, &ids);
+  vector<string> corpus;
+  ReadInstances(conf["training_data"].as<string>(), rank, size, &corpus);
   assert(corpus.size() > 0);
-  assert(corpus.size() == ids.size());
-
-  TrainingObserver observer;
-  double objective = 0;
 
-  observer.Reset();
   if (rank == 0)
-    cerr << "Each processor is decoding " << corpus.size() << " training examples...\n";
+    cerr << "Each processor is decoding ~" << corpus.size() << " training examples...\n";
 
-  for (int i = 0; i < corpus.size(); ++i) {
-    decoder.SetId(ids[i]);
+  ConditionalLikelihoodObserver observer;
+  for (int i = 0; i < corpus.size(); ++i)
     decoder.Decode(corpus[i], &observer);
-  }
 
+  double objective = 0;
+  unsigned total_words = 0;
 #ifdef HAVE_MPI
   reduce(world, observer.acc_obj, objective, std::plus<double>(), 0);
+  reduce(world, observer.trg_words, total_words, std::plus<unsigned>(), 0);
 #else
   objective = observer.acc_obj;
+  // Single-process build: no reduction happens, so take the local word count;
+  // otherwise total_words stays 0 and the per-word figures below divide by zero.
+  total_words = observer.trg_words;
 #endif
 
-  if (rank == 0)
-    cout << "OBJECTIVE: " << objective << endl;
+  if (rank == 0) {
+    cout << "CONDITIONAL LOG_e LIKELIHOOD: " << objective << endl;
+    cout << "CONDITIONAL LOG_2 LIKELIHOOD: " << (objective/log(2)) << endl;
+    cout << "         CONDITIONAL ENTROPY: " << (objective/log(2) / total_words) << endl;
+    cout << "                  PERPLEXITY: " << pow(2, (objective/log(2) / total_words)) << endl;
+  }
 
   return 0;
 }
diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc
index 2ef4a2e7..f87b7274 100644
--- a/training/mpi_online_optimize.cc
+++ b/training/mpi_online_optimize.cc
@@ -94,7 +94,7 @@ struct TrainingObserver : public DecoderObserver {
   void SetLocalGradientAndObjective(vector<double>* g, double* o) const {
     *o = acc_obj;
     for (SparseVector<prob_t>::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it)
-      (*g)[it->first] = it->second;
+      (*g)[it->first] = it->second.as_float();
   }
 
   virtual void NotifyDecodingStart(const SentenceMetadata& smeta) {
@@ -158,7 +158,7 @@ struct TrainingObserver : public DecoderObserver {
   void GetGradient(SparseVector<double>* g) const {
     g->clear();
     for (SparseVector<prob_t>::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it)
-      g->set_value(it->first, it->second);
+      g->set_value(it->first, it->second.as_float());
   }
 
   int total_complete;
diff --git a/utils/logval.h b/utils/logval.h
index 6fdc2c42..8a59d0b1 100644
--- a/utils/logval.h
+++ b/utils/logval.h
@@ -25,12 +25,13 @@ class LogVal {
   typedef LogVal<T> Self;
 
   LogVal() : s_(), v_(LOGVAL_LOG0) {}
-  explicit LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {}
+  LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {}
+  const Self& operator=(double x) { s_ = std::signbit(x); v_ = s_ ? std::log(-x) : std::log(x); return *this; }
   LogVal(init_minus_1) : s_(true),v_(0) {  }
   LogVal(init_1) : s_(),v_(0) {  }
   LogVal(init_0) : s_(),v_(LOGVAL_LOG0) {  }
-  LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {}
-  LogVal(unsigned x) : s_(0), v_(std::log(x)) { }
+  explicit LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {}
+  explicit LogVal(unsigned x) : s_(0), v_(std::log(x)) { }
   LogVal(double lnx,bool sign) : s_(sign),v_(lnx) {}
   LogVal(double lnx,init_lnx) : s_(),v_(lnx) {}
   static Self exp(T lnx) { return Self(lnx,false); }
@@ -141,9 +142,6 @@ class LogVal {
     return pow(1/root);
   }
 
-  operator T() const {
-    if (s_) return -std::exp(v_); else return std::exp(v_);
-  }
   T as_float() const {
     if (s_) return -std::exp(v_); else return std::exp(v_);
   }
-- 
cgit v1.2.3