author     Chris Dyer <redpony@gmail.com>   2009-12-08 21:38:55 -0500
committer  Chris Dyer <redpony@gmail.com>   2009-12-08 21:38:55 -0500
commit     dc6930c00b4b276883280cff1ed6dcd9ddef03c7 (patch)
tree       d76d25baf66459d13da4faedffd7d6faba28a513 /src
parent     5082307f382a6563d4ffb97034137da4e362e629 (diff)
LICENSE fixes, full support of lattice decoding
Diffstat (limited to 'src')
-rw-r--r--  src/array2d.h            |   1
-rw-r--r--  src/bottom_up_parser.cc  |  53
-rw-r--r--  src/cdec_ff.cc           |   1
-rw-r--r--  src/ff.cc                |  21
-rw-r--r--  src/ff.h                 |  15
-rw-r--r--  src/grammar.cc           |  11
-rw-r--r--  src/grammar.h            |   8
-rw-r--r--  src/lattice.cc           |  34
-rw-r--r--  src/lattice.h            |  24
-rw-r--r--  src/synparse.cc          | 212
10 files changed, 135 insertions, 245 deletions
diff --git a/src/array2d.h b/src/array2d.h
index 09d84d0b..e63eda0d 100644
--- a/src/array2d.h
+++ b/src/array2d.h
@@ -19,6 +19,7 @@ class Array2D {
width_(w), height_(h), data_(w*h, d) {}
Array2D(const Array2D& rhs) :
width_(rhs.width_), height_(rhs.height_), data_(rhs.data_) {}
+ bool empty() const { return data_.empty(); }
void resize(int w, int h, const T& d = T()) {
data_.resize(w * h, d);
width_ = w;
diff --git a/src/bottom_up_parser.cc b/src/bottom_up_parser.cc
index 349ed2de..b3315b8a 100644
--- a/src/bottom_up_parser.cc
+++ b/src/bottom_up_parser.cc
@@ -24,8 +24,18 @@ class PassiveChart {
inline int GetGoalIndex() const { return goal_idx_; }
private:
- void ApplyRules(const int i, const int j, const RuleBin* rules, const Hypergraph::TailNodeVector& tail);
- void ApplyRule(const int i, const int j, TRulePtr r, const Hypergraph::TailNodeVector& ant_nodes);
+ void ApplyRules(const int i,
+ const int j,
+ const RuleBin* rules,
+ const Hypergraph::TailNodeVector& tail,
+ const float lattice_cost);
+
+ void ApplyRule(const int i,
+ const int j,
+ const TRulePtr& r,
+ const Hypergraph::TailNodeVector& ant_nodes,
+ const float lattice_cost);
+
void ApplyUnaryRules(const int i, const int j);
const vector<GrammarPtr>& grammars_;
@@ -38,6 +48,7 @@ class PassiveChart {
const WordID goal_cat_; // category that is being searched for at [0,n]
TRulePtr goal_rule_;
int goal_idx_; // index of goal node, if found
+ const int lc_fid_;
static WordID kGOAL; // [Goal]
};
@@ -51,12 +62,12 @@ class ActiveChart {
act_chart_(psv_chart.size(), psv_chart.size()), psv_chart_(psv_chart) {}
struct ActiveItem {
- ActiveItem(const GrammarIter* g, const Hypergraph::TailNodeVector& a, double lcost) :
+ ActiveItem(const GrammarIter* g, const Hypergraph::TailNodeVector& a, float lcost) :
gptr_(g), ant_nodes_(a), lattice_cost(lcost) {}
explicit ActiveItem(const GrammarIter* g) :
- gptr_(g), ant_nodes_(), lattice_cost() {}
+ gptr_(g), ant_nodes_(), lattice_cost(0.0) {}
- void ExtendTerminal(int symbol, double src_cost, vector<ActiveItem>* out_cell) const {
+ void ExtendTerminal(int symbol, float src_cost, vector<ActiveItem>* out_cell) const {
const GrammarIter* ni = gptr_->Extend(symbol);
if (ni) out_cell->push_back(ActiveItem(ni, ant_nodes_, lattice_cost + src_cost));
}
@@ -73,14 +84,14 @@ class ActiveChart {
const GrammarIter* gptr_;
Hypergraph::TailNodeVector ant_nodes_;
- double lattice_cost; // TODO? use SparseVector<double>
+ float lattice_cost; // TODO? use SparseVector<double>
};
inline const vector<ActiveItem>& operator()(int i, int j) const { return act_chart_(i,j); }
void SeedActiveChart(const Grammar& g) {
int size = act_chart_.width();
for (int i = 0; i < size; ++i)
- if (g.HasRuleForSpan(i,i))
+ if (g.HasRuleForSpan(i,i,0))
act_chart_(i,i).push_back(ActiveItem(g.GetRoot()));
}
@@ -132,7 +143,8 @@ PassiveChart::PassiveChart(const string& goal,
nodemap_(input.size()+1, input.size()+1),
goal_cat_(TD::Convert(goal) * -1),
goal_rule_(new TRule("[Goal] ||| [" + goal + ",1] ||| [" + goal + ",1]")),
- goal_idx_(-1) {
+ goal_idx_(-1),
+ lc_fid_(FD::Convert("LatticeCost")) {
act_chart_.resize(grammars_.size());
for (int i = 0; i < grammars_.size(); ++i)
act_chart_[i] = new ActiveChart(forest, *this);
@@ -140,13 +152,19 @@ PassiveChart::PassiveChart(const string& goal,
cerr << " Goal category: [" << goal << ']' << endl;
}
-void PassiveChart::ApplyRule(const int i, const int j, TRulePtr r, const Hypergraph::TailNodeVector& ant_nodes) {
+void PassiveChart::ApplyRule(const int i,
+ const int j,
+ const TRulePtr& r,
+ const Hypergraph::TailNodeVector& ant_nodes,
+ const float lattice_cost) {
Hypergraph::Edge* new_edge = forest_->AddEdge(r, ant_nodes);
new_edge->prev_i_ = r->prev_i;
new_edge->prev_j_ = r->prev_j;
new_edge->i_ = i;
new_edge->j_ = j;
new_edge->feature_values_ = r->GetFeatureValues();
+ if (lattice_cost)
+ new_edge->feature_values_.set_value(lc_fid_, lattice_cost);
Cat2NodeMap& c2n = nodemap_(i,j);
const bool is_goal = (r->GetLHS() == kGOAL);
const Cat2NodeMap::iterator ni = c2n.find(r->GetLHS());
@@ -169,23 +187,24 @@ void PassiveChart::ApplyRule(const int i, const int j, TRulePtr r, const Hypergr
void PassiveChart::ApplyRules(const int i,
const int j,
const RuleBin* rules,
- const Hypergraph::TailNodeVector& tail) {
+ const Hypergraph::TailNodeVector& tail,
+ const float lattice_cost) {
const int n = rules->GetNumRules();
for (int k = 0; k < n; ++k)
- ApplyRule(i, j, rules->GetIthRule(k), tail);
+ ApplyRule(i, j, rules->GetIthRule(k), tail, lattice_cost);
}
void PassiveChart::ApplyUnaryRules(const int i, const int j) {
const vector<int>& nodes = chart_(i,j); // reference is important!
for (int gi = 0; gi < grammars_.size(); ++gi) {
- if (!grammars_[gi]->HasRuleForSpan(i,j)) continue;
+ if (!grammars_[gi]->HasRuleForSpan(i,j,input_.Distance(i,j))) continue;
for (int di = 0; di < nodes.size(); ++di) {
const WordID& cat = forest_->nodes_[nodes[di]].cat_;
const vector<TRulePtr>& unaries = grammars_[gi]->GetUnaryRulesForRHS(cat);
for (int ri = 0; ri < unaries.size(); ++ri) {
// cerr << "At (" << i << "," << j << "): applying " << unaries[ri]->AsString() << endl;
const Hypergraph::TailNodeVector ant(1, nodes[di]);
- ApplyRule(i, j, unaries[ri], ant); // may update nodes
+ ApplyRule(i, j, unaries[ri], ant, 0); // may update nodes
}
}
}
@@ -205,7 +224,7 @@ bool PassiveChart::Parse() {
int j = i + l;
for (int gi = 0; gi < grammars_.size(); ++gi) {
const Grammar& g = *grammars_[gi];
- if (g.HasRuleForSpan(i, j)) {
+ if (g.HasRuleForSpan(i, j, input_.Distance(i, j))) {
act_chart_[gi]->AdvanceDotsForAllItemsInCell(i, j, input_);
const vector<ActiveChart::ActiveItem>& cell = (*act_chart_[gi])(i,j);
@@ -213,7 +232,7 @@ bool PassiveChart::Parse() {
ai != cell.end(); ++ai) {
const RuleBin* rules = (ai->gptr_->GetRules());
if (!rules) continue;
- ApplyRules(i, j, rules, ai->ant_nodes_);
+ ApplyRules(i, j, rules, ai->ant_nodes_, ai->lattice_cost);
}
}
}
@@ -222,7 +241,7 @@ bool PassiveChart::Parse() {
for (int gi = 0; gi < grammars_.size(); ++gi) {
const Grammar& g = *grammars_[gi];
// deal with non-terminals that were just proved
- if (g.HasRuleForSpan(i, j))
+ if (g.HasRuleForSpan(i, j, input_.Distance(i,j)))
act_chart_[gi]->ExtendActiveItems(i, i, j);
}
}
@@ -231,7 +250,7 @@ bool PassiveChart::Parse() {
const Hypergraph::Node& node = forest_->nodes_[dh[di]];
if (node.cat_ == goal_cat_) {
Hypergraph::TailNodeVector ant(1, node.id_);
- ApplyRule(0, input_.size(), goal_rule_, ant);
+ ApplyRule(0, input_.size(), goal_rule_, ant, 0);
}
}
}
diff --git a/src/cdec_ff.cc b/src/cdec_ff.cc
index 846a908e..0a4f3d5e 100644
--- a/src/cdec_ff.cc
+++ b/src/cdec_ff.cc
@@ -11,6 +11,7 @@ boost::shared_ptr<FFRegistry> global_ff_registry;
void register_feature_functions() {
global_ff_registry->Register("LanguageModel", new FFFactory<LanguageModel>);
global_ff_registry->Register("WordPenalty", new FFFactory<WordPenalty>);
+ global_ff_registry->Register("SourceWordPenalty", new FFFactory<SourceWordPenalty>);
global_ff_registry->Register("RelativeSentencePosition", new FFFactory<RelativeSentencePosition>);
global_ff_registry->Register("MarkovJump", new FFFactory<MarkovJump>);
global_ff_registry->Register("BlunsomSynchronousParseHack", new FFFactory<BlunsomSynchronousParseHack>);
diff --git a/src/ff.cc b/src/ff.cc
index 488e6468..2ae5b9eb 100644
--- a/src/ff.cc
+++ b/src/ff.cc
@@ -36,6 +36,27 @@ void WordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
features->set_value(fid_, edge.rule_->EWords() * value_);
}
+SourceWordPenalty::SourceWordPenalty(const string& param) :
+ fid_(FD::Convert("SourceWordPenalty")),
+ value_(-1.0 / log(10)) {
+ if (!param.empty()) {
+ cerr << "Warning SourceWordPenalty ignoring parameter: " << param << endl;
+ }
+}
+
+void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_states,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* state) const {
+ (void) smeta;
+ (void) ant_states;
+ (void) state;
+ (void) estimated_features;
+ features->set_value(fid_, edge.rule_->FWords() * value_);
+}
+
ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>& models) :
models_(models),
weights_(w),
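
The new SourceWordPenalty mirrors WordPenalty on the source side: each edge gets a feature equal to the rule's source-terminal count times -1/log(10), i.e. a word count expressed in negative log-10 units. A minimal standalone sketch of that arithmetic (not part of the commit):

#include <cmath>
#include <cstdio>

int main() {
  const double value = -1.0 / log(10.0);  // same constant as the SourceWordPenalty ctor, ~ -0.4343
  printf("%.4f\n", 3 * value);            // a rule with 3 source words contributes ~ -1.3029
  return 0;
}
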
diff --git a/src/ff.h b/src/ff.h
index c97e2fe2..e962b4ba 100644
--- a/src/ff.h
+++ b/src/ff.h
@@ -89,6 +89,21 @@ class WordPenalty : public FeatureFunction {
const double value_;
};
+class SourceWordPenalty : public FeatureFunction {
+ public:
+ SourceWordPenalty(const std::string& param);
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+ private:
+ const int fid_;
+ const double value_;
+};
+
// this class is a set of FeatureFunctions that can be used to score, rescore,
// etc. a (translation?) forest
class ModelSet {
diff --git a/src/grammar.cc b/src/grammar.cc
index e0b2a09e..e19bd344 100644
--- a/src/grammar.cc
+++ b/src/grammar.cc
@@ -15,9 +15,10 @@ RuleBin::~RuleBin() {}
GrammarIter::~GrammarIter() {}
Grammar::~Grammar() {}
-bool Grammar::HasRuleForSpan(int i, int j) const {
+bool Grammar::HasRuleForSpan(int i, int j, int distance) const {
(void) i;
(void) j;
+ (void) distance;
return true; // always true by default
}
@@ -119,8 +120,8 @@ void TextGrammar::ReadFromFile(const string& filename) {
cerr << " " << rule_count << " rules read.\n";
}
-bool TextGrammar::HasRuleForSpan(int i, int j) const {
- return (max_span_ >= (j - i));
+bool TextGrammar::HasRuleForSpan(int i, int j, int distance) const {
+ return (max_span_ >= distance);
}
GlueGrammar::GlueGrammar(const string& file) : TextGrammar(file) {}
@@ -136,7 +137,7 @@ GlueGrammar::GlueGrammar(const string& goal_nt, const string& default_nt) {
//cerr << "GLUE: " << glue->AsString() << endl;
}
-bool GlueGrammar::HasRuleForSpan(int i, int j) const {
+bool GlueGrammar::HasRuleForSpan(int i, int j, int distance) const {
(void) j;
return (i == 0);
}
@@ -156,7 +157,7 @@ PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat)
}
}
-bool PassThroughGrammar::HasRuleForSpan(int i, int j) const {
+bool PassThroughGrammar::HasRuleForSpan(int i, int j, int distance) const {
const set<int>& hr = has_rule_[i];
if (i == j) { return !hr.empty(); }
return (hr.find(j) != hr.end());
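
HasRuleForSpan now receives the lattice distance because, with lattice input, the chart indices i and j count lattice nodes rather than surface words: a skip arc can make the true number of words covered by [i,j] smaller than j - i. A minimal sketch of the TextGrammar check after this change (assumes lattice.h from this commit; the helper below is illustrative, not the real call site):

#include "lattice.h"

// A rule table limited to max_span source words should compare against the
// lattice distance, not the raw index width, otherwise wide-but-short lattice
// cells would be blocked by the old (j - i) test.
bool text_grammar_allows(const Lattice& input, int i, int j, int max_span) {
  return max_span >= input.Distance(i, j);   // previously: max_span >= (j - i)
}
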
diff --git a/src/grammar.h b/src/grammar.h
index 4a03c505..3471e3f1 100644
--- a/src/grammar.h
+++ b/src/grammar.h
@@ -28,7 +28,7 @@ struct Grammar {
virtual ~Grammar();
virtual const GrammarIter* GetRoot() const = 0;
- virtual bool HasRuleForSpan(int i, int j) const;
+ virtual bool HasRuleForSpan(int i, int j, int distance) const;
// cat is the category to be rewritten
inline const std::vector<TRulePtr>& GetAllUnaryRules() const {
@@ -59,7 +59,7 @@ struct TextGrammar : public Grammar {
virtual const GrammarIter* GetRoot() const;
void AddRule(const TRulePtr& rule);
void ReadFromFile(const std::string& filename);
- virtual bool HasRuleForSpan(int i, int j) const;
+ virtual bool HasRuleForSpan(int i, int j, int distance) const;
const std::vector<TRulePtr>& GetUnaryRules(const WordID& cat) const;
private:
int max_span_;
@@ -70,12 +70,12 @@ struct GlueGrammar : public TextGrammar {
// read glue grammar from file
explicit GlueGrammar(const std::string& file);
GlueGrammar(const std::string& goal_nt, const std::string& default_nt); // "S", "X"
- virtual bool HasRuleForSpan(int i, int j) const;
+ virtual bool HasRuleForSpan(int i, int j, int distance) const;
};
struct PassThroughGrammar : public TextGrammar {
PassThroughGrammar(const Lattice& input, const std::string& cat);
- virtual bool HasRuleForSpan(int i, int j) const;
+ virtual bool HasRuleForSpan(int i, int j, int distance) const;
private:
std::vector<std::set<int> > has_rule_; // index by [i][j]
};
diff --git a/src/lattice.cc b/src/lattice.cc
index aa1df3db..56bc9551 100644
--- a/src/lattice.cc
+++ b/src/lattice.cc
@@ -5,6 +5,39 @@
using namespace std;
+static const int kUNREACHABLE = 99999999;
+
+void Lattice::ComputeDistances() {
+ const int n = this->size() + 1;
+ dist_.resize(n, n, kUNREACHABLE);
+ for (int i = 0; i < this->size(); ++i) {
+ const vector<LatticeArc>& alts = (*this)[i];
+ for (int j = 0; j < alts.size(); ++j)
+ dist_(i, i + alts[j].dist2next) = 1;
+ }
+ for (int k = 0; k < n; ++k) {
+ for (int i = 0; i < n; ++i) {
+ for (int j = 0; j < n; ++j) {
+ const int dp = dist_(i,k) + dist_(k,j);
+ if (dist_(i,j) > dp)
+ dist_(i,j) = dp;
+ }
+ }
+ }
+
+ for (int i = 0; i < n; ++i) {
+ int latest = kUNREACHABLE;
+ for (int j = n-1; j >= 0; --j) {
+ const int c = dist_(i,j);
+ if (c < kUNREACHABLE)
+ latest = c;
+ else
+ dist_(i,j) = latest;
+ }
+ }
+ // cerr << dist_ << endl;
+}
+
bool LatticeTools::LooksLikePLF(const string &line) {
return (line.size() > 5) && (line.substr(0,4) == "((('");
}
@@ -23,5 +56,6 @@ void LatticeTools::ConvertTextOrPLF(const string& text_or_plf, Lattice* pl) {
HypergraphIO::PLFtoLattice(text_or_plf, pl);
else
ConvertTextToLattice(text_or_plf, pl);
+ pl->ComputeDistances();
}
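
ComputeDistances() is an all-pairs shortest-path (Floyd-Warshall) over the size()+1 lattice node indices, seeded with one unit per arc and followed by a pass that replaces unreachable (i,j) cells with the nearest reachable distance to the right. A standalone sketch of the core relaxation on a toy three-node graph (illustrative only; the diagonal handling differs slightly from the committed code):

#include <algorithm>
#include <cstdio>

int main() {
  const int INF = 99999999;               // same sentinel as kUNREACHABLE
  int d[3][3];
  for (int i = 0; i < 3; ++i)
    for (int j = 0; j < 3; ++j)
      d[i][j] = (i == j ? 0 : INF);
  d[0][1] = 1; d[0][2] = 1; d[1][2] = 1;  // arcs 0->1, 0->2 (a skip arc), 1->2
  for (int k = 0; k < 3; ++k)             // Floyd-Warshall relaxation
    for (int i = 0; i < 3; ++i)
      for (int j = 0; j < 3; ++j)
        d[i][j] = std::min(d[i][j], d[i][k] + d[k][j]);
  printf("%d\n", d[0][2]);                // 1: the skip arc beats the 0->1->2 path
  return 0;
}
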
diff --git a/src/lattice.h b/src/lattice.h
index 1177e768..71589b92 100644
--- a/src/lattice.h
+++ b/src/lattice.h
@@ -4,6 +4,14 @@
#include <string>
#include <vector>
#include "wordid.h"
+#include "array2d.h"
+
+class Lattice;
+struct LatticeTools {
+ static bool LooksLikePLF(const std::string &line);
+ static void ConvertTextToLattice(const std::string& text, Lattice* pl);
+ static void ConvertTextOrPLF(const std::string& text_or_plf, Lattice* pl);
+};
struct LatticeArc {
WordID label;
@@ -14,18 +22,20 @@ struct LatticeArc {
};
class Lattice : public std::vector<std::vector<LatticeArc> > {
+ friend void LatticeTools::ConvertTextOrPLF(const std::string& text_or_plf, Lattice* pl);
public:
Lattice() {}
explicit Lattice(size_t t, const std::vector<LatticeArc>& v = std::vector<LatticeArc>()) :
std::vector<std::vector<LatticeArc> >(t, v) {}
-
- // TODO add distance functions
-};
+ int Distance(int from, int to) const {
+ if (dist_.empty())
+ return (to - from);
+ return dist_(from, to);
+ }
-struct LatticeTools {
- static bool LooksLikePLF(const std::string &line);
- static void ConvertTextToLattice(const std::string& text, Lattice* pl);
- static void ConvertTextOrPLF(const std::string& text_or_plf, Lattice* pl);
+ private:
+ void ComputeDistances();
+ Array2D<int> dist_;
};
#endif
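
Taken together, the lattice changes let the decoder accept PLF ("Python Lattice Format") input, which LooksLikePLF() detects by the leading "((('". A hedged end-to-end sketch, assuming the cdec headers and the HypergraphIO PLF parser are linked in (the words, costs, and exact PLF punctuation are illustrative):

#include <iostream>
#include <string>
#include "lattice.h"

int main() {
  // Each lattice node is a tuple of (label, cost, distance-to-next) arcs;
  // the second arc of node 0 skips node 1 entirely.
  std::string plf = "((('ein',0.5,1),('eine',0.5,2),),(('kleines',1.0,1),),)";
  Lattice lat;
  LatticeTools::ConvertTextOrPLF(plf, &lat);    // also triggers ComputeDistances()
  std::cout << lat.Distance(0, 2) << std::endl; // 1, not 2: the skip arc is shorter
  return 0;
}
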
diff --git a/src/synparse.cc b/src/synparse.cc
deleted file mode 100644
index 96588f1e..00000000
--- a/src/synparse.cc
+++ /dev/null
@@ -1,212 +0,0 @@
-#include <iostream>
-#include <ext/hash_map>
-#include <ext/hash_set>
-#include <utility>
-
-#include <boost/multi_array.hpp>
-#include <boost/functional/hash.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "prob.h"
-#include "tdict.h"
-#include "filelib.h"
-
-using namespace std;
-using namespace __gnu_cxx;
-namespace po = boost::program_options;
-
-const prob_t kMONO(1.0); // 0.6
-const prob_t kINV(1.0); // 0.1
-const prob_t kLEX(1.0); // 0.3
-
-typedef hash_map<vector<WordID>, hash_map<vector<WordID>, prob_t, boost::hash<vector<WordID> > >, boost::hash<vector<WordID> > > PTable;
-typedef boost::multi_array<prob_t, 4> CChart;
-typedef pair<int,int> SpanType;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("phrasetable,p",po::value<string>(), "[REQD] Phrase pairs for ITG alignment")
- ("input,i",po::value<string>()->default_value("-"), "Input file")
- ("help,h", "Help");
- po::options_description dcmdline_options;
- dcmdline_options.add(opts);
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- bool flag = false;
- if (!conf->count("phrasetable")) {
- cerr << "Please specify a grammar file with -p <GRAMMAR.TXT>\n";
- flag = true;
- }
- if (flag || conf->count("help")) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-void LoadITGPhrasetable(const string& fname, PTable* ptable) {
- const WordID sep = TD::Convert("|||");
- ReadFile rf(fname);
- istream& in = *rf.stream();
- assert(in);
- int lc = 0;
- while(in) {
- string line;
- getline(in, line);
- if (line.empty()) continue;
- ++lc;
- vector<WordID> full, f, e;
- TD::ConvertSentence(line, &full);
- int i = 0;
- for (; i < full.size(); ++i) {
- if (full[i] == sep) break;
- f.push_back(full[i]);
- }
- ++i;
- for (; i < full.size(); ++i) {
- if (full[i] == sep) break;
- e.push_back(full[i]);
- }
- ++i;
- prob_t prob(0.000001);
- if (i < full.size()) { prob = prob_t(atof(TD::Convert(full[i]))); ++i; }
-
- if (i < full.size()) { cerr << "Warning line " << lc << " has extra stuff.\n"; }
- assert(f.size() > 0);
- assert(e.size() > 0);
- (*ptable)[f][e] = prob;
- }
- cerr << "Read " << lc << " phrase pairs\n";
-}
-
-void FindPhrases(const vector<WordID>& e, const vector<WordID>& f, const PTable& pt, CChart* pcc) {
- CChart& cc = *pcc;
- const size_t n = f.size();
- const size_t m = e.size();
- typedef hash_map<vector<WordID>, vector<SpanType>, boost::hash<vector<WordID> > > PhraseToSpan;
- PhraseToSpan e_locations;
- for (int i = 0; i < m; ++i) {
- const int mel = m - i;
- vector<WordID> e_phrase;
- for (int el = 0; el < mel; ++el) {
- e_phrase.push_back(e[i + el]);
- e_locations[e_phrase].push_back(make_pair(i, i + el + 1));
- }
- }
- //cerr << "Cached the locations of " << e_locations.size() << " e-phrases\n";
-
- for (int s = 0; s < n; ++s) {
- const int mfl = n - s;
- vector<WordID> f_phrase;
- for (int fl = 0; fl < mfl; ++fl) {
- f_phrase.push_back(f[s + fl]);
- PTable::const_iterator it = pt.find(f_phrase);
- if (it == pt.end()) continue;
- const hash_map<vector<WordID>, prob_t, boost::hash<vector<WordID> > >& es = it->second;
- for (hash_map<vector<WordID>, prob_t, boost::hash<vector<WordID> > >::const_iterator eit = es.begin(); eit != es.end(); ++eit) {
- PhraseToSpan::iterator loc = e_locations.find(eit->first);
- if (loc == e_locations.end()) continue;
- const vector<SpanType>& espans = loc->second;
- for (int j = 0; j < espans.size(); ++j) {
- cc[s][s + fl + 1][espans[j].first][espans[j].second] = eit->second;
- //cerr << '[' << s << ',' << (s + fl + 1) << ',' << espans[j].first << ',' << espans[j].second << "] is C\n";
- }
- }
- }
- }
-}
-
-long long int evals = 0;
-
-void ProcessSynchronousCell(const int s,
- const int t,
- const int u,
- const int v,
- const prob_t& lex,
- const prob_t& mono,
- const prob_t& inv,
- const CChart& tc, CChart* ntc) {
- prob_t& inside = (*ntc)[s][t][u][v];
- // cerr << log(tc[s][t][u][v]) << " + " << log(lex) << endl;
- inside = tc[s][t][u][v] * lex;
- // cerr << " terminal span: " << log(inside) << endl;
- if (t - s == 1) return;
- if (v - u == 1) return;
- for (int x = s+1; x < t; ++x) {
- for (int y = u+1; y < v; ++y) {
- const prob_t m = (*ntc)[s][x][u][y] * (*ntc)[x][t][y][v] * mono;
- const prob_t i = (*ntc)[s][x][y][v] * (*ntc)[x][t][u][y] * inv;
- // cerr << log(i) << "\t" << log(m) << endl;
- inside += m;
- inside += i;
- evals++;
- }
- }
- // cerr << " span: " << log(inside) << endl;
-}
-
-prob_t SynchronousParse(const int n, const int m, const prob_t& lex, const prob_t& mono, const prob_t& inv, const CChart& tc, CChart* ntc) {
- for (int fl = 0; fl < n; ++fl) {
- for (int el = 0; el < m; ++el) {
- const int ms = n - fl;
- for (int s = 0; s < ms; ++s) {
- const int t = s + fl + 1;
- const int mu = m - el;
- for (int u = 0; u < mu; ++u) {
- const int v = u + el + 1;
- //cerr << "Processing cell [" << s << ',' << t << ',' << u << ',' << v << "]\n";
- ProcessSynchronousCell(s, t, u, v, lex, mono, inv, tc, ntc);
- }
- }
- }
- }
- return (*ntc)[0][n][0][m];
-}
-
-int main(int argc, char** argv) {
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
- PTable ptable;
- LoadITGPhrasetable(conf["phrasetable"].as<string>(), &ptable);
- ReadFile rf(conf["input"].as<string>());
- istream& in = *rf.stream();
- int lc = 0;
- const WordID sep = TD::Convert("|||");
- while(in) {
- string line;
- getline(in, line);
- if (line.empty()) continue;
- ++lc;
- vector<WordID> full, f, e;
- TD::ConvertSentence(line, &full);
- int i = 0;
- for (; i < full.size(); ++i) {
- if (full[i] == sep) break;
- f.push_back(full[i]);
- }
- ++i;
- for (; i < full.size(); ++i) {
- if (full[i] == sep) break;
- e.push_back(full[i]);
- }
- if (e.empty()) cerr << "E is empty!\n";
- if (f.empty()) cerr << "F is empty!\n";
- if (e.empty() || f.empty()) continue;
- int n = f.size();
- int m = e.size();
- cerr << "Synchronous chart has " << (n * n * m * m) << " cells\n";
- clock_t start = clock();
- CChart cc(boost::extents[n+1][n+1][m+1][m+1]);
- FindPhrases(e, f, ptable, &cc);
- CChart ntc(boost::extents[n+1][n+1][m+1][m+1]);
- prob_t likelihood = SynchronousParse(n, m, kLEX, kMONO, kINV, cc, &ntc);
- clock_t end = clock();
- cerr << "log Z: " << log(likelihood) << endl;
- cerr << " Z: " << likelihood << endl;
- double etime = (end - start) / 1000000.0;
- cout << " time: " << etime << endl;
- cout << "evals: " << evals << endl;
- }
- return 0;
-}
-