From f1916c39b820b7d10d1ae7d7447675c4224d8197 Mon Sep 17 00:00:00 2001
From: Patrick Simianer <p@simianer.de>
Date: Sun, 24 Aug 2014 17:26:05 +0100
Subject: fixes

---
 fast/grammar.hh        |  51 +++++++++++++++++++++++----------------------
 fast/hypergraph.cc     |   6 +++---
 fast/parse.cc          |  55 -------------------------------------------------
 fast/parse.hh          |  11 +++++++---
 fast/test/Makefile     |   5 ++++-
 fast/test/test_grammar | Bin 56832 -> 60943 bytes
 fast/util.hh           |  19 ++++++++++++++++-
 fast/weaver.hh         |   5 +++++
 8 files changed, 64 insertions(+), 88 deletions(-)
 delete mode 100644 fast/parse.cc

diff --git a/fast/grammar.hh b/fast/grammar.hh
index e5acb8a..4906c46 100644
--- a/fast/grammar.hh
+++ b/fast/grammar.hh
@@ -49,32 +49,29 @@ struct NT : public Item {
     index_ = 0; // default
     string t(s);
     t.erase(0, 1); t.pop_back(); // remove '[' and ']'
-    istringstream ss(s);
+    istringstream ss(t);
     if (ss >> index_) { // [i]
       symbol_ = "";
       index_ = stoi(s);
-
       return;
-    } else { // [X]
-      symbol_ = s;
-
-      return;
-    }
-    string buf;
-    size_t j = 0;
-    while (ss.good() && getline(ss, buf, ',')) {
-      if (j == 0) {
-        symbol_ = buf;
-      } else {
-        index_ = stoi(buf);
+    } else {
+      ss.clear();
+      string buf;
+      size_t j = 0;
+      while (ss.good() && getline(ss, buf, ',')) {
+        if (j == 0) {
+          symbol_ = buf;
+        } else {
+          index_ = stoi(buf);
+        }
+        j++;
       }
-      j++;
     }
   }
 
   virtual size_t index() const { return index_; }
   virtual symbol_t symbol() const { return symbol_; }
-  virtual item_type type() { return NON_TERMINAL; }
+  virtual item_type type() const { return NON_TERMINAL; }
 
   virtual ostream&
   repr(ostream& os) const
@@ -103,7 +100,7 @@ struct T : public Item {
   }
 
   virtual symbol_t symbol() const { return symbol_; }
-  virtual item_type type() { return TERMINAL; }
+  virtual item_type type() const { return TERMINAL; }
 
   virtual ostream&
   repr(ostream& os) const
@@ -169,7 +166,7 @@ Sv::SparseVector<string, score_t>* f;
   {
     istringstream ss(s);
     string buf;
-    size_t j = 0, i = 0;
+    size_t j = 0, i = 1;
     r->arity = 0;
     vector<NT*> rhs_non_terminals;
     r->f = new Sv::SparseVector<string, score_t>();
@@ -187,7 +184,7 @@ Sv::SparseVector<string, score_t>* f;
       } else if (j == 2) { // target
         Item* item = vocab.get(buf);
         if (item->type() == NON_TERMINAL) {
-          r->order[i] = item->index();
+          r->order.insert(make_pair(i, item->index()));
           i++;
           if (item->symbol() == "") { // only [1], [2] ... on target
             reinterpret_cast<NT*>(item)->symbol_ = \
@@ -223,9 +220,13 @@ Sv::SparseVector<string, score_t>* f;
     }
     os << "}, f:";
     f->repr(os);
-    os <<  ", arity=" << arity << \
-     ", map:" << "TODO" << \
-     ">";
+    os << ", arity=" << arity << \
+     ", order:{";
+    for (auto it = order.begin(); it != order.end(); it++) {
+      os << it->first << "->" << it->second;
+      if (next(it) != order.end()) os << ", ";
+    }
+    os << "}>";
 
     return os;
   }
@@ -246,14 +247,14 @@ Sv::SparseVector<string, score_t>* f;
     }
     os << " ||| ";
     f->escaped(os);
-    os << " ||| ";
-    os << "TODO";
+    os << " ||| " << \
+     "TODO";
 
     return os;
   };
 
   friend ostream&
-  operator<<(ostream& os, const Rule& r)
+  operator<<(ostream& os, Rule const& r)
   {
     return r.repr(os);
   };
diff --git a/fast/hypergraph.cc b/fast/hypergraph.cc
index d9a51a5..2b33ff4 100644
--- a/fast/hypergraph.cc
+++ b/fast/hypergraph.cc
@@ -69,12 +69,12 @@ viterbi(Hypergraph& hg)
 void
 viterbi_path(Hypergraph& hg, Path& p)
 {
-  //list<Node*>::iterator root = \
+  list<Node*>::iterator root = \
     find_if(hg.nodes.begin(), hg.nodes.end(), \
     [](Node* n) { return n->incoming.size() == 0; });
-  list<Node*>::iterator root = hg.nodes.begin();
+  //list<Node*>::iterator root = hg.nodes.begin();
 
-  //Hg::topological_sort(hg.nodes, root);
+  Hg::topological_sort(hg.nodes, root);
   //  ^^^ FIXME do I need to do this when reading from file?
   Semiring::Viterbi<score_t> semiring;
   Hg::init(hg.nodes, root, semiring);
diff --git a/fast/parse.cc b/fast/parse.cc
deleted file mode 100644
index 06c9fa0..0000000
--- a/fast/parse.cc
+++ /dev/null
@@ -1,55 +0,0 @@
-#include "parse.hh"
-
-
-namespace Parse {
-
-
-} // 
-
-
-vector<G::T> tokenize(string s)
-{
-  istringstream ss(s);
-  vector<G::T> res; 
-  while (ss.good()) {
-    string t;
-    ss >> t;
-    G::T i(t);
-    cout << i.word << endl;
-    res.push_back(i); 
-  }
-  return res;
-}
-
-
-bool operator==(vector<G::Item> const& a, vector<G::Item> const& b)
-{
-  if (a.size() != b.size()) return false;
-  for (auto it: a)
-}
-
-int main(int argc, char** argv)
-{
-  string in("karten haie");
-  vector<G::T> tok = tokenize(in);
-  for (auto it: tok)
-    cout << it.word << ",";
-  cout << endl;
-  size_t n = tok.size();
-
-  G::Grammar g(argv[1]);
-
-  vector<Span> spans; 
-  Parse::visit(spans, 1, 0, 6);
-  for (auto it: spans) {
-    cout << "(" << it.first << "," << it.second << ")" << endl;
-  }
-
-  Parse::Chart active(n);
-  Parse::Chart passive(n);
-
-  //init(tok, n, active, passive, g);
-  
-  cout << *(g.flat.at(0)) << endl;
-}
-
diff --git a/fast/parse.hh b/fast/parse.hh
index 9fbcdea..33ea9ce 100644
--- a/fast/parse.hh
+++ b/fast/parse.hh
@@ -6,6 +6,8 @@
 #include <unordered_map>
 
 #include "grammar.hh"
+#include "util.hh"
+#include "weaver.hh"
 
 
 using namespace std;
@@ -74,9 +76,10 @@ struct Chart
   string h(ChartItem* item, Span s)
   {
     ostringstream ss;
-    ss << item->rule->lhs->symbol;
+    ss << item->rule->lhs->symbol(); // stream the LHS symbol into the key; a bare call discards it
     ss << s.first;
     ss << s.second;
+
     return ss.str();
   }
 
@@ -92,9 +95,11 @@ struct Chart
 };
 
 
-void init(vector<G::T> const& in, size_t n, Chart& active,  Chart& passive, G::Grammar const& g)
+void
+init(vector<symbol_t> const& in, size_t n, Chart& active,  Chart& passive, G::Grammar const& g)
 {
-  for (auto rule: g.flat) {
+  for (auto rule: g.rules) { // NOTE(review): stub - in, n, active and passive are not used in this body
+    cout << *rule << endl; // print each grammar rule to stdout, one per line
   }
 }
 
diff --git a/fast/test/Makefile b/fast/test/Makefile
index 0140f63..65e97ef 100644
--- a/fast/test/Makefile
+++ b/fast/test/Makefile
@@ -3,7 +3,7 @@ CFLAGS=-std=c++11 -O3 -I../
 TCMALLOC=/home/pks/src/weaver/fast/gperftools-2.1/lib/libtcmalloc_minimal.a -pthread
 
 
-all: test_grammar test_sparse_vector
+all: test_grammar test_sparse_vector test_parse
 
 test_grammar: test_grammar.cc ../grammar.hh
 	$(COMPILER) $(CFLAGS) -lstdc++ -lm $(TCMALLOC) test_grammar.cc -o test_grammar
@@ -11,6 +11,9 @@ test_grammar: test_grammar.cc ../grammar.hh
 test_sparse_vector: test_sparse_vector.cc ../sparse_vector.hh
 	$(COMPILER) $(CFLAGS) -lstdc++ -lm $(TCMALLOC) test_sparse_vector.cc -o test_sparse_vector
 
+test_parse: test_parse.cc ../parse.hh ../grammar.hh ../util.hh
+	$(COMPILER) $(CFLAGS) -lstdc++ -lm $(TCMALLOC) test_parse.cc -o test_parse
+
 clean:
 	rm -f test_grammar test_sparse_vector
 
diff --git a/fast/test/test_grammar b/fast/test/test_grammar
index 088d55a..6cf7ad5 100755
Binary files a/fast/test/test_grammar and b/fast/test/test_grammar differ
diff --git a/fast/util.hh b/fast/util.hh
index c3e087e..9ce19da 100644
--- a/fast/util.hh
+++ b/fast/util.hh
@@ -2,13 +2,16 @@
 
 #include <string>
 
+#include "weaver.hh"
+
 using namespace std;
 
 
 namespace util {
 
 inline string
-json_escape(const string& s) { // FIXME: only inline?
+json_escape(const string& s)
+{
   ostringstream os;
   for (auto it = s.cbegin(); it != s.cend(); it++) {
     switch (*it) {
@@ -26,5 +29,19 @@ json_escape(const string& s) { // FIXME: only inline?
   return os.str();
 }
 
+// Split s on whitespace; empty or all-blank input yields no tokens.
+inline vector<symbol_t>
+tokenize(string s)
+{
+  istringstream ss(s);
+  vector<symbol_t> r;
+  string buf;
+  // Loop on the extraction itself: ss.good() is still true before a read that
+  // fails at end of input, so testing it would append a spurious empty token.
+  while (ss >> buf) r.push_back(buf);
+
+  return r;
+}
+
 } // namespace util
 
diff --git a/fast/weaver.hh b/fast/weaver.hh
index 39d5391..e89b4dd 100644
--- a/fast/weaver.hh
+++ b/fast/weaver.hh
@@ -1,5 +1,10 @@
 #pragma once
 
+#include <string>
+
+using namespace std;
+
+
 typedef double score_t;
 typedef string symbol_t;
 
-- 
cgit v1.2.3