summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Dyer <redpony@gmail.com>2014-03-27 00:07:41 -0400
committerChris Dyer <redpony@gmail.com>2014-03-27 00:07:41 -0400
commit34785db78a0ad12f0fe74d98924acc20a8cab79a (patch)
treead7bf89ee42021e167174a64f47f0141b3ac774c
parent63a3894f2f2649787d5656adb09579e494c791d2 (diff)
breadth first iterator for tree fragment
-rw-r--r--decoder/tree2string_translator.cc32
-rw-r--r--decoder/tree_fragment.cc8
-rw-r--r--decoder/tree_fragment.h73
3 files changed, 93 insertions, 20 deletions
diff --git a/decoder/tree2string_translator.cc b/decoder/tree2string_translator.cc
index ac9c0d74..1c249836 100644
--- a/decoder/tree2string_translator.cc
+++ b/decoder/tree2string_translator.cc
@@ -13,25 +13,6 @@
using namespace std;
-// root: S
-// A implication: (S [A] *INCOMPLETE*
-// B implication: (S [A] [B] *INCOMPLETE*
-// *0* implication: (S _[A] [B])
-// a implication: (S (A a *INCOMPLETE* [B])
-// a implication: (S (A a a *INCOMPLETE* [B])
-// *0* implication: (S (A a a) _[B])
-// D implication: (S (A a a) (B [D] *INCOMPLETE*)
-// *0* implication: (S (A a a) (B _[D]))
-// d implication: (S (A a a) (B (D d *INCOMPLETE*))
-// *0* implication: (S (A a a) (B (D d)))
-// --there are no further outgoing links possible--
-
-// root: S
-// A implication: (S [A] *INCOMPLETE*
-// B implication: (S [A] [B] *INCOMPLETE*
-// *0* implication: (S _[A] [B])
-// *0* implication: (S [A] _[B])
-// b implication: (S [A] (B b *INCOMPLETE*))
struct Tree2StringGrammarNode {
map<unsigned, Tree2StringGrammarNode> next;
string rules;
@@ -44,8 +25,19 @@ void ReadTree2StringGrammar(istream* in, unordered_map<unsigned, Tree2StringGram
size_t pos = line.find("|||");
assert(pos != string::npos);
assert(pos > 3);
- if (line[pos - 1] == ' ') --pos;
+ unsigned xc = 0;
+ while (line[pos - 1] == ' ') { --pos; xc++; }
cdec::TreeFragment rule_src(line.substr(0, pos), true);
+ Tree2StringGrammarNode* cur = &roots[rule_src.root];
+ for (auto sym : rule_src)
+ cur = &cur->next[sym];
+ pos += 3 + xc;
+ while(line[pos] == ' ') { ++pos; }
+ size_t pos2 = line.find("|||", pos);
+ assert(pos2 != string::npos);
+ while (line[pos2 - 1] == ' ') { --pos2; }
+ cur->rules = line.substr(pos, pos2 - pos);
+ cerr << "OUTPUT = '" << cur->rules << "'\n";
}
}
diff --git a/decoder/tree_fragment.cc b/decoder/tree_fragment.cc
index d5c30f58..93aad64e 100644
--- a/decoder/tree_fragment.cc
+++ b/decoder/tree_fragment.cc
@@ -112,4 +112,12 @@ void TreeFragment::ParseRec(const string& tree, bool afs, unsigned cp, unsigned
*psymp = symp;
}
+BreadthFirstIterator TreeFragment::begin() const {
+ return BreadthFirstIterator(this);
+}
+
+BreadthFirstIterator TreeFragment::end() const {
+ return BreadthFirstIterator(this, 0);
+}
+
}
diff --git a/decoder/tree_fragment.h b/decoder/tree_fragment.h
index 83cd1c1e..b1dbbae0 100644
--- a/decoder/tree_fragment.h
+++ b/decoder/tree_fragment.h
@@ -1,6 +1,7 @@
#ifndef TREE_FRAGMENT
#define TREE_FRAGMENT
+#include <queue>
#include <iostream>
#include <vector>
#include <string>
@@ -9,6 +10,8 @@
namespace cdec {
+class BreadthFirstIterator;
+
static const unsigned NT_BIT = 0x40000000u;
static const unsigned FRONTIER_BIT = 0x80000000u;
static const unsigned ALL_MASK = 0x0FFFFFFFu;
@@ -36,6 +39,15 @@ class TreeFragment {
// (S (NP a (X b) c d) (VP (V foo) (NP (NN bar))))
explicit TreeFragment(const std::string& tree, bool allow_frontier_sites = false);
void DebugRec(unsigned cur, std::ostream* out) const;
+ typedef BreadthFirstIterator iterator;
+ typedef ptrdiff_t difference_type;
+ typedef unsigned value_type;
+ typedef const unsigned * pointer;
+ typedef const unsigned & reference;
+
+ iterator begin() const;
+ iterator end() const;
+
private:
// cp is the character index in the tree
// np keeps track of the nodes (nonterminals) that have been built
@@ -49,6 +61,67 @@ class TreeFragment {
std::vector<TreeFragmentProduction> nodes;
};
+struct TFIState {
+ TFIState() : node(), rhspos() {}
+ TFIState(unsigned n, unsigned p) : node(n), rhspos(p) {}
+ bool operator==(const TFIState& o) const { return node == o.node && rhspos == o.rhspos; }
+ bool operator!=(const TFIState& o) const { return node != o.node && rhspos != o.rhspos; }
+ unsigned short node;
+ unsigned short rhspos;
+};
+
+class BreadthFirstIterator : public std::iterator<std::forward_iterator_tag, unsigned> {
+ const TreeFragment* tf_;
+ std::queue<TFIState> q_;
+ unsigned sym;
+ public:
+ explicit BreadthFirstIterator(const TreeFragment* tf) : tf_(tf) {
+ q_.push(TFIState(tf->nodes.size() - 1, 0));
+ Stage();
+ }
+ BreadthFirstIterator(const TreeFragment* tf, int) : tf_(tf) {}
+ const unsigned& operator*() const { return sym; }
+ const unsigned* operator->() const { return &sym; }
+ bool operator==(const BreadthFirstIterator& other) const {
+ return (tf_ == other.tf_) && (q_ == other.q_);
+ }
+ bool operator!=(const BreadthFirstIterator& other) const {
+ return (tf_ != other.tf_) || (q_ != other.q_);
+ }
+ void Stage() {
+ if (q_.empty()) return;
+ const TFIState& s = q_.front();
+ if (s.rhspos < 0) {
+ sym = tf_->nodes[s.node].lhs;
+ } else {
+ sym = tf_->nodes[s.node].rhs[s.rhspos];
+ if (IsInternalNT(sym)) {
+ q_.push(TFIState(sym & ALL_MASK, 0));
+ sym = tf_->nodes[sym & ALL_MASK].lhs;
+ }
+ }
+ }
+ const BreadthFirstIterator& operator++() {
+ TFIState& s = q_.front();
+ const unsigned len = tf_->nodes[s.node].rhs.size();
+ s.rhspos++;
+ if (s.rhspos > len) {
+ q_.pop();
+ Stage();
+ } else if (s.rhspos == len) {
+ sym = 0;
+ } else {
+ Stage();
+ }
+ return *this;
+ }
+ BreadthFirstIterator operator++(int) {
+ BreadthFirstIterator res = *this;
+ ++(*this);
+ return res;
+ }
+};
+
inline std::ostream& operator<<(std::ostream& os, const TreeFragment& x) {
x.DebugRec(x.nodes.size() - 1, &os);
return os;