summary refs log tree commit diff
path: root/decoder/tree_fragment.h
diff options
context:
space:
mode:
author Chris Dyer <redpony@gmail.com> 2014-03-30 23:50:17 -0400
committer Chris Dyer <redpony@gmail.com> 2014-03-30 23:50:17 -0400
commit 8372086f2fc4bd765fdd05e8cf95faeb147a6587 (patch)
tree fa4ac0342bc1259ce96c61fa9fffb5f8252d0333 /decoder/tree_fragment.h
parent ca29417acd47dbbd2aa68cd31fcd3129e6482bf7 (diff)
almost complete tree to string translator
Diffstat (limited to 'decoder/tree_fragment.h')
-rw-r--r-- decoder/tree_fragment.h 109
1 files changed, 79 insertions, 30 deletions
diff --git a/decoder/tree_fragment.h b/decoder/tree_fragment.h
index a38dbdfa..b83afc27 100644
--- a/decoder/tree_fragment.h
+++ b/decoder/tree_fragment.h
@@ -1,7 +1,7 @@
#ifndef TREE_FRAGMENT
#define TREE_FRAGMENT
-#include <queue>
+#include <deque>
#include <iostream>
#include <vector>
#include <string>
@@ -12,18 +12,32 @@ namespace cdec {
class BreadthFirstIterator;
-static const unsigned NT_BIT = 0x40000000u;
-static const unsigned FRONTIER_BIT = 0x80000000u;
-static const unsigned ALL_MASK = 0x0FFFFFFFu;
+static const unsigned LHS_BIT = 0x10000000u; // bit 28: OR'd into the symbol staged from a node's own lhs (see Stage)
+static const unsigned RHS_BIT = 0x20000000u; // bit 29: marks an rhs symbol whose low bits index another node
+static const unsigned FRONTIER_BIT = 0x40000000u; // bit 30: frontier flag; truncate() sets it on the current symbol
+static const unsigned RESERVED_BIT = 0x80000000u; // bit 31: not referenced in the visible hunks
+static const unsigned ALL_MASK = 0x0FFFFFFFu; // low 28 bits: the symbol / node index with every flag bit stripped
-inline bool IsInternalNT(unsigned x) {
- return (x & NT_BIT);
+inline bool IsNT(unsigned x) { // true if any of the LHS, RHS or FRONTIER flags is set
+ return (x & (LHS_BIT | RHS_BIT | FRONTIER_BIT));
+}
+
+inline bool IsLHS(unsigned x) { // true if the LHS flag is set
+ return (x & LHS_BIT);
+}
+
+inline bool IsRHS(unsigned x) { // true if the RHS flag is set
+ return (x & RHS_BIT);
}
inline bool IsFrontier(unsigned x) { // true if the FRONTIER flag is set
return (x & FRONTIER_BIT);
}
+inline bool IsTerminal(unsigned x) { // true if no bit outside ALL_MASK is set (so RESERVED_BIT also disqualifies)
+ return (x & ALL_MASK) == x;
+}
+
struct TreeFragmentProduction {
TreeFragmentProduction() {}
TreeFragmentProduction(int nttype, const std::vector<unsigned>& r) : lhs(nttype), rhs(r) {}
@@ -46,6 +60,7 @@ class TreeFragment {
typedef const unsigned & reference;
iterator begin() const;
+ iterator begin(unsigned node_idx) const;
iterator end() const;
private:
@@ -62,24 +77,28 @@ class TreeFragment {
};
struct TFIState { // one queue entry of BreadthFirstIterator: a position inside one tree node
- TFIState() : node(), rhspos() {}
- TFIState(unsigned n, unsigned p) : node(n), rhspos(p) {}
- bool operator==(const TFIState& o) const { return node == o.node && rhspos == o.rhspos; }
- bool operator!=(const TFIState& o) const { return node != o.node && rhspos != o.rhspos; }
+ TFIState() : node(), rhspos(), state() {}
+ TFIState(unsigned n, unsigned p, unsigned s) : node(n), rhspos(p), state(s) {} // arguments are narrowed to the member types below
+ bool operator==(const TFIState& o) const { return node == o.node && rhspos == o.rhspos && state == o.state; } // equal only when all three fields match
+ bool operator!=(const TFIState& o) const { return node != o.node || rhspos != o.rhspos || state != o.state; } // exact negation of operator==; the removed version joined its tests with &&
unsigned short node; // index into TreeFragment::nodes
unsigned short rhspos; // index into that node's rhs vector
+ unsigned char state; // 0: the node's lhs is emitted next; operator++ bumps it once, after which rhs symbols are emitted
};
class BreadthFirstIterator : public std::iterator<std::forward_iterator_tag, unsigned> {
const TreeFragment* tf_;
- std::queue<TFIState> q_;
+ std::deque<TFIState> q_;
unsigned sym;
public:
- explicit BreadthFirstIterator(const TreeFragment* tf) : tf_(tf) {
- q_.push(TFIState(tf->nodes.size() - 1, 0));
+ BreadthFirstIterator() : tf_(), sym() {} // default: null fragment pointer, empty queue, sym zeroed
+ // used for begin: queues nodes[node_idx] in state 0 so that node's lhs is staged as the first symbol
+ explicit BreadthFirstIterator(const TreeFragment* tf, unsigned node_idx) : tf_(tf) {
+ q_.push_back(TFIState(node_idx, 0, 0));
Stage();
}
- BreadthFirstIterator(const TreeFragment* tf, int) : tf_(tf) {}
+ // used for end: the queue stays empty; sym is left uninitialized by this constructor
+ explicit BreadthFirstIterator(const TreeFragment* tf) : tf_(tf) {}
const unsigned& operator*() const { return sym; }
const unsigned* operator->() const { return &sym; }
bool operator==(const BreadthFirstIterator& other) const {
@@ -88,26 +107,20 @@ class BreadthFirstIterator : public std::iterator<std::forward_iterator_tag, uns
bool operator!=(const BreadthFirstIterator& other) const { // sym itself is not part of the comparison
return (tf_ != other.tf_) || (q_ != other.q_); // differ if the fragment pointers or the pending queues differ
}
- void Stage() {
- if (q_.empty()) return;
- const TFIState& s = q_.front();
- sym = tf_->nodes[s.node].rhs[s.rhspos];
- if (IsInternalNT(sym)) {
- q_.push(TFIState(sym & ALL_MASK, 0));
- sym = tf_->nodes[sym & ALL_MASK].lhs;
- }
- }
const BreadthFirstIterator& operator++() { // pre-increment: advance to the next symbol and stage it in sym
TFIState& s = q_.front(); // NOTE(review): no emptiness check before front(); callers presumably test at_end() first - confirm
- const unsigned len = tf_->nodes[s.node].rhs.size();
- s.rhspos++;
- if (s.rhspos > len) {
- q_.pop();
+ if (s.state == 0) { // the front entry has not yet moved past its lhs
+ s.state++; // switch the entry to rhs mode; rhspos is still 0
Stage(); // NOTE(review): this reads rhs[0], which assumes the node's rhs is non-empty - confirm
- } else if (s.rhspos == len) {
- sym = 0;
} else {
- Stage();
+ const unsigned len = tf_->nodes[s.node].rhs.size();
+ s.rhspos++;
+ if (s.rhspos >= len) { // rhs exhausted: drop this node and continue with the next queued entry
+ q_.pop_front(); // s refers to the popped element from here on and is not used again
+ Stage(); // leaves sym unchanged if the queue is now empty
+ } else {
+ Stage();
+ }
}
return *this;
}
@@ -116,6 +129,42 @@ class BreadthFirstIterator : public std::iterator<std::forward_iterator_tag, uns
++(*this);
return res;
}
+ // tell iterator not to explore the subtree rooted at sym; sym is re-tagged as a frontier symbol
+ // should only be called once per NT symbol encountered: every call pops one entry off the back of the queue
+ const BreadthFirstIterator& truncate() {
+ assert(IsRHS(sym)); // Stage() queues a child entry exactly when it produces an RHS-flagged symbol
+ sym &= ALL_MASK; // clear all flag bits, keeping the low 28 bits
+ sym |= FRONTIER_BIT; // mark the current symbol as a frontier node
+ q_.pop_back(); // discard the most recently queued entry so it is never visited
+ return *this;
+ }
+ BreadthFirstIterator remainder() const { // returns a new iterator seeded with the entry at the back of the queue
+ assert(IsRHS(sym)); // the current symbol must carry RHS_BIT
+ return BreadthFirstIterator(tf_, q_.back()); // built through the private (tf, state) constructor
+ }
+ bool at_end() const { // true when no queued entries remain
+ return q_.empty();
+ }
+ private:
+ void Stage() { // loads sym for the entry at the front of the queue; does nothing when the queue is empty
+ if (q_.empty()) return;
+ const TFIState& s = q_.front();
+ if (s.state == 0) {
+ sym = (tf_->nodes[s.node].lhs & ALL_MASK) | LHS_BIT; // emit the node's own lhs, flag bits replaced by LHS_BIT
+ } else {
+ sym = tf_->nodes[s.node].rhs[s.rhspos];
+ if (IsRHS(sym)) { // this rhs entry refers to another node
+ q_.push_back(TFIState(sym & ALL_MASK, 0, 0)); // queue that node (low bits are its index) for a later visit
+ sym = tf_->nodes[sym & ALL_MASK].lhs | RHS_BIT; // report the child's lhs; NOTE(review): lhs is not masked with ALL_MASK here, unlike the state-0 branch - confirm intended
+ }
+ }
+ }
+
+ // used by remainder: seeds the queue with a copy of s and stages its first symbol
+ BreadthFirstIterator(const TreeFragment* tf, const TFIState& s) : tf_(tf) {
+ q_.push_back(s);
+ Stage();
+ }
};
inline std::ostream& operator<<(std::ostream& os, const TreeFragment& x) {