diff options
author | Chris Dyer <redpony@gmail.com> | 2014-03-30 23:50:17 -0400 |
---|---|---|
committer | Chris Dyer <redpony@gmail.com> | 2014-03-30 23:50:17 -0400 |
commit | 8372086f2fc4bd765fdd05e8cf95faeb147a6587 (patch) | |
tree | fa4ac0342bc1259ce96c61fa9fffb5f8252d0333 /decoder/tree_fragment.h | |
parent | ca29417acd47dbbd2aa68cd31fcd3129e6482bf7 (diff) |
almost complete tree to string translator
Diffstat (limited to 'decoder/tree_fragment.h')
-rw-r--r-- | decoder/tree_fragment.h | 109 |
1 files changed, 79 insertions, 30 deletions
diff --git a/decoder/tree_fragment.h b/decoder/tree_fragment.h index a38dbdfa..b83afc27 100644 --- a/decoder/tree_fragment.h +++ b/decoder/tree_fragment.h @@ -1,7 +1,7 @@ #ifndef TREE_FRAGMENT #define TREE_FRAGMENT -#include <queue> +#include <deque> #include <iostream> #include <vector> #include <string> @@ -12,18 +12,32 @@ namespace cdec { class BreadthFirstIterator; -static const unsigned NT_BIT = 0x40000000u; -static const unsigned FRONTIER_BIT = 0x80000000u; -static const unsigned ALL_MASK = 0x0FFFFFFFu; +static const unsigned LHS_BIT = 0x10000000u; +static const unsigned RHS_BIT = 0x20000000u; +static const unsigned FRONTIER_BIT = 0x40000000u; +static const unsigned RESERVED_BIT = 0x80000000u; +static const unsigned ALL_MASK = 0x0FFFFFFFu; -inline bool IsInternalNT(unsigned x) { - return (x & NT_BIT); +inline bool IsNT(unsigned x) { + return (x & (LHS_BIT | RHS_BIT | FRONTIER_BIT)); +} + +inline bool IsLHS(unsigned x) { + return (x & LHS_BIT); +} + +inline bool IsRHS(unsigned x) { + return (x & RHS_BIT); } inline bool IsFrontier(unsigned x) { return (x & FRONTIER_BIT); } +inline bool IsTerminal(unsigned x) { + return (x & ALL_MASK) == x; +} + struct TreeFragmentProduction { TreeFragmentProduction() {} TreeFragmentProduction(int nttype, const std::vector<unsigned>& r) : lhs(nttype), rhs(r) {} @@ -46,6 +60,7 @@ class TreeFragment { typedef const unsigned & reference; iterator begin() const; + iterator begin(unsigned node_idx) const; iterator end() const; private: @@ -62,24 +77,28 @@ class TreeFragment { }; struct TFIState { - TFIState() : node(), rhspos() {} - TFIState(unsigned n, unsigned p) : node(n), rhspos(p) {} - bool operator==(const TFIState& o) const { return node == o.node && rhspos == o.rhspos; } - bool operator!=(const TFIState& o) const { return node != o.node && rhspos != o.rhspos; } + TFIState() : node(), rhspos(), state() {} + TFIState(unsigned n, unsigned p, unsigned s) : node(n), rhspos(p), state(s) {} + bool operator==(const TFIState& o) const { return node == o.node && rhspos == o.rhspos && state == o.state; } + bool operator!=(const TFIState& o) const { return node != o.node || rhspos != o.rhspos || state != o.state; } unsigned short node; unsigned short rhspos; + unsigned char state; }; class BreadthFirstIterator : public std::iterator<std::forward_iterator_tag, unsigned> { const TreeFragment* tf_; - std::queue<TFIState> q_; + std::deque<TFIState> q_; unsigned sym; public: - explicit BreadthFirstIterator(const TreeFragment* tf) : tf_(tf) { - q_.push(TFIState(tf->nodes.size() - 1, 0)); + BreadthFirstIterator() : tf_(), sym() {} + // used for begin + explicit BreadthFirstIterator(const TreeFragment* tf, unsigned node_idx) : tf_(tf) { + q_.push_back(TFIState(node_idx, 0, 0)); Stage(); } - BreadthFirstIterator(const TreeFragment* tf, int) : tf_(tf) {} + // used for end + explicit BreadthFirstIterator(const TreeFragment* tf) : tf_(tf) {} const unsigned& operator*() const { return sym; } const unsigned* operator->() const { return &sym; } bool operator==(const BreadthFirstIterator& other) const { @@ -88,26 +107,20 @@ class BreadthFirstIterator : public std::iterator<std::forward_iterator_tag, uns bool operator!=(const BreadthFirstIterator& other) const { return (tf_ != other.tf_) || (q_ != other.q_); } - void Stage() { - if (q_.empty()) return; - const TFIState& s = q_.front(); - sym = tf_->nodes[s.node].rhs[s.rhspos]; - if (IsInternalNT(sym)) { - q_.push(TFIState(sym & ALL_MASK, 0)); - sym = tf_->nodes[sym & ALL_MASK].lhs; - } - } const BreadthFirstIterator& operator++() { TFIState& s = q_.front(); - const unsigned len = tf_->nodes[s.node].rhs.size(); - s.rhspos++; - if (s.rhspos > len) { - q_.pop(); + if (s.state == 0) { + s.state++; Stage(); - } else if (s.rhspos == len) { - sym = 0; } else { - Stage(); + const unsigned len = tf_->nodes[s.node].rhs.size(); + s.rhspos++; + if (s.rhspos >= len) { + q_.pop_front(); + Stage(); + } else { + Stage(); + } } return *this; } @@ -116,6 +129,42 @@ class BreadthFirstIterator : public std::iterator<std::forward_iterator_tag, uns ++(*this); return res; } + // tell iterator not to explore the subtree rooted at sym + // should only be called once per NT symbol encountered + const BreadthFirstIterator& truncate() { + assert(IsRHS(sym)); + sym &= ALL_MASK; + sym |= FRONTIER_BIT; + q_.pop_back(); + return *this; + } + BreadthFirstIterator remainder() const { + assert(IsRHS(sym)); + return BreadthFirstIterator(tf_, q_.back()); + } + bool at_end() const { + return q_.empty(); + } + private: + void Stage() { + if (q_.empty()) return; + const TFIState& s = q_.front(); + if (s.state == 0) { + sym = (tf_->nodes[s.node].lhs & ALL_MASK) | LHS_BIT; + } else { + sym = tf_->nodes[s.node].rhs[s.rhspos]; + if (IsRHS(sym)) { + q_.push_back(TFIState(sym & ALL_MASK, 0, 0)); + sym = tf_->nodes[sym & ALL_MASK].lhs | RHS_BIT; + } + } + } + + // used by remainder + BreadthFirstIterator(const TreeFragment* tf, const TFIState& s) : tf_(tf) { + q_.push_back(s); + Stage(); + } }; inline std::ostream& operator<<(std::ostream& os, const TreeFragment& x) { |