diff options
author | Chris Dyer <redpony@gmail.com> | 2014-03-27 00:07:41 -0400 |
---|---|---|
committer | Chris Dyer <redpony@gmail.com> | 2014-03-27 00:07:41 -0400 |
commit | 34785db78a0ad12f0fe74d98924acc20a8cab79a (patch) | |
tree | ad7bf89ee42021e167174a64f47f0141b3ac774c /decoder | |
parent | 63a3894f2f2649787d5656adb09579e494c791d2 (diff) |
breadth first iterator for tree fragment
Diffstat (limited to 'decoder')
-rw-r--r-- | decoder/tree2string_translator.cc | 32 | ||||
-rw-r--r-- | decoder/tree_fragment.cc | 8 | ||||
-rw-r--r-- | decoder/tree_fragment.h | 73 |
3 files changed, 93 insertions, 20 deletions
diff --git a/decoder/tree2string_translator.cc b/decoder/tree2string_translator.cc index ac9c0d74..1c249836 100644 --- a/decoder/tree2string_translator.cc +++ b/decoder/tree2string_translator.cc @@ -13,25 +13,6 @@ using namespace std; -// root: S -// A implication: (S [A] *INCOMPLETE* -// B implication: (S [A] [B] *INCOMPLETE* -// *0* implication: (S _[A] [B]) -// a implication: (S (A a *INCOMPLETE* [B]) -// a implication: (S (A a a *INCOMPLETE* [B]) -// *0* implication: (S (A a a) _[B]) -// D implication: (S (A a a) (B [D] *INCOMPLETE*) -// *0* implication: (S (A a a) (B _[D])) -// d implication: (S (A a a) (B (D d *INCOMPLETE*)) -// *0* implication: (S (A a a) (B (D d))) -// --there are no further outgoing links possible-- - -// root: S -// A implication: (S [A] *INCOMPLETE* -// B implication: (S [A] [B] *INCOMPLETE* -// *0* implication: (S _[A] [B]) -// *0* implication: (S [A] _[B]) -// b implication: (S [A] (B b *INCOMPLETE*)) struct Tree2StringGrammarNode { map<unsigned, Tree2StringGrammarNode> next; string rules; @@ -44,8 +25,19 @@ void ReadTree2StringGrammar(istream* in, unordered_map<unsigned, Tree2StringGram size_t pos = line.find("|||"); assert(pos != string::npos); assert(pos > 3); - if (line[pos - 1] == ' ') --pos; + unsigned xc = 0; + while (line[pos - 1] == ' ') { --pos; xc++; } cdec::TreeFragment rule_src(line.substr(0, pos), true); + Tree2StringGrammarNode* cur = &roots[rule_src.root]; + for (auto sym : rule_src) + cur = &cur->next[sym]; + pos += 3 + xc; + while(line[pos] == ' ') { ++pos; } + size_t pos2 = line.find("|||", pos); + assert(pos2 != string::npos); + while (line[pos2 - 1] == ' ') { --pos2; } + cur->rules = line.substr(pos, pos2 - pos); + cerr << "OUTPUT = '" << cur->rules << "'\n"; } } diff --git a/decoder/tree_fragment.cc b/decoder/tree_fragment.cc index d5c30f58..93aad64e 100644 --- a/decoder/tree_fragment.cc +++ b/decoder/tree_fragment.cc @@ -112,4 +112,12 @@ void TreeFragment::ParseRec(const string& tree, bool afs, unsigned cp, unsigned *psymp = symp; } +BreadthFirstIterator TreeFragment::begin() const { + return BreadthFirstIterator(this); +} + +BreadthFirstIterator TreeFragment::end() const { + return BreadthFirstIterator(this, 0); +} + } diff --git a/decoder/tree_fragment.h b/decoder/tree_fragment.h index 83cd1c1e..b1dbbae0 100644 --- a/decoder/tree_fragment.h +++ b/decoder/tree_fragment.h @@ -1,6 +1,7 @@ #ifndef TREE_FRAGMENT #define TREE_FRAGMENT +#include <queue> #include <iostream> #include <vector> #include <string> @@ -9,6 +10,8 @@ namespace cdec { +class BreadthFirstIterator; + static const unsigned NT_BIT = 0x40000000u; static const unsigned FRONTIER_BIT = 0x80000000u; static const unsigned ALL_MASK = 0x0FFFFFFFu; @@ -36,6 +39,15 @@ class TreeFragment { // (S (NP a (X b) c d) (VP (V foo) (NP (NN bar)))) explicit TreeFragment(const std::string& tree, bool allow_frontier_sites = false); void DebugRec(unsigned cur, std::ostream* out) const; + typedef BreadthFirstIterator iterator; + typedef ptrdiff_t difference_type; + typedef unsigned value_type; + typedef const unsigned * pointer; + typedef const unsigned & reference; + + iterator begin() const; + iterator end() const; + private: // cp is the character index in the tree // np keeps track of the nodes (nonterminals) that have been built @@ -49,6 +61,67 @@ class TreeFragment { std::vector<TreeFragmentProduction> nodes; }; +struct TFIState { + TFIState() : node(), rhspos() {} + TFIState(unsigned n, unsigned p) : node(n), rhspos(p) {} + bool operator==(const TFIState& o) const { return node == o.node && rhspos == o.rhspos; } + bool operator!=(const TFIState& o) const { return node != o.node && rhspos != o.rhspos; } + unsigned short node; + unsigned short rhspos; +}; + +class BreadthFirstIterator : public std::iterator<std::forward_iterator_tag, unsigned> { + const TreeFragment* tf_; + std::queue<TFIState> q_; + unsigned sym; + public: + explicit BreadthFirstIterator(const TreeFragment* tf) : tf_(tf) { + q_.push(TFIState(tf->nodes.size() - 1, 0)); + Stage(); + } + BreadthFirstIterator(const TreeFragment* tf, int) : tf_(tf) {} + const unsigned& operator*() const { return sym; } + const unsigned* operator->() const { return &sym; } + bool operator==(const BreadthFirstIterator& other) const { + return (tf_ == other.tf_) && (q_ == other.q_); + } + bool operator!=(const BreadthFirstIterator& other) const { + return (tf_ != other.tf_) || (q_ != other.q_); + } + void Stage() { + if (q_.empty()) return; + const TFIState& s = q_.front(); + if (s.rhspos < 0) { + sym = tf_->nodes[s.node].lhs; + } else { + sym = tf_->nodes[s.node].rhs[s.rhspos]; + if (IsInternalNT(sym)) { + q_.push(TFIState(sym & ALL_MASK, 0)); + sym = tf_->nodes[sym & ALL_MASK].lhs; + } + } + } + const BreadthFirstIterator& operator++() { + TFIState& s = q_.front(); + const unsigned len = tf_->nodes[s.node].rhs.size(); + s.rhspos++; + if (s.rhspos > len) { + q_.pop(); + Stage(); + } else if (s.rhspos == len) { + sym = 0; + } else { + Stage(); + } + return *this; + } + BreadthFirstIterator operator++(int) { + BreadthFirstIterator res = *this; + ++(*this); + return res; + } +}; + inline std::ostream& operator<<(std::ostream& os, const TreeFragment& x) { x.DebugRec(x.nodes.size() - 1, &os); return os; |