diff options
Diffstat (limited to 'decoder')
| -rw-r--r-- | decoder/tree2string_translator.cc | 32 | ||||
| -rw-r--r-- | decoder/tree_fragment.cc | 8 | ||||
| -rw-r--r-- | decoder/tree_fragment.h | 73 | 
3 files changed, 93 insertions, 20 deletions
diff --git a/decoder/tree2string_translator.cc b/decoder/tree2string_translator.cc index ac9c0d74..1c249836 100644 --- a/decoder/tree2string_translator.cc +++ b/decoder/tree2string_translator.cc @@ -13,25 +13,6 @@  using namespace std; -// root: S -// A          implication: (S [A] *INCOMPLETE* -// B          implication: (S [A] [B] *INCOMPLETE* -// *0*        implication: (S _[A] [B]) -// a          implication: (S (A a *INCOMPLETE* [B]) -// a          implication: (S (A a a *INCOMPLETE* [B]) -// *0*        implication: (S (A a a) _[B]) -// D          implication: (S (A a a) (B [D] *INCOMPLETE*) -// *0*        implication: (S (A a a) (B _[D])) -// d          implication: (S (A a a) (B (D d *INCOMPLETE*)) -// *0*        implication: (S (A a a) (B (D d))) -// --there are no further outgoing links possible-- - -// root: S -// A          implication: (S [A] *INCOMPLETE* -// B          implication: (S [A] [B] *INCOMPLETE* -// *0*        implication: (S _[A] [B]) -// *0*        implication: (S [A] _[B]) -// b          implication: (S [A] (B b *INCOMPLETE*))  struct Tree2StringGrammarNode {    map<unsigned, Tree2StringGrammarNode> next;    string rules; @@ -44,8 +25,19 @@ void ReadTree2StringGrammar(istream* in, unordered_map<unsigned, Tree2StringGram      size_t pos = line.find("|||");      assert(pos != string::npos);      assert(pos > 3); -    if (line[pos - 1] == ' ') --pos; +    unsigned xc = 0; +    while (line[pos - 1] == ' ') { --pos; xc++; }      cdec::TreeFragment rule_src(line.substr(0, pos), true); +    Tree2StringGrammarNode* cur = &roots[rule_src.root]; +    for (auto sym : rule_src) +      cur = &cur->next[sym]; +    pos += 3 + xc; +    while(line[pos] == ' ') { ++pos; } +    size_t pos2 = line.find("|||", pos); +    assert(pos2 != string::npos); +    while (line[pos2 - 1] == ' ') { --pos2; } +    cur->rules = line.substr(pos, pos2 - pos); +    cerr << "OUTPUT = '" << cur->rules << "'\n";    }  } diff --git a/decoder/tree_fragment.cc b/decoder/tree_fragment.cc index d5c30f58..93aad64e 100644 --- a/decoder/tree_fragment.cc +++ b/decoder/tree_fragment.cc @@ -112,4 +112,12 @@ void TreeFragment::ParseRec(const string& tree, bool afs, unsigned cp, unsigned    *psymp = symp;  } +BreadthFirstIterator TreeFragment::begin() const { +  return BreadthFirstIterator(this); +} + +BreadthFirstIterator TreeFragment::end() const { +  return BreadthFirstIterator(this, 0); +} +  } diff --git a/decoder/tree_fragment.h b/decoder/tree_fragment.h index 83cd1c1e..b1dbbae0 100644 --- a/decoder/tree_fragment.h +++ b/decoder/tree_fragment.h @@ -1,6 +1,7 @@  #ifndef TREE_FRAGMENT  #define TREE_FRAGMENT +#include <queue>  #include <iostream>  #include <vector>  #include <string> @@ -9,6 +10,8 @@  namespace cdec { +class BreadthFirstIterator; +  static const unsigned NT_BIT       = 0x40000000u;  static const unsigned FRONTIER_BIT = 0x80000000u;  static const unsigned ALL_MASK     = 0x0FFFFFFFu; @@ -36,6 +39,15 @@ class TreeFragment {    // (S (NP a (X b) c d) (VP (V foo) (NP (NN bar))))    explicit TreeFragment(const std::string& tree, bool allow_frontier_sites = false);    void DebugRec(unsigned cur, std::ostream* out) const; +  typedef BreadthFirstIterator iterator; +  typedef ptrdiff_t difference_type; +  typedef unsigned value_type; +  typedef const unsigned * pointer; +  typedef const unsigned & reference; + +  iterator begin() const; +  iterator end() const; +   private:    // cp is the character index in the tree    // np keeps track of the nodes (nonterminals) that have been built @@ -49,6 +61,67 @@ class TreeFragment {    std::vector<TreeFragmentProduction> nodes;  }; +struct TFIState { +  TFIState() : node(), rhspos() {} +  TFIState(unsigned n, unsigned p) : node(n), rhspos(p) {} +  bool operator==(const TFIState& o) const { return node == o.node && rhspos == o.rhspos; } +  bool operator!=(const TFIState& o) const { return node != o.node && rhspos != o.rhspos; } +  unsigned short node; +  unsigned short rhspos; +}; + +class BreadthFirstIterator : public std::iterator<std::forward_iterator_tag, unsigned> { +  const TreeFragment* tf_; +  std::queue<TFIState> q_; +  unsigned sym; + public: +  explicit BreadthFirstIterator(const TreeFragment* tf) : tf_(tf) { +    q_.push(TFIState(tf->nodes.size() - 1, 0)); +    Stage(); +  } +  BreadthFirstIterator(const TreeFragment* tf, int) : tf_(tf) {} +  const unsigned& operator*() const { return sym; } +  const unsigned* operator->() const { return &sym; } +  bool operator==(const BreadthFirstIterator& other) const { +    return (tf_ == other.tf_) && (q_ == other.q_); +  } +  bool operator!=(const BreadthFirstIterator& other) const { +    return (tf_ != other.tf_) || (q_ != other.q_); +  } +  void Stage() { +    if (q_.empty()) return; +    const TFIState& s = q_.front(); +    if (s.rhspos < 0) { +      sym = tf_->nodes[s.node].lhs; +    } else { +      sym = tf_->nodes[s.node].rhs[s.rhspos]; +      if (IsInternalNT(sym)) { +        q_.push(TFIState(sym & ALL_MASK, 0)); +        sym = tf_->nodes[sym & ALL_MASK].lhs; +      } +    } +  } +  const BreadthFirstIterator& operator++() { +    TFIState& s = q_.front(); +    const unsigned len = tf_->nodes[s.node].rhs.size(); +    s.rhspos++; +    if (s.rhspos > len) { +      q_.pop(); +      Stage(); +    } else if (s.rhspos == len) { +      sym = 0; +    } else { +      Stage(); +    } +    return *this; +  } +  BreadthFirstIterator operator++(int) { +    BreadthFirstIterator res = *this; +    ++(*this); +    return res; +  } +}; +  inline std::ostream& operator<<(std::ostream& os, const TreeFragment& x) {    x.DebugRec(x.nodes.size() - 1, &os);    return os;  | 
