From d124d4aaa78b52b46f7ac8d7306be342d3405124 Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Tue, 1 Jul 2014 13:49:47 -0400
Subject: track spans in t2s translation

---
 decoder/tree_fragment.cc | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'decoder/tree_fragment.cc')

diff --git a/decoder/tree_fragment.cc b/decoder/tree_fragment.cc
index aad0b2c4..42f7793a 100644
--- a/decoder/tree_fragment.cc
+++ b/decoder/tree_fragment.cc
@@ -28,12 +28,14 @@ TreeFragment::TreeFragment(const StringPiece& tree, bool allow_frontier_sites) {
   unsigned cp = 0, symp = 0, np = 0;
   ParseRec(tree, allow_frontier_sites, cp, symp, np, &cp, &symp, &np);
   root = nodes.back().lhs;
+  if (!allow_frontier_sites) SetupSpansRec(open - 1, 0);
   //cerr << "ROOT: " << TD::Convert(root & ALL_MASK) << endl;
   //DebugRec(open - 1, &cerr); cerr << "\n";
 }
 
 void TreeFragment::DebugRec(unsigned cur, ostream* out) const {
   *out << '(' << TD::Convert(nodes[cur].lhs & ALL_MASK);
+  // *out << "_{" << nodes[cur].span.first << ',' << nodes[cur].span.second << '}';
   for (auto& x : nodes[cur].rhs) {
     *out << ' ';
     if (IsFrontier(x)) {
@@ -47,6 +49,21 @@ void TreeFragment::DebugRec(unsigned cur, ostream* out) const {
   *out << ')';
 }
 
+// returns left + the number of terminals rooted at NT cur,
+int TreeFragment::SetupSpansRec(unsigned cur, int left) {
+  int right = left;
+  for (auto& x : nodes[cur].rhs) {
+    if (IsRHS(x)) {
+      right = SetupSpansRec(x & ALL_MASK, right);
+    } else {
+      ++right;
+    }
+  }
+  nodes[cur].span.first = left;
+  nodes[cur].span.second = right;
+  return right;
+}
+
 // cp is the character index in the tree
 // np keeps track of the nodes (nonterminals) that have been built
 // symp keeps track of the terminal symbols that have been built
-- 
cgit v1.2.3