summary | refs | log | tree | commit | diff
path: root/gi/evaluation/extract_ccg_labels.py
diff options
context:
space:
mode:
Diffstat (limited to 'gi/evaluation/extract_ccg_labels.py')
-rw-r--r-- gi/evaluation/extract_ccg_labels.py 20
1 files changed, 20 insertions, 0 deletions
diff --git a/gi/evaluation/extract_ccg_labels.py b/gi/evaluation/extract_ccg_labels.py
index 014e0399..5dd6eb65 100644
--- a/gi/evaluation/extract_ccg_labels.py
+++ b/gi/evaluation/extract_ccg_labels.py
@@ -60,12 +60,32 @@ def frontier(node, indices):
else:
return [node]
def project_heads(node):
    """Propagate head tags up the tree rooted at ``node``, in place.

    A node is head-marked when its ``data.tag`` ends with ``'-HEAD'``.
    The subtree is rewritten bottom-up:

    * a head-marked leaf has the ``'-HEAD'`` suffix stripped from its tag;
    * a node with children has its tag replaced by the projected tag of
      its head-marked child.

    NOTE(review): the call site says this assumes Bikel-style head
    annotation on the input trees -- confirm before enabling it.

    Args:
        node: a tree node exposing ``data.tag`` (a string) and
            ``children`` (a possibly empty sequence of nodes).

    Returns:
        The node's tag after projection if the node was head-marked on
        entry, otherwise ``None``.

    Raises:
        AssertionError: if a node with children does not have exactly
            one head-marked child.
    """
    suffix = '-HEAD'
    # Sample the marker before the tag is rewritten below.
    is_head = node.data.tag.endswith(suffix)

    if node.children:
        found = 0
        for child in node.children:
            projected = project_heads(child)
            # Compare against None rather than truthiness: a head leaf
            # tagged exactly '-HEAD' projects to '' and must still count.
            if projected is not None:
                node.data.tag = projected
                found += 1
        # Raised explicitly so the check survives ``python -O``.
        if found != 1:
            raise AssertionError(
                'expected exactly one head child, found %d' % found)
    elif is_head:
        node.data.tag = node.data.tag[:-len(suffix)]

    return node.data.tag if is_head else None
+
for tline, eline in itertools.izip(tinfile, einfile):
if tline.strip() != '(())':
if tline.startswith('( '):
tline = tline[2:-1].strip()
tr = tree.parse_PST(tline)
number_leaves(tr)
+ #project_heads(tr) # assumes Bikel-style head annotation for the input trees
else:
tr = None