summary refs log tree commit diff
path: root/gi/evaluation/extract_ccg_labels.py
diff options
context:
space:
mode:
author trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-12 18:23:01 +0000
committer trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-12 18:23:01 +0000
commit 17f8fa666dd0614dbbc520985d0cdcb8b0e69b05 (patch)
tree 88ace1d2b8d190e4e1b3dc71947ba394ad9f2649 /gi/evaluation/extract_ccg_labels.py
parent 5558d8fc9b67eb4dd98414587082ff3df27daaf9 (diff)
Added slash limits
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@224 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/evaluation/extract_ccg_labels.py')
-rw-r--r-- gi/evaluation/extract_ccg_labels.py 20
1 files changed, 20 insertions, 0 deletions
diff --git a/gi/evaluation/extract_ccg_labels.py b/gi/evaluation/extract_ccg_labels.py
index 014e0399..5dd6eb65 100644
--- a/gi/evaluation/extract_ccg_labels.py
+++ b/gi/evaluation/extract_ccg_labels.py
@@ -60,12 +60,32 @@ def frontier(node, indices):
else:
return [node]
+# Rewrite node.data.tag in place throughout the tree rooted at `node`,
+# copying the tag returned for a child up onto its parent.
+# Returns node.data.tag (after rewriting) when the node's original tag ended
+# with '-HEAD', otherwise None.
+# NOTE(review): indentation was lost in this rendering of the patch; the
+# nesting described in the comments below is the presumed structure --
+# confirm against the file in the repository.
+def project_heads(node):
+ #print 'project_heads', node
+ # Record head status now, before the tag is modified below.
+ is_head = node.data.tag.endswith('-HEAD')
+ if node.children:
+ # Counts the children whose recursive call returned a truthy tag.
+ found = 0
+ for child in node.children:
+ x = project_heads(child)
+ # Truthiness test: both None and an empty-string tag are skipped here.
+ if x:
+ # Overwrite this node's tag with the tag returned for the child.
+ node.data.tag = x
+ found += 1
+ # Presumably evaluated after the loop: exactly one child returned a tag.
+ assert found == 1
+ elif is_head:
+ # Presumably the no-children case: drop the '-HEAD' suffix from the tag.
+ node.data.tag = node.data.tag[:-len('-HEAD')]
+
+ # Only nodes originally marked '-HEAD' report their tag to the caller.
+ if is_head:
+ return node.data.tag
+ else:
+ return None
+
for tline, eline in itertools.izip(tinfile, einfile):
if tline.strip() != '(())':
if tline.startswith('( '):
tline = tline[2:-1].strip()
tr = tree.parse_PST(tline)
number_leaves(tr)
+ #project_heads(tr) # assumes Bikel-style head annotation for the input trees
else:
tr = None