From 899f78c7c4c3a8cff97494665ed52ddb3460d44a Mon Sep 17 00:00:00 2001 From: Victor Chahuneau Date: Thu, 21 Jun 2012 17:27:48 -0400 Subject: Allow SA rule extraction to write to a python buffer + very small sa-extract cleanup --- sa-extract/rulefactory.pyx | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'sa-extract/rulefactory.pyx') diff --git a/sa-extract/rulefactory.pyx b/sa-extract/rulefactory.pyx index 20ea80d2..792489c4 100644 --- a/sa-extract/rulefactory.pyx +++ b/sa-extract/rulefactory.pyx @@ -1321,7 +1321,7 @@ cdef class HieroCachingRuleFactory: candidate.append([next_id,curr[1]+jump]) return sorted(result); - def input(self, fwords, meta): + def input(self, fwords, meta=None, output=None): '''When this function is called on the RuleFactory, it looks up all of the rules that can be used to translate the input sentence''' @@ -1342,13 +1342,14 @@ cdef class HieroCachingRuleFactory: nodes_isteps_away_buffer = {} hit = 0 reachable_buffer = {} - #print "id = ",meta - #print "rule_file = ",self.rule_file - dattrs = sgml.attrs_to_dict(meta) - id = dattrs.get('id', 'NOID') - if self.per_sentence_grammar: + if meta: + dattrs = sgml.attrs_to_dict(meta) + id = dattrs.get('id', 'NOID') + self.excluded_sent_id = int(dattrs.get('exclude', '-1')) + if output: + self.rule_filehandler = output + elif self.per_sentence_grammar: self.rule_filehandler = open(self.rule_file+'.'+id, 'w') - self.excluded_sent_id = int(dattrs.get('exclude', '-1')) #print "max_initial_size = %i" % self.max_initial_size -- cgit v1.2.3