From 5d159b948ad71850bcb03d0882ea7183a3a59b7e Mon Sep 17 00:00:00 2001 From: Adam Lopez Date: Thu, 6 Sep 2012 15:00:45 -0400 Subject: add FeatureContext.input_span --- python/src/sa/rulefactory.pxi | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'python/src/sa/rulefactory.pxi') diff --git a/python/src/sa/rulefactory.pxi b/python/src/sa/rulefactory.pxi index 248105d3..5f6558b3 100644 --- a/python/src/sa/rulefactory.pxi +++ b/python/src/sa/rulefactory.pxi @@ -19,6 +19,7 @@ FeatureContext = namedtuple('FeatureContext', 'fsample_count', 'input_span', 'matches', + 'input_match', 'test_sentence', 'f_text', 'e_text', @@ -960,7 +961,7 @@ cdef class HieroCachingRuleFactory: for i in range(len(fwords)): for alt in range(0, len(fwords[i])): if fwords[i][alt][0] != EPSILON: - frontier.append((i, i, alt, 0, self.rules.root, (), False)) + frontier.append((i, i, (i,), alt, 0, self.rules.root, (), False)) xroot = None x1 = sym_setindex(self.category, 1) @@ -973,7 +974,7 @@ cdef class HieroCachingRuleFactory: for i in range(self.min_gap_size, len(fwords)): for alt in range(0, len(fwords[i])): if fwords[i][alt][0] != EPSILON: - frontier.append((i-self.min_gap_size, i, alt, self.min_gap_size, xroot, (x1,), True)) + frontier.append((i-self.min_gap_size, i, (i,), alt, self.min_gap_size, xroot, (x1,), True)) next_states = [] for i in range(len(fwords)): @@ -981,7 +982,7 @@ cdef class HieroCachingRuleFactory: while len(frontier) > 0: new_frontier = [] - for k, i, alt, pathlen, node, prefix, is_shadow_path in frontier: + for k, i, input_match, alt, pathlen, node, prefix, is_shadow_path in frontier: word_id = fwords[i][alt][0] spanlen = fwords[i][alt][2] # TODO get rid of k -- pathlen is replacing it @@ -990,7 +991,7 @@ cdef class HieroCachingRuleFactory: if i+spanlen >= len(fwords): continue for nualt in range(0,len(fwords[i+spanlen])): - frontier.append((k, i+spanlen, nualt, pathlen, node, prefix, is_shadow_path)) + frontier.append((k, i+spanlen, input_match, nualt, pathlen, node, prefix, is_shadow_path)) continue phrase = prefix + (word_id,) @@ -1106,13 +1107,14 @@ cdef class HieroCachingRuleFactory: count = len(locs) scores = self.scorer.score(FeatureContext( f, e, count, fcount[f], num_samples, - (k,i+spanlen), locs, fwords, self.fda, self.eda, + (k,i+spanlen), locs, input_match, + fwords, self.fda, self.eda, meta)) yield Rule(self.category, f, e, scores, alignment) if len(phrase) < self.max_length and i+spanlen < len(fwords) and pathlen+1 <= self.max_initial_size: for alt_id in range(len(fwords[i+spanlen])): - new_frontier.append((k, i+spanlen, alt_id, pathlen + 1, node, phrase, is_shadow_path)) + new_frontier.append((k, i+spanlen, input_match, alt_id, pathlen + 1, node, phrase, is_shadow_path)) num_subpatterns = arity if not is_shadow_path: num_subpatterns = num_subpatterns + 1 @@ -1129,7 +1131,7 @@ cdef class HieroCachingRuleFactory: nodes_isteps_away_buffer[key] = frontier_nodes for (i, alt, pathlen) in frontier_nodes: - new_frontier.append((k, i, alt, pathlen, xnode, phrase +(xcat,), is_shadow_path)) + new_frontier.append((k, i, input_match + (i,), alt, pathlen, xnode, phrase +(xcat,), is_shadow_path)) frontier = new_frontier stop_time = monitor_cpu() -- cgit v1.2.3