summaryrefslogtreecommitdiff
path: root/python/src/sa/rulefactory.pxi
diff options
context:
space:
mode:
authorChris Dyer <cdyer@allegro.clab.cs.cmu.edu>2013-04-23 19:35:18 -0400
committerChris Dyer <cdyer@allegro.clab.cs.cmu.edu>2013-04-23 19:35:18 -0400
commitc164dc0ed8a32e4095ba1b36495e0f743b8cc1ea (patch)
tree78b81e4c63adfa67adb7b8f80c3e6be87b4a2b2a /python/src/sa/rulefactory.pxi
parent0e46089cafa4e8e2f060e370d7afaceeda6b90a9 (diff)
parentd467e14b28085809c31431be0478eb3d9322fe96 (diff)
merge paul's extractor code
Diffstat (limited to 'python/src/sa/rulefactory.pxi')
-rw-r--r--python/src/sa/rulefactory.pxi17
1 files changed, 11 insertions, 6 deletions
diff --git a/python/src/sa/rulefactory.pxi b/python/src/sa/rulefactory.pxi
index 7063c2da..559e8396 100644
--- a/python/src/sa/rulefactory.pxi
+++ b/python/src/sa/rulefactory.pxi
@@ -260,6 +260,7 @@ cdef class HieroCachingRuleFactory:
cdef prev_norm_prefix
cdef float extract_time
+ cdef float intersect_time
cdef SuffixArray fsa
cdef DataArray fda
cdef DataArray eda
@@ -979,10 +980,11 @@ cdef class HieroCachingRuleFactory:
cdef IntList sample, chunklen
cdef Matching matching
cdef Phrase hiero_phrase
-
+
flen = len(fwords)
start_time = monitor_cpu()
self.extract_time = 0.0
+ self.intersect_time = 0.0
nodes_isteps_away_buffer = {}
hit = 0
reachable_buffer = {}
@@ -1072,7 +1074,10 @@ cdef class HieroCachingRuleFactory:
else:
if arity > 0:
# Intersecting because of arity > 0
+ intersect_start_time = monitor_cpu()
phrase_location = self.intersect(node, node.suffix_link.children[word_id], hiero_phrase)
+ intersect_stop_time = monitor_cpu()
+ self.intersect_time += intersect_stop_time - intersect_start_time
else:
# Suffix array search
phrase_location = node.phrase_location
@@ -1203,6 +1208,7 @@ cdef class HieroCachingRuleFactory:
logger.info("Total time for rule lookup, extraction, and scoring = %f seconds", (stop_time - start_time))
gc.collect()
logger.info(" Extract time = %f seconds", self.extract_time)
+ logger.info(" Intersect time = %f seconds", self.intersect_time)
cdef int find_fixpoint(self,
@@ -1674,7 +1680,6 @@ cdef class HieroCachingRuleFactory:
for (phrase2,eindexes) in phrase_list:
als1 = self.create_alignments(sent_links,num_links,self.findexes,eindexes)
extracts.append((fphr, phrase2, pair_count, tuple(als1)))
-
if (num_gaps < self.max_nonterminals and
phrase_len < self.max_length and
f_back_high - f_back_low + self.train_min_gap_size <= self.train_max_initial_size):
@@ -1690,12 +1695,12 @@ cdef class HieroCachingRuleFactory:
met_constraints = 0
if (met_constraints and
- self.find_fixpoint(f_x_low, f_back_high,
+ (self.find_fixpoint(f_x_low, f_back_high,
f_links_low, f_links_high, e_links_low, e_links_high,
e_low, e_high, &e_x_low, &e_x_high, &f_x_low, &f_x_high,
f_sent_len, e_sent_len,
self.train_max_initial_size, self.train_max_initial_size,
- 1, 1, 1, 1, 0, 1, 0) and
+ 1, 1, 1, 1, 0, 1, 0) == 1) and
((not self.tight_phrases) or f_links_low[f_x_low] != -1) and
self.find_fixpoint(f_x_low, f_low, # check integrity of new subphrase
f_links_low, f_links_high, e_links_low, e_links_high,
@@ -1809,12 +1814,12 @@ cdef class HieroCachingRuleFactory:
met_constraints = 0
if (met_constraints and
- self.find_fixpoint(f_x_low, f_x_high,
+ (self.find_fixpoint(f_x_low, f_x_high,
f_links_low, f_links_high, e_links_low, e_links_high,
e_low, e_high, &e_x_low, &e_x_high, &f_x_low, &f_x_high,
f_sent_len, e_sent_len,
self.train_max_initial_size, self.train_max_initial_size,
- 1, 1, 2, 1, 1, 1, 1) and
+ 1, 1, 2, 1, 1, 1, 1) == 1) and
((not self.tight_phrases) or (f_links_low[f_x_low] != -1 and f_links_low[f_x_high-1] != -1)) and
self.find_fixpoint(f_x_low, f_low,
f_links_low, f_links_high, e_links_low, e_links_high,